diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..56139da287a5e235abc1956f3d8dd45d95a5c472
Binary files /dev/null and b/.DS_Store differ
diff --git a/openai_whisper-base/.DS_Store b/openai_whisper-base/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..d201e7daf933a7690268e4ddaa239a963c812ab2
Binary files /dev/null and b/openai_whisper-base/.DS_Store differ
diff --git a/openai_whisper-base/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-base/AudioEncoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..13849a4b8da17682bb6eaed253707eac7f2befd0
--- /dev/null
+++ b/openai_whisper-base/AudioEncoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4e096b2abd561f00b9b698401df3fbe1a0d0c8d2476ff19cb4e1995680e827e
+size 243
diff --git a/openai_whisper-base/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-base/AudioEncoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..07ecae5bac3d83aa3b7bd1d17505cdfeaf109145
--- /dev/null
+++ b/openai_whisper-base/AudioEncoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e316980638e2099e83cb1a93b903717dc12b3c2168d0ab69113764c3767696ba
+size 347
diff --git a/openai_whisper-base/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-base/AudioEncoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..1b82ac5a57544279ce9f5c82c78f6fa5891520cc
--- /dev/null
+++ b/openai_whisper-base/AudioEncoder.mlmodelc/metadata.json
@@ -0,0 +1,69 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 512, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 54,
+      "Ios16.rsqrt" : 13,
+      "Ios16.mul" : 218,
+      "SliceByIndex" : 336,
+      "Ios16.sub" : 13,
+      "Transpose" : 6,
+      "Ios16.einsum" : 384,
+      "Ios16.conv" : 38,
+      "Ios16.add" : 26,
+      "Ios16.reduceMean" : 26,
+      "Ios16.softmax" : 192,
+      "Ios16.gelu" : 8,
+      "Ios16.batchNorm" : 13
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.1",
+      "com.github.apple.coremltools.version" : "7.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "AudioEncoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-base/AudioEncoder.mlmodelc/model.mil b/openai_whisper-base/AudioEncoder.mlmodelc/model.mil
new file mode 100644
index
0000000000000000000000000000000000000000..31b155b9b8384df140e6f7080248842f9aea1c90 --- /dev/null +++ b/openai_whisper-base/AudioEncoder.mlmodelc/model.mil @@ -0,0 +1,3322 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor melspectrogram_features) { + tensor var_38 = const()[name = tensor("op_38"), val = tensor([1, 1])]; + tensor var_44 = const()[name = tensor("op_44"), val = tensor([1, 1])]; + tensor var_49 = const()[name = tensor("op_49"), val = tensor(1)]; + tensor var_54_pad_type_0 = const()[name = tensor("op_54_pad_type_0"), val = tensor("custom")]; + tensor var_54_pad_0 = const()[name = tensor("op_54_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_29_to_fp16 = const()[name = tensor("op_29_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_35_to_fp16 = const()[name = tensor("op_35_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245888)))]; + tensor var_54_cast_fp16 = conv(bias = var_35_to_fp16, dilations = var_44, groups = var_49, pad = var_54_pad_0, pad_type = var_54_pad_type_0, strides = var_38, weight = var_29_to_fp16, x = melspectrogram_features)[name = tensor("op_54_cast_fp16")]; + tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_54_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_78 = const()[name = tensor("op_78"), val = tensor([2, 2])]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 1])]; + tensor var_89 = const()[name = tensor("op_89"), val = tensor(1)]; + tensor var_94_pad_type_0 = const()[name = tensor("op_94_pad_type_0"), val = tensor("custom")]; + tensor var_94_pad_0 = const()[name = tensor("op_94_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_69_to_fp16 = const()[name = tensor("op_69_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246976)))]; + tensor var_75_to_fp16 = const()[name = tensor("op_75_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1819904)))]; + tensor var_94_cast_fp16 = conv(bias = var_75_to_fp16, dilations = var_84, groups = var_89, pad = var_94_pad_0, pad_type = var_94_pad_type_0, strides = var_78, weight = var_69_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_94_cast_fp16")]; + tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_94_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor var_112_to_fp16 = const()[name = tensor("op_112_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1820992)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_112_to_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor(3)]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor(1)]; + tensor var_136 = const()[name = tensor("op_136"), val = tensor(true)]; + tensor var_146 = const()[name = tensor("op_146"), val = tensor([1])]; + tensor channels_mean_1_cast_fp16 = reduce_mean(axes = 
var_146, keep_dims = var_136, x = inputs_1_cast_fp16)[name = tensor("channels_mean_1_cast_fp16")]; + tensor zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor("zero_mean_1_cast_fp16")]; + tensor zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor("zero_mean_sq_1_cast_fp16")]; + tensor var_150 = const()[name = tensor("op_150"), val = tensor([1])]; + tensor var_151_cast_fp16 = reduce_mean(axes = var_150, keep_dims = var_136, x = zero_mean_sq_1_cast_fp16)[name = tensor("op_151_cast_fp16")]; + tensor var_152_to_fp16 = const()[name = tensor("op_152_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_153_cast_fp16 = add(x = var_151_cast_fp16, y = var_152_to_fp16)[name = tensor("op_153_cast_fp16")]; + tensor denom_1_epsilon_0_to_fp16 = const()[name = tensor("denom_1_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_153_cast_fp16)[name = tensor("denom_1_cast_fp16")]; + tensor out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3357056)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3358144)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3359232)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3360320)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 1])]; + tensor var_170 = const()[name = tensor("op_170"), val = tensor([1, 1])]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("custom")]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3361408)))]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3885760)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_170, groups = var_135, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_168, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, 1])]; + tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = 
tensor("custom")]; + tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3886848)))]; + tensor key_1_cast_fp16 = conv(dilations = var_176, groups = var_135, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = var_174, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_181 = const()[name = tensor("op_181"), val = tensor([1, 1])]; + tensor var_183 = const()[name = tensor("op_183"), val = tensor([1, 1])]; + tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("custom")]; + tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4411200)))]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4935552)))]; + tensor value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_183, groups = var_135, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = var_181, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_190_begin_0 = const()[name = tensor("op_190_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_190_end_0 = const()[name = tensor("op_190_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_190_end_mask_0 = const()[name = tensor("op_190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_190_cast_fp16 = slice_by_index(begin = var_190_begin_0, end = var_190_end_0, end_mask = var_190_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor var_194_begin_0 = const()[name = tensor("op_194_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_194_end_0 = const()[name = tensor("op_194_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_194_end_mask_0 = const()[name = tensor("op_194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_194_cast_fp16 = slice_by_index(begin = var_194_begin_0, end = var_194_end_0, end_mask = var_194_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_194_cast_fp16")]; + tensor var_198_begin_0 = const()[name = tensor("op_198_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_198_end_0 = const()[name = tensor("op_198_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_198_end_mask_0 = const()[name = tensor("op_198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_198_cast_fp16 = slice_by_index(begin = var_198_begin_0, end = var_198_end_0, end_mask = var_198_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_198_cast_fp16")]; + tensor var_202_begin_0 = const()[name = tensor("op_202_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_202_end_0 = const()[name = tensor("op_202_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_202_end_mask_0 = const()[name = tensor("op_202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_202_cast_fp16 = slice_by_index(begin = var_202_begin_0, end = var_202_end_0, end_mask = 
var_202_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_202_cast_fp16")]; + tensor var_206_begin_0 = const()[name = tensor("op_206_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_206_end_0 = const()[name = tensor("op_206_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_206_end_mask_0 = const()[name = tensor("op_206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_206_cast_fp16 = slice_by_index(begin = var_206_begin_0, end = var_206_end_0, end_mask = var_206_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_241_begin_0 = const()[name = tensor("op_241_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_241_end_0 = const()[name = tensor("op_241_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_241_end_mask_0 = const()[name = tensor("op_241_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_241_cast_fp16 = slice_by_index(begin = var_241_begin_0, end = var_241_end_0, end_mask = var_241_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_241_cast_fp16")]; + tensor var_248_begin_0 = 
const()[name = tensor("op_248_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_248_end_0 = const()[name = tensor("op_248_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_248_end_mask_0 = const()[name = tensor("op_248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_248_cast_fp16 = slice_by_index(begin = var_248_begin_0, end = var_248_end_0, end_mask = var_248_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_248_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_269_begin_0 = const()[name = tensor("op_269_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_269_end_0 = const()[name = tensor("op_269_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_269_end_mask_0 = const()[name = tensor("op_269_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_269_cast_fp16 = slice_by_index(begin = var_269_begin_0, end = var_269_end_0, end_mask = var_269_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_269_cast_fp16")]; + tensor var_276_begin_0 = const()[name = tensor("op_276_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_276_end_0 = const()[name = tensor("op_276_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_276_end_mask_0 = const()[name = tensor("op_276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_276_cast_fp16 = slice_by_index(begin = var_276_begin_0, end = var_276_end_0, end_mask = var_276_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_276_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_297_begin_0 = const()[name = tensor("op_297_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_297_end_0 = const()[name = 
tensor("op_297_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_297_end_mask_0 = const()[name = tensor("op_297_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_297_cast_fp16 = slice_by_index(begin = var_297_begin_0, end = var_297_end_0, end_mask = var_297_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_297_cast_fp16")]; + tensor var_304_begin_0 = const()[name = tensor("op_304_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_304_end_0 = const()[name = tensor("op_304_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_304_end_mask_0 = const()[name = tensor("op_304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_304_cast_fp16 = slice_by_index(begin = var_304_begin_0, end = var_304_end_0, end_mask = var_304_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_304_cast_fp16")]; + tensor var_311_begin_0 = const()[name = tensor("op_311_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_311_end_0 = const()[name = tensor("op_311_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_311_end_mask_0 = const()[name = tensor("op_311_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_311_cast_fp16 = slice_by_index(begin = var_311_begin_0, end = var_311_end_0, end_mask = var_311_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_311_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_332_begin_0 = const()[name = tensor("op_332_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_332_end_0 = const()[name = tensor("op_332_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_332_end_mask_0 = const()[name = tensor("op_332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_332_cast_fp16 = slice_by_index(begin = var_332_begin_0, end = var_332_end_0, end_mask = var_332_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_332_cast_fp16")]; + tensor var_339_begin_0 = const()[name = tensor("op_339_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_339_end_0 = const()[name = tensor("op_339_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_339_end_mask_0 = const()[name = tensor("op_339_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_339_cast_fp16")]; + tensor var_346_begin_0 = const()[name = tensor("op_346_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_346_end_0 = const()[name = tensor("op_346_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_346_end_mask_0 = const()[name = 
tensor("op_346_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_346_cast_fp16")]; + tensor var_353_begin_0 = const()[name = tensor("op_353_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_353_end_0 = const()[name = tensor("op_353_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_353_end_mask_0 = const()[name = tensor("op_353_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_353_cast_fp16 = slice_by_index(begin = var_353_begin_0, end = var_353_end_0, end_mask = var_353_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_353_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_367_begin_0 = const()[name = tensor("op_367_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_367_end_0 = const()[name = tensor("op_367_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_367_end_mask_0 = const()[name = tensor("op_367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = var_367_end_0, end_mask = var_367_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_367_cast_fp16")]; + tensor var_374_begin_0 = const()[name = tensor("op_374_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_374_end_0 = const()[name = tensor("op_374_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_374_end_mask_0 = const()[name = tensor("op_374_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = var_374_end_0, end_mask = var_374_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_374_cast_fp16")]; + tensor var_381_begin_0 = const()[name = tensor("op_381_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_381_end_0 = const()[name = tensor("op_381_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_381_end_mask_0 = const()[name = tensor("op_381_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_381_cast_fp16 = slice_by_index(begin = var_381_begin_0, end = var_381_end_0, end_mask = var_381_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_381_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_395_cast_fp16 = 
slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_402_begin_0 = const()[name = tensor("op_402_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_402_end_0 = const()[name = tensor("op_402_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_402_end_mask_0 = const()[name = tensor("op_402_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_402_cast_fp16 = slice_by_index(begin = var_402_begin_0, end = var_402_end_0, end_mask = var_402_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_402_cast_fp16")]; + tensor var_409_begin_0 = const()[name = tensor("op_409_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_409_end_0 = const()[name = tensor("op_409_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_409_end_mask_0 = const()[name = tensor("op_409_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_409_cast_fp16 = slice_by_index(begin = var_409_begin_0, end = var_409_end_0, end_mask = var_409_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_409_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_423_begin_0 = const()[name = tensor("op_423_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_423_end_0 = const()[name = tensor("op_423_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_423_end_mask_0 = const()[name = tensor("op_423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_423_cast_fp16 = slice_by_index(begin = var_423_begin_0, end = var_423_end_0, end_mask = var_423_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_423_cast_fp16")]; + tensor var_430_begin_0 = const()[name = tensor("op_430_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_430_end_0 = const()[name = tensor("op_430_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_430_end_mask_0 = const()[name = tensor("op_430_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_430_cast_fp16 = slice_by_index(begin = var_430_begin_0, end = var_430_end_0, end_mask = var_430_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor var_437_begin_0 = const()[name = tensor("op_437_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_437_end_0 = const()[name = tensor("op_437_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_437_end_mask_0 = const()[name = tensor("op_437_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_437_cast_fp16 = slice_by_index(begin = var_437_begin_0, end = var_437_end_0, end_mask = var_437_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_437_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = 
var_218_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor k_1_perm_0 = const()[name = tensor("k_1_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_449_begin_0 = const()[name = tensor("op_449_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_449_end_0 = const()[name = tensor("op_449_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_449_end_mask_0 = const()[name = tensor("op_449_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_5 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor("transpose_5")]; + tensor var_449_cast_fp16 = slice_by_index(begin = var_449_begin_0, end = var_449_end_0, end_mask = var_449_end_mask_0, x = transpose_5)[name = tensor("op_449_cast_fp16")]; + tensor var_453_begin_0 = const()[name = tensor("op_453_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_453_end_0 = const()[name = tensor("op_453_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_453_end_mask_0 = const()[name = tensor("op_453_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_453_cast_fp16 = slice_by_index(begin = var_453_begin_0, end = var_453_end_0, end_mask = var_453_end_mask_0, x = transpose_5)[name = tensor("op_453_cast_fp16")]; + tensor var_457_begin_0 = const()[name = tensor("op_457_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_457_end_0 = const()[name = tensor("op_457_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_457_end_mask_0 = const()[name = tensor("op_457_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_457_cast_fp16 = slice_by_index(begin = var_457_begin_0, end = var_457_end_0, end_mask = var_457_end_mask_0, x = transpose_5)[name = tensor("op_457_cast_fp16")]; + tensor var_461_begin_0 = const()[name = tensor("op_461_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_461_end_0 = const()[name = tensor("op_461_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_461_end_mask_0 = const()[name = tensor("op_461_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_461_cast_fp16 = slice_by_index(begin = var_461_begin_0, end = var_461_end_0, end_mask = var_461_end_mask_0, x = transpose_5)[name = tensor("op_461_cast_fp16")]; + tensor var_465_begin_0 = const()[name = tensor("op_465_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_465_end_0 = const()[name = tensor("op_465_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_465_end_mask_0 = const()[name = tensor("op_465_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = transpose_5)[name = tensor("op_465_cast_fp16")]; + tensor var_469_begin_0 = const()[name = tensor("op_469_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_469_end_0 = const()[name = tensor("op_469_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_469_end_mask_0 = const()[name = tensor("op_469_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_469_cast_fp16 = slice_by_index(begin = var_469_begin_0, end = var_469_end_0, end_mask = var_469_end_mask_0, x = transpose_5)[name = tensor("op_469_cast_fp16")]; + tensor var_473_begin_0 = const()[name = tensor("op_473_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_473_end_0 = const()[name = tensor("op_473_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_473_end_mask_0 = const()[name = tensor("op_473_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_473_cast_fp16 = slice_by_index(begin = var_473_begin_0, end = 
var_473_end_0, end_mask = var_473_end_mask_0, x = transpose_5)[name = tensor("op_473_cast_fp16")]; + tensor var_477_begin_0 = const()[name = tensor("op_477_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_477_end_0 = const()[name = tensor("op_477_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_477_end_mask_0 = const()[name = tensor("op_477_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_477_cast_fp16 = slice_by_index(begin = var_477_begin_0, end = var_477_end_0, end_mask = var_477_end_mask_0, x = transpose_5)[name = tensor("op_477_cast_fp16")]; + tensor var_479_begin_0 = const()[name = tensor("op_479_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_479_end_0 = const()[name = tensor("op_479_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_479_end_mask_0 = const()[name = tensor("op_479_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_479_cast_fp16 = slice_by_index(begin = var_479_begin_0, end = var_479_end_0, end_mask = var_479_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_479_cast_fp16")]; + tensor var_483_begin_0 = const()[name = tensor("op_483_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_483_end_0 = const()[name = tensor("op_483_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_483_end_mask_0 = const()[name = tensor("op_483_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_483_cast_fp16 = slice_by_index(begin = var_483_begin_0, end = var_483_end_0, end_mask = var_483_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_483_cast_fp16")]; + tensor var_487_begin_0 = const()[name = tensor("op_487_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_487_end_0 = const()[name = tensor("op_487_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_487_end_mask_0 = const()[name = tensor("op_487_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_487_cast_fp16 = slice_by_index(begin = var_487_begin_0, end = var_487_end_0, end_mask = var_487_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_487_cast_fp16")]; + tensor var_491_begin_0 = const()[name = tensor("op_491_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_491_end_0 = const()[name = tensor("op_491_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_491_end_mask_0 = const()[name = tensor("op_491_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_491_cast_fp16 = slice_by_index(begin = var_491_begin_0, end = var_491_end_0, end_mask = var_491_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_491_cast_fp16")]; + tensor var_495_begin_0 = const()[name = tensor("op_495_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_495_end_0 = const()[name = tensor("op_495_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_495_end_mask_0 = const()[name = tensor("op_495_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_495_cast_fp16 = slice_by_index(begin = var_495_begin_0, end = var_495_end_0, end_mask = var_495_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_495_cast_fp16")]; + tensor var_499_begin_0 = const()[name = tensor("op_499_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_499_end_0 = const()[name = tensor("op_499_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_499_end_mask_0 = const()[name = tensor("op_499_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_499_cast_fp16 = slice_by_index(begin = var_499_begin_0, end = var_499_end_0, end_mask = var_499_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_499_cast_fp16")]; + tensor 
var_503_begin_0 = const()[name = tensor("op_503_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_503_end_0 = const()[name = tensor("op_503_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_503_end_mask_0 = const()[name = tensor("op_503_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_503_cast_fp16 = slice_by_index(begin = var_503_begin_0, end = var_503_end_0, end_mask = var_503_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_503_cast_fp16")]; + tensor var_507_begin_0 = const()[name = tensor("op_507_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_507_end_0 = const()[name = tensor("op_507_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_507_end_mask_0 = const()[name = tensor("op_507_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_507_cast_fp16 = slice_by_index(begin = var_507_begin_0, end = var_507_end_0, end_mask = var_507_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_507_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_449_cast_fp16, var_227_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_449_cast_fp16, var_234_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_449_cast_fp16, var_241_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_449_cast_fp16, var_248_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_7_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_453_cast_fp16, var_255_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_9_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_453_cast_fp16, 
var_262_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_11_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_453_cast_fp16, var_269_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_13_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_453_cast_fp16, var_276_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_15_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_457_cast_fp16, var_283_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_17_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_457_cast_fp16, var_290_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_19_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_457_cast_fp16, var_297_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_21_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_457_cast_fp16, var_304_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_23_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_461_cast_fp16, var_311_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_25_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = 
tensor("aw_chunk_25_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_461_cast_fp16, var_318_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_27_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_461_cast_fp16, var_325_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_29_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_461_cast_fp16, var_332_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_31_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; + tensor var_575_equation_0 = const()[name = tensor("op_575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_575_cast_fp16 = einsum(equation = var_575_equation_0, values = (var_465_cast_fp16, var_339_cast_fp16))[name = tensor("op_575_cast_fp16")]; + tensor var_576_to_fp16 = const()[name = tensor("op_576_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_33_cast_fp16 = mul(x = var_575_cast_fp16, y = var_576_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; + tensor var_579_equation_0 = const()[name = tensor("op_579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_579_cast_fp16 = einsum(equation = var_579_equation_0, values = (var_465_cast_fp16, var_346_cast_fp16))[name = tensor("op_579_cast_fp16")]; + tensor var_580_to_fp16 = const()[name = tensor("op_580_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_35_cast_fp16 = mul(x = var_579_cast_fp16, y = var_580_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; + tensor var_583_equation_0 = const()[name = tensor("op_583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_583_cast_fp16 = einsum(equation = var_583_equation_0, values = (var_465_cast_fp16, var_353_cast_fp16))[name = tensor("op_583_cast_fp16")]; + tensor var_584_to_fp16 = const()[name = tensor("op_584_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_37_cast_fp16 = mul(x = var_583_cast_fp16, y = var_584_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; + tensor var_587_equation_0 = const()[name = tensor("op_587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_587_cast_fp16 = einsum(equation = var_587_equation_0, values = (var_465_cast_fp16, var_360_cast_fp16))[name = tensor("op_587_cast_fp16")]; + tensor var_588_to_fp16 = const()[name = tensor("op_588_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_39_cast_fp16 = mul(x = var_587_cast_fp16, y = var_588_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; + tensor var_591_equation_0 = const()[name = tensor("op_591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_591_cast_fp16 = einsum(equation = var_591_equation_0, values = (var_469_cast_fp16, 
var_367_cast_fp16))[name = tensor("op_591_cast_fp16")]; + tensor var_592_to_fp16 = const()[name = tensor("op_592_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_41_cast_fp16 = mul(x = var_591_cast_fp16, y = var_592_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; + tensor var_595_equation_0 = const()[name = tensor("op_595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_595_cast_fp16 = einsum(equation = var_595_equation_0, values = (var_469_cast_fp16, var_374_cast_fp16))[name = tensor("op_595_cast_fp16")]; + tensor var_596_to_fp16 = const()[name = tensor("op_596_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_43_cast_fp16 = mul(x = var_595_cast_fp16, y = var_596_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; + tensor var_599_equation_0 = const()[name = tensor("op_599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_599_cast_fp16 = einsum(equation = var_599_equation_0, values = (var_469_cast_fp16, var_381_cast_fp16))[name = tensor("op_599_cast_fp16")]; + tensor var_600_to_fp16 = const()[name = tensor("op_600_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_45_cast_fp16 = mul(x = var_599_cast_fp16, y = var_600_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; + tensor var_603_equation_0 = const()[name = tensor("op_603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_603_cast_fp16 = einsum(equation = var_603_equation_0, values = (var_469_cast_fp16, var_388_cast_fp16))[name = tensor("op_603_cast_fp16")]; + tensor var_604_to_fp16 = const()[name = tensor("op_604_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_47_cast_fp16 = mul(x = var_603_cast_fp16, y = var_604_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; + tensor var_607_equation_0 = const()[name = tensor("op_607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_607_cast_fp16 = einsum(equation = var_607_equation_0, values = (var_473_cast_fp16, var_395_cast_fp16))[name = tensor("op_607_cast_fp16")]; + tensor var_608_to_fp16 = const()[name = tensor("op_608_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_49_cast_fp16 = mul(x = var_607_cast_fp16, y = var_608_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; + tensor var_611_equation_0 = const()[name = tensor("op_611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_611_cast_fp16 = einsum(equation = var_611_equation_0, values = (var_473_cast_fp16, var_402_cast_fp16))[name = tensor("op_611_cast_fp16")]; + tensor var_612_to_fp16 = const()[name = tensor("op_612_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_51_cast_fp16 = mul(x = var_611_cast_fp16, y = var_612_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; + tensor var_615_equation_0 = const()[name = tensor("op_615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_615_cast_fp16 = einsum(equation = var_615_equation_0, values = (var_473_cast_fp16, var_409_cast_fp16))[name = tensor("op_615_cast_fp16")]; + tensor var_616_to_fp16 = const()[name = tensor("op_616_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_53_cast_fp16 = mul(x = var_615_cast_fp16, y = var_616_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; + tensor var_619_equation_0 = const()[name = tensor("op_619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_619_cast_fp16 = einsum(equation = var_619_equation_0, values = (var_473_cast_fp16, var_416_cast_fp16))[name = tensor("op_619_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_55_cast_fp16 = mul(x = var_619_cast_fp16, y = var_620_to_fp16)[name = 
tensor("aw_chunk_55_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_477_cast_fp16, var_423_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_624_to_fp16 = const()[name = tensor("op_624_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_57_cast_fp16 = mul(x = var_623_cast_fp16, y = var_624_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_477_cast_fp16, var_430_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_628_to_fp16 = const()[name = tensor("op_628_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_59_cast_fp16 = mul(x = var_627_cast_fp16, y = var_628_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_477_cast_fp16, var_437_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_632_to_fp16 = const()[name = tensor("op_632_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_61_cast_fp16 = mul(x = var_631_cast_fp16, y = var_632_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_477_cast_fp16, var_444_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_636_to_fp16 = const()[name = tensor("op_636_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_63_cast_fp16 = mul(x = var_635_cast_fp16, y = var_636_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; + tensor var_638_cast_fp16 = softmax(axis = var_135, x = aw_chunk_1_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor var_639_cast_fp16 = softmax(axis = var_135, x = aw_chunk_3_cast_fp16)[name = tensor("op_639_cast_fp16")]; + tensor var_640_cast_fp16 = softmax(axis = var_135, x = aw_chunk_5_cast_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_641_cast_fp16 = softmax(axis = var_135, x = aw_chunk_7_cast_fp16)[name = tensor("op_641_cast_fp16")]; + tensor var_642_cast_fp16 = softmax(axis = var_135, x = aw_chunk_9_cast_fp16)[name = tensor("op_642_cast_fp16")]; + tensor var_643_cast_fp16 = softmax(axis = var_135, x = aw_chunk_11_cast_fp16)[name = tensor("op_643_cast_fp16")]; + tensor var_644_cast_fp16 = softmax(axis = var_135, x = aw_chunk_13_cast_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_645_cast_fp16 = softmax(axis = var_135, x = aw_chunk_15_cast_fp16)[name = tensor("op_645_cast_fp16")]; + tensor var_646_cast_fp16 = softmax(axis = var_135, x = aw_chunk_17_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor var_647_cast_fp16 = softmax(axis = var_135, x = aw_chunk_19_cast_fp16)[name = tensor("op_647_cast_fp16")]; + tensor var_648_cast_fp16 = softmax(axis = var_135, x = aw_chunk_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_649_cast_fp16 = softmax(axis = var_135, x = aw_chunk_23_cast_fp16)[name = tensor("op_649_cast_fp16")]; + tensor var_650_cast_fp16 = softmax(axis = var_135, x = aw_chunk_25_cast_fp16)[name = tensor("op_650_cast_fp16")]; + tensor var_651_cast_fp16 = softmax(axis = var_135, x = aw_chunk_27_cast_fp16)[name = tensor("op_651_cast_fp16")]; + tensor 
var_652_cast_fp16 = softmax(axis = var_135, x = aw_chunk_29_cast_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_653_cast_fp16 = softmax(axis = var_135, x = aw_chunk_31_cast_fp16)[name = tensor("op_653_cast_fp16")]; + tensor var_654_cast_fp16 = softmax(axis = var_135, x = aw_chunk_33_cast_fp16)[name = tensor("op_654_cast_fp16")]; + tensor var_655_cast_fp16 = softmax(axis = var_135, x = aw_chunk_35_cast_fp16)[name = tensor("op_655_cast_fp16")]; + tensor var_656_cast_fp16 = softmax(axis = var_135, x = aw_chunk_37_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_657_cast_fp16 = softmax(axis = var_135, x = aw_chunk_39_cast_fp16)[name = tensor("op_657_cast_fp16")]; + tensor var_658_cast_fp16 = softmax(axis = var_135, x = aw_chunk_41_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor var_659_cast_fp16 = softmax(axis = var_135, x = aw_chunk_43_cast_fp16)[name = tensor("op_659_cast_fp16")]; + tensor var_660_cast_fp16 = softmax(axis = var_135, x = aw_chunk_45_cast_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_661_cast_fp16 = softmax(axis = var_135, x = aw_chunk_47_cast_fp16)[name = tensor("op_661_cast_fp16")]; + tensor var_662_cast_fp16 = softmax(axis = var_135, x = aw_chunk_49_cast_fp16)[name = tensor("op_662_cast_fp16")]; + tensor var_663_cast_fp16 = softmax(axis = var_135, x = aw_chunk_51_cast_fp16)[name = tensor("op_663_cast_fp16")]; + tensor var_664_cast_fp16 = softmax(axis = var_135, x = aw_chunk_53_cast_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_665_cast_fp16 = softmax(axis = var_135, x = aw_chunk_55_cast_fp16)[name = tensor("op_665_cast_fp16")]; + tensor var_666_cast_fp16 = softmax(axis = var_135, x = aw_chunk_57_cast_fp16)[name = tensor("op_666_cast_fp16")]; + tensor var_667_cast_fp16 = softmax(axis = var_135, x = aw_chunk_59_cast_fp16)[name = tensor("op_667_cast_fp16")]; + tensor var_668_cast_fp16 = softmax(axis = var_135, x = aw_chunk_61_cast_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_669_cast_fp16 = softmax(axis = var_135, x = aw_chunk_63_cast_fp16)[name = tensor("op_669_cast_fp16")]; + tensor var_671_equation_0 = const()[name = tensor("op_671_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_671_cast_fp16 = einsum(equation = var_671_equation_0, values = (var_479_cast_fp16, var_638_cast_fp16))[name = tensor("op_671_cast_fp16")]; + tensor var_673_equation_0 = const()[name = tensor("op_673_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_673_cast_fp16 = einsum(equation = var_673_equation_0, values = (var_479_cast_fp16, var_639_cast_fp16))[name = tensor("op_673_cast_fp16")]; + tensor var_675_equation_0 = const()[name = tensor("op_675_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_675_cast_fp16 = einsum(equation = var_675_equation_0, values = (var_479_cast_fp16, var_640_cast_fp16))[name = tensor("op_675_cast_fp16")]; + tensor var_677_equation_0 = const()[name = tensor("op_677_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_677_cast_fp16 = einsum(equation = var_677_equation_0, values = (var_479_cast_fp16, var_641_cast_fp16))[name = tensor("op_677_cast_fp16")]; + tensor var_679_equation_0 = const()[name = tensor("op_679_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_679_cast_fp16 = einsum(equation = var_679_equation_0, values = (var_483_cast_fp16, var_642_cast_fp16))[name = tensor("op_679_cast_fp16")]; + tensor var_681_equation_0 = const()[name = tensor("op_681_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_681_cast_fp16 = einsum(equation = var_681_equation_0, 
values = (var_483_cast_fp16, var_643_cast_fp16))[name = tensor("op_681_cast_fp16")]; + tensor var_683_equation_0 = const()[name = tensor("op_683_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_683_cast_fp16 = einsum(equation = var_683_equation_0, values = (var_483_cast_fp16, var_644_cast_fp16))[name = tensor("op_683_cast_fp16")]; + tensor var_685_equation_0 = const()[name = tensor("op_685_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_685_cast_fp16 = einsum(equation = var_685_equation_0, values = (var_483_cast_fp16, var_645_cast_fp16))[name = tensor("op_685_cast_fp16")]; + tensor var_687_equation_0 = const()[name = tensor("op_687_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_687_cast_fp16 = einsum(equation = var_687_equation_0, values = (var_487_cast_fp16, var_646_cast_fp16))[name = tensor("op_687_cast_fp16")]; + tensor var_689_equation_0 = const()[name = tensor("op_689_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_689_cast_fp16 = einsum(equation = var_689_equation_0, values = (var_487_cast_fp16, var_647_cast_fp16))[name = tensor("op_689_cast_fp16")]; + tensor var_691_equation_0 = const()[name = tensor("op_691_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_691_cast_fp16 = einsum(equation = var_691_equation_0, values = (var_487_cast_fp16, var_648_cast_fp16))[name = tensor("op_691_cast_fp16")]; + tensor var_693_equation_0 = const()[name = tensor("op_693_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_693_cast_fp16 = einsum(equation = var_693_equation_0, values = (var_487_cast_fp16, var_649_cast_fp16))[name = tensor("op_693_cast_fp16")]; + tensor var_695_equation_0 = const()[name = tensor("op_695_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_695_cast_fp16 = einsum(equation = var_695_equation_0, values = (var_491_cast_fp16, var_650_cast_fp16))[name = tensor("op_695_cast_fp16")]; + tensor var_697_equation_0 = const()[name = tensor("op_697_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_697_cast_fp16 = einsum(equation = var_697_equation_0, values = (var_491_cast_fp16, var_651_cast_fp16))[name = tensor("op_697_cast_fp16")]; + tensor var_699_equation_0 = const()[name = tensor("op_699_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_699_cast_fp16 = einsum(equation = var_699_equation_0, values = (var_491_cast_fp16, var_652_cast_fp16))[name = tensor("op_699_cast_fp16")]; + tensor var_701_equation_0 = const()[name = tensor("op_701_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_701_cast_fp16 = einsum(equation = var_701_equation_0, values = (var_491_cast_fp16, var_653_cast_fp16))[name = tensor("op_701_cast_fp16")]; + tensor var_703_equation_0 = const()[name = tensor("op_703_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_703_cast_fp16 = einsum(equation = var_703_equation_0, values = (var_495_cast_fp16, var_654_cast_fp16))[name = tensor("op_703_cast_fp16")]; + tensor var_705_equation_0 = const()[name = tensor("op_705_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_705_cast_fp16 = einsum(equation = var_705_equation_0, values = (var_495_cast_fp16, var_655_cast_fp16))[name = tensor("op_705_cast_fp16")]; + tensor var_707_equation_0 = const()[name = tensor("op_707_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_707_cast_fp16 = einsum(equation = var_707_equation_0, values = (var_495_cast_fp16, var_656_cast_fp16))[name = tensor("op_707_cast_fp16")]; + tensor var_709_equation_0 = const()[name = tensor("op_709_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_709_cast_fp16 = einsum(equation = var_709_equation_0, values = (var_495_cast_fp16, var_657_cast_fp16))[name = tensor("op_709_cast_fp16")]; + tensor var_711_equation_0 = const()[name = tensor("op_711_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_711_cast_fp16 = einsum(equation = var_711_equation_0, values = (var_499_cast_fp16, var_658_cast_fp16))[name = tensor("op_711_cast_fp16")]; + tensor var_713_equation_0 = const()[name = tensor("op_713_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_713_cast_fp16 = einsum(equation = var_713_equation_0, values = (var_499_cast_fp16, var_659_cast_fp16))[name = tensor("op_713_cast_fp16")]; + tensor var_715_equation_0 = const()[name = tensor("op_715_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_715_cast_fp16 = einsum(equation = var_715_equation_0, values = (var_499_cast_fp16, var_660_cast_fp16))[name = tensor("op_715_cast_fp16")]; + tensor var_717_equation_0 = const()[name = tensor("op_717_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_717_cast_fp16 = einsum(equation = var_717_equation_0, values = (var_499_cast_fp16, var_661_cast_fp16))[name = tensor("op_717_cast_fp16")]; + tensor var_719_equation_0 = const()[name = tensor("op_719_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_719_cast_fp16 = einsum(equation = var_719_equation_0, values = (var_503_cast_fp16, var_662_cast_fp16))[name = tensor("op_719_cast_fp16")]; + tensor var_721_equation_0 = const()[name = tensor("op_721_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_721_cast_fp16 = einsum(equation = var_721_equation_0, values = (var_503_cast_fp16, var_663_cast_fp16))[name = tensor("op_721_cast_fp16")]; + tensor var_723_equation_0 = const()[name = tensor("op_723_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_723_cast_fp16 = einsum(equation = var_723_equation_0, values = (var_503_cast_fp16, var_664_cast_fp16))[name = tensor("op_723_cast_fp16")]; + tensor var_725_equation_0 = const()[name = tensor("op_725_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_725_cast_fp16 = einsum(equation = var_725_equation_0, values = (var_503_cast_fp16, var_665_cast_fp16))[name = tensor("op_725_cast_fp16")]; + tensor var_727_equation_0 = const()[name = tensor("op_727_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_727_cast_fp16 = einsum(equation = var_727_equation_0, values = (var_507_cast_fp16, var_666_cast_fp16))[name = tensor("op_727_cast_fp16")]; + tensor var_729_equation_0 = const()[name = tensor("op_729_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_729_cast_fp16 = einsum(equation = var_729_equation_0, values = (var_507_cast_fp16, var_667_cast_fp16))[name = tensor("op_729_cast_fp16")]; + tensor var_731_equation_0 = const()[name = tensor("op_731_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_731_cast_fp16 = einsum(equation = var_731_equation_0, values = (var_507_cast_fp16, var_668_cast_fp16))[name = tensor("op_731_cast_fp16")]; + tensor var_733_equation_0 = const()[name = tensor("op_733_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_733_cast_fp16 = einsum(equation = var_733_equation_0, values = (var_507_cast_fp16, var_669_cast_fp16))[name = tensor("op_733_cast_fp16")]; + tensor var_735_interleave_0 = const()[name = tensor("op_735_interleave_0"), val = tensor(false)]; + tensor var_735_cast_fp16 = concat(axis = var_122, interleave = var_735_interleave_0, values = (var_671_cast_fp16, var_673_cast_fp16, var_675_cast_fp16, 
var_677_cast_fp16))[name = tensor("op_735_cast_fp16")]; + tensor var_737_interleave_0 = const()[name = tensor("op_737_interleave_0"), val = tensor(false)]; + tensor var_737_cast_fp16 = concat(axis = var_122, interleave = var_737_interleave_0, values = (var_679_cast_fp16, var_681_cast_fp16, var_683_cast_fp16, var_685_cast_fp16))[name = tensor("op_737_cast_fp16")]; + tensor var_739_interleave_0 = const()[name = tensor("op_739_interleave_0"), val = tensor(false)]; + tensor var_739_cast_fp16 = concat(axis = var_122, interleave = var_739_interleave_0, values = (var_687_cast_fp16, var_689_cast_fp16, var_691_cast_fp16, var_693_cast_fp16))[name = tensor("op_739_cast_fp16")]; + tensor var_741_interleave_0 = const()[name = tensor("op_741_interleave_0"), val = tensor(false)]; + tensor var_741_cast_fp16 = concat(axis = var_122, interleave = var_741_interleave_0, values = (var_695_cast_fp16, var_697_cast_fp16, var_699_cast_fp16, var_701_cast_fp16))[name = tensor("op_741_cast_fp16")]; + tensor var_743_interleave_0 = const()[name = tensor("op_743_interleave_0"), val = tensor(false)]; + tensor var_743_cast_fp16 = concat(axis = var_122, interleave = var_743_interleave_0, values = (var_703_cast_fp16, var_705_cast_fp16, var_707_cast_fp16, var_709_cast_fp16))[name = tensor("op_743_cast_fp16")]; + tensor var_745_interleave_0 = const()[name = tensor("op_745_interleave_0"), val = tensor(false)]; + tensor var_745_cast_fp16 = concat(axis = var_122, interleave = var_745_interleave_0, values = (var_711_cast_fp16, var_713_cast_fp16, var_715_cast_fp16, var_717_cast_fp16))[name = tensor("op_745_cast_fp16")]; + tensor var_747_interleave_0 = const()[name = tensor("op_747_interleave_0"), val = tensor(false)]; + tensor var_747_cast_fp16 = concat(axis = var_122, interleave = var_747_interleave_0, values = (var_719_cast_fp16, var_721_cast_fp16, var_723_cast_fp16, var_725_cast_fp16))[name = tensor("op_747_cast_fp16")]; + tensor var_749_interleave_0 = const()[name = tensor("op_749_interleave_0"), val = tensor(false)]; + tensor var_749_cast_fp16 = concat(axis = var_122, interleave = var_749_interleave_0, values = (var_727_cast_fp16, var_729_cast_fp16, var_731_cast_fp16, var_733_cast_fp16))[name = tensor("op_749_cast_fp16")]; + tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; + tensor input_1_cast_fp16 = concat(axis = var_135, interleave = input_1_interleave_0, values = (var_735_cast_fp16, var_737_cast_fp16, var_739_cast_fp16, var_741_cast_fp16, var_743_cast_fp16, var_745_cast_fp16, var_747_cast_fp16, var_749_cast_fp16))[name = tensor("input_1_cast_fp16")]; + tensor var_754 = const()[name = tensor("op_754"), val = tensor([1, 1])]; + tensor var_756 = const()[name = tensor("op_756"), val = tensor([1, 1])]; + tensor obj_3_pad_type_0 = const()[name = tensor("obj_3_pad_type_0"), val = tensor("custom")]; + tensor obj_3_pad_0 = const()[name = tensor("obj_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4936640)))]; + tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5460992)))]; + tensor obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_756, groups = var_135, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, 
strides = var_754, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor var_762 = const()[name = tensor("op_762"), val = tensor([1])]; + tensor channels_mean_3_cast_fp16 = reduce_mean(axes = var_762, keep_dims = var_136, x = inputs_3_cast_fp16)[name = tensor("channels_mean_3_cast_fp16")]; + tensor zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor("zero_mean_3_cast_fp16")]; + tensor zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor("zero_mean_sq_3_cast_fp16")]; + tensor var_766 = const()[name = tensor("op_766"), val = tensor([1])]; + tensor var_767_cast_fp16 = reduce_mean(axes = var_766, keep_dims = var_136, x = zero_mean_sq_3_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_to_fp16 = const()[name = tensor("op_768_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_769_cast_fp16 = add(x = var_767_cast_fp16, y = var_768_to_fp16)[name = tensor("op_769_cast_fp16")]; + tensor denom_3_epsilon_0_to_fp16 = const()[name = tensor("denom_3_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_769_cast_fp16)[name = tensor("denom_3_cast_fp16")]; + tensor out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5462080)))]; + tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5463168)))]; + tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_780 = const()[name = tensor("op_780"), val = tensor([1, 1])]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor([1, 1])]; + tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("custom")]; + tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5464256)))]; + tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7561472)))]; + tensor input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_782, groups = var_135, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = var_780, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_788 = const()[name = tensor("op_788"), val = tensor([1, 
1])]; + tensor var_790 = const()[name = tensor("op_790"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7565632)))]; + tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9662848)))]; + tensor hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_790, groups = var_135, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_788, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_797 = const()[name = tensor("op_797"), val = tensor(3)]; + tensor var_810 = const()[name = tensor("op_810"), val = tensor(1)]; + tensor var_811 = const()[name = tensor("op_811"), val = tensor(true)]; + tensor var_821 = const()[name = tensor("op_821"), val = tensor([1])]; + tensor channels_mean_5_cast_fp16 = reduce_mean(axes = var_821, keep_dims = var_811, x = inputs_5_cast_fp16)[name = tensor("channels_mean_5_cast_fp16")]; + tensor zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor("zero_mean_5_cast_fp16")]; + tensor zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor("zero_mean_sq_5_cast_fp16")]; + tensor var_825 = const()[name = tensor("op_825"), val = tensor([1])]; + tensor var_826_cast_fp16 = reduce_mean(axes = var_825, keep_dims = var_811, x = zero_mean_sq_5_cast_fp16)[name = tensor("op_826_cast_fp16")]; + tensor var_827_to_fp16 = const()[name = tensor("op_827_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_828_cast_fp16 = add(x = var_826_cast_fp16, y = var_827_to_fp16)[name = tensor("op_828_cast_fp16")]; + tensor denom_5_epsilon_0_to_fp16 = const()[name = tensor("denom_5_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_828_cast_fp16)[name = tensor("denom_5_cast_fp16")]; + tensor out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9663936)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9665024)))]; + tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor var_843 = const()[name = tensor("op_843"), val = tensor([1, 1])]; + tensor var_845 = const()[name = tensor("op_845"), val = tensor([1, 1])]; + tensor query_3_pad_type_0 = const()[name = 
tensor("query_3_pad_type_0"), val = tensor("custom")]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9666112)))]; + tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10190464)))]; + tensor query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_845, groups = var_810, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_843, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 1])]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, 1])]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("custom")]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10191552)))]; + tensor key_3_cast_fp16 = conv(dilations = var_851, groups = var_810, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_849, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_856 = const()[name = tensor("op_856"), val = tensor([1, 1])]; + tensor var_858 = const()[name = tensor("op_858"), val = tensor([1, 1])]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("custom")]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10715904)))]; + tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11240256)))]; + tensor value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_858, groups = var_810, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_856, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_869_begin_0 = const()[name = tensor("op_869_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_869_end_0 = const()[name = tensor("op_869_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_869_end_mask_0 = const()[name = tensor("op_869_end_mask_0"), val = tensor([true, false, 
true, true])]; + tensor var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = var_869_end_0, end_mask = var_869_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_869_cast_fp16")]; + tensor var_873_begin_0 = const()[name = tensor("op_873_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_873_end_0 = const()[name = tensor("op_873_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_873_end_mask_0 = const()[name = tensor("op_873_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_873_cast_fp16 = slice_by_index(begin = var_873_begin_0, end = var_873_end_0, end_mask = var_873_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_873_cast_fp16")]; + tensor var_877_begin_0 = const()[name = tensor("op_877_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_877_end_0 = const()[name = tensor("op_877_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_877_end_mask_0 = const()[name = tensor("op_877_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_877_cast_fp16 = slice_by_index(begin = var_877_begin_0, end = var_877_end_0, end_mask = var_877_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_877_cast_fp16")]; + tensor var_881_begin_0 = const()[name = tensor("op_881_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_881_end_0 = const()[name = tensor("op_881_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_881_end_mask_0 = const()[name = tensor("op_881_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_881_cast_fp16 = slice_by_index(begin = var_881_begin_0, end = var_881_end_0, end_mask = var_881_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_881_cast_fp16")]; + tensor var_885_begin_0 = const()[name = tensor("op_885_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_885_end_0 = const()[name = tensor("op_885_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_885_end_mask_0 = const()[name = tensor("op_885_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_885_cast_fp16 = slice_by_index(begin = var_885_begin_0, end = var_885_end_0, end_mask = var_885_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_885_cast_fp16")]; + tensor var_889_begin_0 = const()[name = tensor("op_889_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_889_end_0 = const()[name = tensor("op_889_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_889_end_mask_0 = const()[name = tensor("op_889_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_889_cast_fp16 = slice_by_index(begin = var_889_begin_0, end = var_889_end_0, end_mask = var_889_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_889_cast_fp16")]; + tensor var_893_begin_0 = const()[name = tensor("op_893_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_893_end_0 = const()[name = tensor("op_893_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_893_end_mask_0 = const()[name = tensor("op_893_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_893_cast_fp16 = slice_by_index(begin = var_893_begin_0, end = var_893_end_0, end_mask = var_893_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_893_cast_fp16")]; + tensor var_902_begin_0 = const()[name = tensor("op_902_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_end_0 = const()[name = tensor("op_902_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_902_end_mask_0 = const()[name = tensor("op_902_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_902_cast_fp16 = slice_by_index(begin = var_902_begin_0, end = var_902_end_0, 
end_mask = var_902_end_mask_0, x = var_865_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor var_909_begin_0 = const()[name = tensor("op_909_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_909_end_0 = const()[name = tensor("op_909_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_909_end_mask_0 = const()[name = tensor("op_909_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_909_cast_fp16 = slice_by_index(begin = var_909_begin_0, end = var_909_end_0, end_mask = var_909_end_mask_0, x = var_865_cast_fp16)[name = tensor("op_909_cast_fp16")]; + tensor var_916_begin_0 = const()[name = tensor("op_916_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_916_end_0 = const()[name = tensor("op_916_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_916_end_mask_0 = const()[name = tensor("op_916_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_916_cast_fp16 = slice_by_index(begin = var_916_begin_0, end = var_916_end_0, end_mask = var_916_end_mask_0, x = var_865_cast_fp16)[name = tensor("op_916_cast_fp16")]; + tensor var_923_begin_0 = const()[name = tensor("op_923_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_923_end_0 = const()[name = tensor("op_923_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_923_end_mask_0 = const()[name = tensor("op_923_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_923_cast_fp16 = slice_by_index(begin = var_923_begin_0, end = var_923_end_0, end_mask = var_923_end_mask_0, x = var_865_cast_fp16)[name = tensor("op_923_cast_fp16")]; + tensor var_930_begin_0 = const()[name = tensor("op_930_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_930_end_0 = const()[name = tensor("op_930_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_930_end_mask_0 = const()[name = tensor("op_930_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_930_cast_fp16 = slice_by_index(begin = var_930_begin_0, end = var_930_end_0, end_mask = var_930_end_mask_0, x = var_869_cast_fp16)[name = tensor("op_930_cast_fp16")]; + tensor var_937_begin_0 = const()[name = tensor("op_937_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_937_end_0 = const()[name = tensor("op_937_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_937_end_mask_0 = const()[name = tensor("op_937_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_937_cast_fp16 = slice_by_index(begin = var_937_begin_0, end = var_937_end_0, end_mask = var_937_end_mask_0, x = var_869_cast_fp16)[name = tensor("op_937_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = var_869_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_951_begin_0 = const()[name = tensor("op_951_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_951_end_0 = const()[name = tensor("op_951_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_951_end_mask_0 = const()[name = tensor("op_951_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_951_cast_fp16 = slice_by_index(begin = var_951_begin_0, end = var_951_end_0, end_mask = var_951_end_mask_0, x = var_869_cast_fp16)[name = tensor("op_951_cast_fp16")]; + tensor 
var_958_begin_0 = const()[name = tensor("op_958_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_958_end_0 = const()[name = tensor("op_958_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_958_end_mask_0 = const()[name = tensor("op_958_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_958_cast_fp16 = slice_by_index(begin = var_958_begin_0, end = var_958_end_0, end_mask = var_958_end_mask_0, x = var_873_cast_fp16)[name = tensor("op_958_cast_fp16")]; + tensor var_965_begin_0 = const()[name = tensor("op_965_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_965_end_0 = const()[name = tensor("op_965_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_965_end_mask_0 = const()[name = tensor("op_965_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_965_cast_fp16 = slice_by_index(begin = var_965_begin_0, end = var_965_end_0, end_mask = var_965_end_mask_0, x = var_873_cast_fp16)[name = tensor("op_965_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = var_873_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_979_begin_0 = const()[name = tensor("op_979_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_979_end_0 = const()[name = tensor("op_979_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_979_end_mask_0 = const()[name = tensor("op_979_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_979_cast_fp16 = slice_by_index(begin = var_979_begin_0, end = var_979_end_0, end_mask = var_979_end_mask_0, x = var_873_cast_fp16)[name = tensor("op_979_cast_fp16")]; + tensor var_986_begin_0 = const()[name = tensor("op_986_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_986_end_0 = const()[name = tensor("op_986_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_986_end_mask_0 = const()[name = tensor("op_986_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_986_cast_fp16 = slice_by_index(begin = var_986_begin_0, end = var_986_end_0, end_mask = var_986_end_mask_0, x = var_877_cast_fp16)[name = tensor("op_986_cast_fp16")]; + tensor var_993_begin_0 = const()[name = tensor("op_993_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_993_end_0 = const()[name = tensor("op_993_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_993_end_mask_0 = const()[name = tensor("op_993_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_993_cast_fp16 = slice_by_index(begin = var_993_begin_0, end = var_993_end_0, end_mask = var_993_end_mask_0, x = var_877_cast_fp16)[name = tensor("op_993_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = var_877_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1007_begin_0 = const()[name = tensor("op_1007_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor 
var_1007_end_0 = const()[name = tensor("op_1007_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1007_end_mask_0 = const()[name = tensor("op_1007_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = var_877_cast_fp16)[name = tensor("op_1007_cast_fp16")]; + tensor var_1014_begin_0 = const()[name = tensor("op_1014_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1014_end_0 = const()[name = tensor("op_1014_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1014_end_mask_0 = const()[name = tensor("op_1014_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1014_cast_fp16 = slice_by_index(begin = var_1014_begin_0, end = var_1014_end_0, end_mask = var_1014_end_mask_0, x = var_881_cast_fp16)[name = tensor("op_1014_cast_fp16")]; + tensor var_1021_begin_0 = const()[name = tensor("op_1021_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1021_end_0 = const()[name = tensor("op_1021_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1021_end_mask_0 = const()[name = tensor("op_1021_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1021_cast_fp16 = slice_by_index(begin = var_1021_begin_0, end = var_1021_end_0, end_mask = var_1021_end_mask_0, x = var_881_cast_fp16)[name = tensor("op_1021_cast_fp16")]; + tensor var_1028_begin_0 = const()[name = tensor("op_1028_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1028_end_0 = const()[name = tensor("op_1028_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1028_end_mask_0 = const()[name = tensor("op_1028_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1028_cast_fp16 = slice_by_index(begin = var_1028_begin_0, end = var_1028_end_0, end_mask = var_1028_end_mask_0, x = var_881_cast_fp16)[name = tensor("op_1028_cast_fp16")]; + tensor var_1035_begin_0 = const()[name = tensor("op_1035_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1035_end_0 = const()[name = tensor("op_1035_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1035_end_mask_0 = const()[name = tensor("op_1035_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1035_cast_fp16 = slice_by_index(begin = var_1035_begin_0, end = var_1035_end_0, end_mask = var_1035_end_mask_0, x = var_881_cast_fp16)[name = tensor("op_1035_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = var_885_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1049_begin_0 = const()[name = tensor("op_1049_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1049_end_0 = const()[name = tensor("op_1049_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1049_end_mask_0 = const()[name = tensor("op_1049_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1049_cast_fp16 = slice_by_index(begin = var_1049_begin_0, end = var_1049_end_0, end_mask = var_1049_end_mask_0, x = var_885_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor var_1056_begin_0 = const()[name = tensor("op_1056_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1056_end_0 = const()[name = 
tensor("op_1056_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1056_end_mask_0 = const()[name = tensor("op_1056_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1056_cast_fp16 = slice_by_index(begin = var_1056_begin_0, end = var_1056_end_0, end_mask = var_1056_end_mask_0, x = var_885_cast_fp16)[name = tensor("op_1056_cast_fp16")]; + tensor var_1063_begin_0 = const()[name = tensor("op_1063_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1063_end_0 = const()[name = tensor("op_1063_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1063_end_mask_0 = const()[name = tensor("op_1063_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1063_cast_fp16 = slice_by_index(begin = var_1063_begin_0, end = var_1063_end_0, end_mask = var_1063_end_mask_0, x = var_885_cast_fp16)[name = tensor("op_1063_cast_fp16")]; + tensor var_1070_begin_0 = const()[name = tensor("op_1070_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1070_end_0 = const()[name = tensor("op_1070_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1070_end_mask_0 = const()[name = tensor("op_1070_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1070_cast_fp16 = slice_by_index(begin = var_1070_begin_0, end = var_1070_end_0, end_mask = var_1070_end_mask_0, x = var_889_cast_fp16)[name = tensor("op_1070_cast_fp16")]; + tensor var_1077_begin_0 = const()[name = tensor("op_1077_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1077_end_0 = const()[name = tensor("op_1077_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1077_end_mask_0 = const()[name = tensor("op_1077_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1077_cast_fp16 = slice_by_index(begin = var_1077_begin_0, end = var_1077_end_0, end_mask = var_1077_end_mask_0, x = var_889_cast_fp16)[name = tensor("op_1077_cast_fp16")]; + tensor var_1084_begin_0 = const()[name = tensor("op_1084_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1084_end_0 = const()[name = tensor("op_1084_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1084_end_mask_0 = const()[name = tensor("op_1084_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1084_cast_fp16 = slice_by_index(begin = var_1084_begin_0, end = var_1084_end_0, end_mask = var_1084_end_mask_0, x = var_889_cast_fp16)[name = tensor("op_1084_cast_fp16")]; + tensor var_1091_begin_0 = const()[name = tensor("op_1091_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1091_end_0 = const()[name = tensor("op_1091_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1091_end_mask_0 = const()[name = tensor("op_1091_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1091_cast_fp16 = slice_by_index(begin = var_1091_begin_0, end = var_1091_end_0, end_mask = var_1091_end_mask_0, x = var_889_cast_fp16)[name = tensor("op_1091_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = var_893_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor var_1105_begin_0 = const()[name = tensor("op_1105_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1105_end_0 = const()[name = tensor("op_1105_end_0"), val = 
tensor([1, 64, 1, 750])]; + tensor var_1105_end_mask_0 = const()[name = tensor("op_1105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1105_cast_fp16 = slice_by_index(begin = var_1105_begin_0, end = var_1105_end_0, end_mask = var_1105_end_mask_0, x = var_893_cast_fp16)[name = tensor("op_1105_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = var_893_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1119_begin_0 = const()[name = tensor("op_1119_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1119_end_0 = const()[name = tensor("op_1119_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1119_end_mask_0 = const()[name = tensor("op_1119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1119_cast_fp16 = slice_by_index(begin = var_1119_begin_0, end = var_1119_end_0, end_mask = var_1119_end_mask_0, x = var_893_cast_fp16)[name = tensor("op_1119_cast_fp16")]; + tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_4 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor("transpose_4")]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = transpose_4)[name = tensor("op_1124_cast_fp16")]; + tensor var_1128_begin_0 = const()[name = tensor("op_1128_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_1128_end_0 = const()[name = tensor("op_1128_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_1128_end_mask_0 = const()[name = tensor("op_1128_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1128_cast_fp16 = slice_by_index(begin = var_1128_begin_0, end = var_1128_end_0, end_mask = var_1128_end_mask_0, x = transpose_4)[name = tensor("op_1128_cast_fp16")]; + tensor var_1132_begin_0 = const()[name = tensor("op_1132_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1132_end_0 = const()[name = tensor("op_1132_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_1132_end_mask_0 = const()[name = tensor("op_1132_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1132_cast_fp16 = slice_by_index(begin = var_1132_begin_0, end = var_1132_end_0, end_mask = var_1132_end_mask_0, x = transpose_4)[name = tensor("op_1132_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = transpose_4)[name = tensor("op_1136_cast_fp16")]; + tensor var_1140_begin_0 = 
const()[name = tensor("op_1140_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1140_end_0 = const()[name = tensor("op_1140_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_1140_end_mask_0 = const()[name = tensor("op_1140_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1140_cast_fp16 = slice_by_index(begin = var_1140_begin_0, end = var_1140_end_0, end_mask = var_1140_end_mask_0, x = transpose_4)[name = tensor("op_1140_cast_fp16")]; + tensor var_1144_begin_0 = const()[name = tensor("op_1144_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_1144_end_0 = const()[name = tensor("op_1144_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_1144_end_mask_0 = const()[name = tensor("op_1144_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1144_cast_fp16 = slice_by_index(begin = var_1144_begin_0, end = var_1144_end_0, end_mask = var_1144_end_mask_0, x = transpose_4)[name = tensor("op_1144_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = transpose_4)[name = tensor("op_1148_cast_fp16")]; + tensor var_1152_begin_0 = const()[name = tensor("op_1152_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_1152_end_0 = const()[name = tensor("op_1152_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_1152_end_mask_0 = const()[name = tensor("op_1152_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1152_cast_fp16 = slice_by_index(begin = var_1152_begin_0, end = var_1152_end_0, end_mask = var_1152_end_mask_0, x = transpose_4)[name = tensor("op_1152_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = 
tensor([0, 192, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_equation_0 = const()[name = tensor("op_1186_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1186_cast_fp16 = einsum(equation = var_1186_equation_0, values = (var_1124_cast_fp16, var_902_cast_fp16))[name = tensor("op_1186_cast_fp16")]; + tensor var_1187_to_fp16 = const()[name = tensor("op_1187_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_65_cast_fp16 = mul(x = var_1186_cast_fp16, y = var_1187_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; + tensor var_1190_equation_0 = const()[name = tensor("op_1190_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1190_cast_fp16 = einsum(equation = var_1190_equation_0, values = (var_1124_cast_fp16, var_909_cast_fp16))[name = tensor("op_1190_cast_fp16")]; + tensor var_1191_to_fp16 = const()[name = tensor("op_1191_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_67_cast_fp16 = mul(x = var_1190_cast_fp16, y = var_1191_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; + tensor var_1194_equation_0 = const()[name = tensor("op_1194_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1194_cast_fp16 = 
einsum(equation = var_1194_equation_0, values = (var_1124_cast_fp16, var_916_cast_fp16))[name = tensor("op_1194_cast_fp16")]; + tensor var_1195_to_fp16 = const()[name = tensor("op_1195_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_69_cast_fp16 = mul(x = var_1194_cast_fp16, y = var_1195_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; + tensor var_1198_equation_0 = const()[name = tensor("op_1198_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1198_cast_fp16 = einsum(equation = var_1198_equation_0, values = (var_1124_cast_fp16, var_923_cast_fp16))[name = tensor("op_1198_cast_fp16")]; + tensor var_1199_to_fp16 = const()[name = tensor("op_1199_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_71_cast_fp16 = mul(x = var_1198_cast_fp16, y = var_1199_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; + tensor var_1202_equation_0 = const()[name = tensor("op_1202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1202_cast_fp16 = einsum(equation = var_1202_equation_0, values = (var_1128_cast_fp16, var_930_cast_fp16))[name = tensor("op_1202_cast_fp16")]; + tensor var_1203_to_fp16 = const()[name = tensor("op_1203_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_73_cast_fp16 = mul(x = var_1202_cast_fp16, y = var_1203_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; + tensor var_1206_equation_0 = const()[name = tensor("op_1206_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1206_cast_fp16 = einsum(equation = var_1206_equation_0, values = (var_1128_cast_fp16, var_937_cast_fp16))[name = tensor("op_1206_cast_fp16")]; + tensor var_1207_to_fp16 = const()[name = tensor("op_1207_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_75_cast_fp16 = mul(x = var_1206_cast_fp16, y = var_1207_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; + tensor var_1210_equation_0 = const()[name = tensor("op_1210_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1210_cast_fp16 = einsum(equation = var_1210_equation_0, values = (var_1128_cast_fp16, var_944_cast_fp16))[name = tensor("op_1210_cast_fp16")]; + tensor var_1211_to_fp16 = const()[name = tensor("op_1211_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_77_cast_fp16 = mul(x = var_1210_cast_fp16, y = var_1211_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; + tensor var_1214_equation_0 = const()[name = tensor("op_1214_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1214_cast_fp16 = einsum(equation = var_1214_equation_0, values = (var_1128_cast_fp16, var_951_cast_fp16))[name = tensor("op_1214_cast_fp16")]; + tensor var_1215_to_fp16 = const()[name = tensor("op_1215_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_79_cast_fp16 = mul(x = var_1214_cast_fp16, y = var_1215_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; + tensor var_1218_equation_0 = const()[name = tensor("op_1218_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1218_cast_fp16 = einsum(equation = var_1218_equation_0, values = (var_1132_cast_fp16, var_958_cast_fp16))[name = tensor("op_1218_cast_fp16")]; + tensor var_1219_to_fp16 = const()[name = tensor("op_1219_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_81_cast_fp16 = mul(x = var_1218_cast_fp16, y = var_1219_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; + tensor var_1222_equation_0 = const()[name = tensor("op_1222_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1222_cast_fp16 = einsum(equation = var_1222_equation_0, values = (var_1132_cast_fp16, var_965_cast_fp16))[name = tensor("op_1222_cast_fp16")]; + tensor var_1223_to_fp16 = const()[name = 
tensor("op_1223_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_83_cast_fp16 = mul(x = var_1222_cast_fp16, y = var_1223_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; + tensor var_1226_equation_0 = const()[name = tensor("op_1226_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1226_cast_fp16 = einsum(equation = var_1226_equation_0, values = (var_1132_cast_fp16, var_972_cast_fp16))[name = tensor("op_1226_cast_fp16")]; + tensor var_1227_to_fp16 = const()[name = tensor("op_1227_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_85_cast_fp16 = mul(x = var_1226_cast_fp16, y = var_1227_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; + tensor var_1230_equation_0 = const()[name = tensor("op_1230_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1230_cast_fp16 = einsum(equation = var_1230_equation_0, values = (var_1132_cast_fp16, var_979_cast_fp16))[name = tensor("op_1230_cast_fp16")]; + tensor var_1231_to_fp16 = const()[name = tensor("op_1231_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_87_cast_fp16 = mul(x = var_1230_cast_fp16, y = var_1231_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; + tensor var_1234_equation_0 = const()[name = tensor("op_1234_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1234_cast_fp16 = einsum(equation = var_1234_equation_0, values = (var_1136_cast_fp16, var_986_cast_fp16))[name = tensor("op_1234_cast_fp16")]; + tensor var_1235_to_fp16 = const()[name = tensor("op_1235_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_89_cast_fp16 = mul(x = var_1234_cast_fp16, y = var_1235_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; + tensor var_1238_equation_0 = const()[name = tensor("op_1238_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1238_cast_fp16 = einsum(equation = var_1238_equation_0, values = (var_1136_cast_fp16, var_993_cast_fp16))[name = tensor("op_1238_cast_fp16")]; + tensor var_1239_to_fp16 = const()[name = tensor("op_1239_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_91_cast_fp16 = mul(x = var_1238_cast_fp16, y = var_1239_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; + tensor var_1242_equation_0 = const()[name = tensor("op_1242_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1242_cast_fp16 = einsum(equation = var_1242_equation_0, values = (var_1136_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1242_cast_fp16")]; + tensor var_1243_to_fp16 = const()[name = tensor("op_1243_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_93_cast_fp16 = mul(x = var_1242_cast_fp16, y = var_1243_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; + tensor var_1246_equation_0 = const()[name = tensor("op_1246_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1246_cast_fp16 = einsum(equation = var_1246_equation_0, values = (var_1136_cast_fp16, var_1007_cast_fp16))[name = tensor("op_1246_cast_fp16")]; + tensor var_1247_to_fp16 = const()[name = tensor("op_1247_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_95_cast_fp16 = mul(x = var_1246_cast_fp16, y = var_1247_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; + tensor var_1250_equation_0 = const()[name = tensor("op_1250_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1250_cast_fp16 = einsum(equation = var_1250_equation_0, values = (var_1140_cast_fp16, var_1014_cast_fp16))[name = tensor("op_1250_cast_fp16")]; + tensor var_1251_to_fp16 = const()[name = tensor("op_1251_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_97_cast_fp16 = mul(x = var_1250_cast_fp16, y = var_1251_to_fp16)[name = 
tensor("aw_chunk_97_cast_fp16")]; + tensor var_1254_equation_0 = const()[name = tensor("op_1254_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1254_cast_fp16 = einsum(equation = var_1254_equation_0, values = (var_1140_cast_fp16, var_1021_cast_fp16))[name = tensor("op_1254_cast_fp16")]; + tensor var_1255_to_fp16 = const()[name = tensor("op_1255_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_99_cast_fp16 = mul(x = var_1254_cast_fp16, y = var_1255_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; + tensor var_1258_equation_0 = const()[name = tensor("op_1258_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1258_cast_fp16 = einsum(equation = var_1258_equation_0, values = (var_1140_cast_fp16, var_1028_cast_fp16))[name = tensor("op_1258_cast_fp16")]; + tensor var_1259_to_fp16 = const()[name = tensor("op_1259_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_101_cast_fp16 = mul(x = var_1258_cast_fp16, y = var_1259_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; + tensor var_1262_equation_0 = const()[name = tensor("op_1262_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1262_cast_fp16 = einsum(equation = var_1262_equation_0, values = (var_1140_cast_fp16, var_1035_cast_fp16))[name = tensor("op_1262_cast_fp16")]; + tensor var_1263_to_fp16 = const()[name = tensor("op_1263_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_103_cast_fp16 = mul(x = var_1262_cast_fp16, y = var_1263_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; + tensor var_1266_equation_0 = const()[name = tensor("op_1266_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1266_cast_fp16 = einsum(equation = var_1266_equation_0, values = (var_1144_cast_fp16, var_1042_cast_fp16))[name = tensor("op_1266_cast_fp16")]; + tensor var_1267_to_fp16 = const()[name = tensor("op_1267_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_105_cast_fp16 = mul(x = var_1266_cast_fp16, y = var_1267_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; + tensor var_1270_equation_0 = const()[name = tensor("op_1270_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1270_cast_fp16 = einsum(equation = var_1270_equation_0, values = (var_1144_cast_fp16, var_1049_cast_fp16))[name = tensor("op_1270_cast_fp16")]; + tensor var_1271_to_fp16 = const()[name = tensor("op_1271_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_107_cast_fp16 = mul(x = var_1270_cast_fp16, y = var_1271_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; + tensor var_1274_equation_0 = const()[name = tensor("op_1274_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1274_cast_fp16 = einsum(equation = var_1274_equation_0, values = (var_1144_cast_fp16, var_1056_cast_fp16))[name = tensor("op_1274_cast_fp16")]; + tensor var_1275_to_fp16 = const()[name = tensor("op_1275_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_109_cast_fp16 = mul(x = var_1274_cast_fp16, y = var_1275_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; + tensor var_1278_equation_0 = const()[name = tensor("op_1278_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1278_cast_fp16 = einsum(equation = var_1278_equation_0, values = (var_1144_cast_fp16, var_1063_cast_fp16))[name = tensor("op_1278_cast_fp16")]; + tensor var_1279_to_fp16 = const()[name = tensor("op_1279_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_111_cast_fp16 = mul(x = var_1278_cast_fp16, y = var_1279_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; + tensor var_1282_equation_0 = const()[name = tensor("op_1282_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1282_cast_fp16 = einsum(equation = var_1282_equation_0, values = (var_1148_cast_fp16, var_1070_cast_fp16))[name = tensor("op_1282_cast_fp16")]; + tensor var_1283_to_fp16 = const()[name = tensor("op_1283_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_113_cast_fp16 = mul(x = var_1282_cast_fp16, y = var_1283_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; + tensor var_1286_equation_0 = const()[name = tensor("op_1286_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1286_cast_fp16 = einsum(equation = var_1286_equation_0, values = (var_1148_cast_fp16, var_1077_cast_fp16))[name = tensor("op_1286_cast_fp16")]; + tensor var_1287_to_fp16 = const()[name = tensor("op_1287_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_115_cast_fp16 = mul(x = var_1286_cast_fp16, y = var_1287_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; + tensor var_1290_equation_0 = const()[name = tensor("op_1290_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1290_cast_fp16 = einsum(equation = var_1290_equation_0, values = (var_1148_cast_fp16, var_1084_cast_fp16))[name = tensor("op_1290_cast_fp16")]; + tensor var_1291_to_fp16 = const()[name = tensor("op_1291_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_117_cast_fp16 = mul(x = var_1290_cast_fp16, y = var_1291_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; + tensor var_1294_equation_0 = const()[name = tensor("op_1294_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1294_cast_fp16 = einsum(equation = var_1294_equation_0, values = (var_1148_cast_fp16, var_1091_cast_fp16))[name = tensor("op_1294_cast_fp16")]; + tensor var_1295_to_fp16 = const()[name = tensor("op_1295_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_119_cast_fp16 = mul(x = var_1294_cast_fp16, y = var_1295_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; + tensor var_1298_equation_0 = const()[name = tensor("op_1298_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1298_cast_fp16 = einsum(equation = var_1298_equation_0, values = (var_1152_cast_fp16, var_1098_cast_fp16))[name = tensor("op_1298_cast_fp16")]; + tensor var_1299_to_fp16 = const()[name = tensor("op_1299_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_121_cast_fp16 = mul(x = var_1298_cast_fp16, y = var_1299_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; + tensor var_1302_equation_0 = const()[name = tensor("op_1302_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1302_cast_fp16 = einsum(equation = var_1302_equation_0, values = (var_1152_cast_fp16, var_1105_cast_fp16))[name = tensor("op_1302_cast_fp16")]; + tensor var_1303_to_fp16 = const()[name = tensor("op_1303_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_123_cast_fp16 = mul(x = var_1302_cast_fp16, y = var_1303_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; + tensor var_1306_equation_0 = const()[name = tensor("op_1306_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1306_cast_fp16 = einsum(equation = var_1306_equation_0, values = (var_1152_cast_fp16, var_1112_cast_fp16))[name = tensor("op_1306_cast_fp16")]; + tensor var_1307_to_fp16 = const()[name = tensor("op_1307_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_125_cast_fp16 = mul(x = var_1306_cast_fp16, y = var_1307_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; + tensor var_1310_equation_0 = const()[name = tensor("op_1310_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1310_cast_fp16 = einsum(equation = var_1310_equation_0, values = (var_1152_cast_fp16, var_1119_cast_fp16))[name = tensor("op_1310_cast_fp16")]; + 
tensor var_1311_to_fp16 = const()[name = tensor("op_1311_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_127_cast_fp16 = mul(x = var_1310_cast_fp16, y = var_1311_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; + tensor var_1313_cast_fp16 = softmax(axis = var_810, x = aw_chunk_65_cast_fp16)[name = tensor("op_1313_cast_fp16")]; + tensor var_1314_cast_fp16 = softmax(axis = var_810, x = aw_chunk_67_cast_fp16)[name = tensor("op_1314_cast_fp16")]; + tensor var_1315_cast_fp16 = softmax(axis = var_810, x = aw_chunk_69_cast_fp16)[name = tensor("op_1315_cast_fp16")]; + tensor var_1316_cast_fp16 = softmax(axis = var_810, x = aw_chunk_71_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1317_cast_fp16 = softmax(axis = var_810, x = aw_chunk_73_cast_fp16)[name = tensor("op_1317_cast_fp16")]; + tensor var_1318_cast_fp16 = softmax(axis = var_810, x = aw_chunk_75_cast_fp16)[name = tensor("op_1318_cast_fp16")]; + tensor var_1319_cast_fp16 = softmax(axis = var_810, x = aw_chunk_77_cast_fp16)[name = tensor("op_1319_cast_fp16")]; + tensor var_1320_cast_fp16 = softmax(axis = var_810, x = aw_chunk_79_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1321_cast_fp16 = softmax(axis = var_810, x = aw_chunk_81_cast_fp16)[name = tensor("op_1321_cast_fp16")]; + tensor var_1322_cast_fp16 = softmax(axis = var_810, x = aw_chunk_83_cast_fp16)[name = tensor("op_1322_cast_fp16")]; + tensor var_1323_cast_fp16 = softmax(axis = var_810, x = aw_chunk_85_cast_fp16)[name = tensor("op_1323_cast_fp16")]; + tensor var_1324_cast_fp16 = softmax(axis = var_810, x = aw_chunk_87_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1325_cast_fp16 = softmax(axis = var_810, x = aw_chunk_89_cast_fp16)[name = tensor("op_1325_cast_fp16")]; + tensor var_1326_cast_fp16 = softmax(axis = var_810, x = aw_chunk_91_cast_fp16)[name = tensor("op_1326_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_810, x = aw_chunk_93_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_810, x = aw_chunk_95_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_810, x = aw_chunk_97_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_810, x = aw_chunk_99_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_810, x = aw_chunk_101_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_810, x = aw_chunk_103_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_810, x = aw_chunk_105_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_810, x = aw_chunk_107_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_810, x = aw_chunk_109_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_810, x = aw_chunk_111_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_810, x = aw_chunk_113_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_810, x = aw_chunk_115_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_810, x = aw_chunk_117_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_810, x = aw_chunk_119_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor 
var_1341_cast_fp16 = softmax(axis = var_810, x = aw_chunk_121_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_810, x = aw_chunk_123_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_810, x = aw_chunk_125_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_810, x = aw_chunk_127_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1346_equation_0 = const()[name = tensor("op_1346_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1346_cast_fp16 = einsum(equation = var_1346_equation_0, values = (var_1154_cast_fp16, var_1313_cast_fp16))[name = tensor("op_1346_cast_fp16")]; + tensor var_1348_equation_0 = const()[name = tensor("op_1348_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1348_cast_fp16 = einsum(equation = var_1348_equation_0, values = (var_1154_cast_fp16, var_1314_cast_fp16))[name = tensor("op_1348_cast_fp16")]; + tensor var_1350_equation_0 = const()[name = tensor("op_1350_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1350_cast_fp16 = einsum(equation = var_1350_equation_0, values = (var_1154_cast_fp16, var_1315_cast_fp16))[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1154_cast_fp16, var_1316_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1158_cast_fp16, var_1317_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1158_cast_fp16, var_1318_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1158_cast_fp16, var_1319_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1158_cast_fp16, var_1320_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1162_cast_fp16, var_1321_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1162_cast_fp16, var_1322_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1162_cast_fp16, var_1323_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = 
einsum(equation = var_1368_equation_0, values = (var_1162_cast_fp16, var_1324_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1166_cast_fp16, var_1325_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1166_cast_fp16, var_1326_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1166_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1166_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1170_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1170_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1170_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1170_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1174_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1174_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1174_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1174_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1178_cast_fp16, 
var_1337_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1178_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1178_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor var_1400_equation_0 = const()[name = tensor("op_1400_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1400_cast_fp16 = einsum(equation = var_1400_equation_0, values = (var_1178_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1400_cast_fp16")]; + tensor var_1402_equation_0 = const()[name = tensor("op_1402_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1402_cast_fp16 = einsum(equation = var_1402_equation_0, values = (var_1182_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1402_cast_fp16")]; + tensor var_1404_equation_0 = const()[name = tensor("op_1404_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1404_cast_fp16 = einsum(equation = var_1404_equation_0, values = (var_1182_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1404_cast_fp16")]; + tensor var_1406_equation_0 = const()[name = tensor("op_1406_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1406_cast_fp16 = einsum(equation = var_1406_equation_0, values = (var_1182_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1406_cast_fp16")]; + tensor var_1408_equation_0 = const()[name = tensor("op_1408_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1408_cast_fp16 = einsum(equation = var_1408_equation_0, values = (var_1182_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1408_cast_fp16")]; + tensor var_1410_interleave_0 = const()[name = tensor("op_1410_interleave_0"), val = tensor(false)]; + tensor var_1410_cast_fp16 = concat(axis = var_797, interleave = var_1410_interleave_0, values = (var_1346_cast_fp16, var_1348_cast_fp16, var_1350_cast_fp16, var_1352_cast_fp16))[name = tensor("op_1410_cast_fp16")]; + tensor var_1412_interleave_0 = const()[name = tensor("op_1412_interleave_0"), val = tensor(false)]; + tensor var_1412_cast_fp16 = concat(axis = var_797, interleave = var_1412_interleave_0, values = (var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16))[name = tensor("op_1412_cast_fp16")]; + tensor var_1414_interleave_0 = const()[name = tensor("op_1414_interleave_0"), val = tensor(false)]; + tensor var_1414_cast_fp16 = concat(axis = var_797, interleave = var_1414_interleave_0, values = (var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16))[name = tensor("op_1414_cast_fp16")]; + tensor var_1416_interleave_0 = const()[name = tensor("op_1416_interleave_0"), val = tensor(false)]; + tensor var_1416_cast_fp16 = concat(axis = var_797, interleave = var_1416_interleave_0, values = (var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16))[name = tensor("op_1416_cast_fp16")]; + tensor var_1418_interleave_0 = const()[name = tensor("op_1418_interleave_0"), val = tensor(false)]; + tensor var_1418_cast_fp16 = concat(axis = var_797, interleave = var_1418_interleave_0, values = (var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16))[name = tensor("op_1418_cast_fp16")]; + tensor var_1420_interleave_0 = 
const()[name = tensor("op_1420_interleave_0"), val = tensor(false)]; + tensor var_1420_cast_fp16 = concat(axis = var_797, interleave = var_1420_interleave_0, values = (var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16))[name = tensor("op_1420_cast_fp16")]; + tensor var_1422_interleave_0 = const()[name = tensor("op_1422_interleave_0"), val = tensor(false)]; + tensor var_1422_cast_fp16 = concat(axis = var_797, interleave = var_1422_interleave_0, values = (var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16, var_1400_cast_fp16))[name = tensor("op_1422_cast_fp16")]; + tensor var_1424_interleave_0 = const()[name = tensor("op_1424_interleave_0"), val = tensor(false)]; + tensor var_1424_cast_fp16 = concat(axis = var_797, interleave = var_1424_interleave_0, values = (var_1402_cast_fp16, var_1404_cast_fp16, var_1406_cast_fp16, var_1408_cast_fp16))[name = tensor("op_1424_cast_fp16")]; + tensor input_9_interleave_0 = const()[name = tensor("input_9_interleave_0"), val = tensor(false)]; + tensor input_9_cast_fp16 = concat(axis = var_810, interleave = input_9_interleave_0, values = (var_1410_cast_fp16, var_1412_cast_fp16, var_1414_cast_fp16, var_1416_cast_fp16, var_1418_cast_fp16, var_1420_cast_fp16, var_1422_cast_fp16, var_1424_cast_fp16))[name = tensor("input_9_cast_fp16")]; + tensor var_1429 = const()[name = tensor("op_1429"), val = tensor([1, 1])]; + tensor var_1431 = const()[name = tensor("op_1431"), val = tensor([1, 1])]; + tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("custom")]; + tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11241344)))]; + tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11765696)))]; + tensor obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_1431, groups = var_810, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_1429, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_1437 = const()[name = tensor("op_1437"), val = tensor([1])]; + tensor channels_mean_7_cast_fp16 = reduce_mean(axes = var_1437, keep_dims = var_811, x = inputs_7_cast_fp16)[name = tensor("channels_mean_7_cast_fp16")]; + tensor zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor("zero_mean_7_cast_fp16")]; + tensor zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor("zero_mean_sq_7_cast_fp16")]; + tensor var_1441 = const()[name = tensor("op_1441"), val = tensor([1])]; + tensor var_1442_cast_fp16 = reduce_mean(axes = var_1441, keep_dims = var_811, x = zero_mean_sq_7_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1443_to_fp16 = const()[name = tensor("op_1443_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1444_cast_fp16 = add(x = var_1442_cast_fp16, y = var_1443_to_fp16)[name = tensor("op_1444_cast_fp16")]; + tensor denom_7_epsilon_0_to_fp16 = const()[name = tensor("denom_7_epsilon_0_to_fp16"), val = 
tensor(0x1p-24)]; + tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_1444_cast_fp16)[name = tensor("denom_7_cast_fp16")]; + tensor out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11766784)))]; + tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11767872)))]; + tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1455 = const()[name = tensor("op_1455"), val = tensor([1, 1])]; + tensor var_1457 = const()[name = tensor("op_1457"), val = tensor([1, 1])]; + tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("custom")]; + tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11768960)))]; + tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13866176)))]; + tensor input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_1457, groups = var_810, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = var_1455, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1463 = const()[name = tensor("op_1463"), val = tensor([1, 1])]; + tensor var_1465 = const()[name = tensor("op_1465"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13870336)))]; + tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15967552)))]; + tensor hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_1465, groups = var_810, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_1463, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_1472 = const()[name = tensor("op_1472"), val = tensor(3)]; + tensor var_1485 = 
const()[name = tensor("op_1485"), val = tensor(1)]; + tensor var_1486 = const()[name = tensor("op_1486"), val = tensor(true)]; + tensor var_1496 = const()[name = tensor("op_1496"), val = tensor([1])]; + tensor channels_mean_9_cast_fp16 = reduce_mean(axes = var_1496, keep_dims = var_1486, x = inputs_9_cast_fp16)[name = tensor("channels_mean_9_cast_fp16")]; + tensor zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor("zero_mean_9_cast_fp16")]; + tensor zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor("zero_mean_sq_9_cast_fp16")]; + tensor var_1500 = const()[name = tensor("op_1500"), val = tensor([1])]; + tensor var_1501_cast_fp16 = reduce_mean(axes = var_1500, keep_dims = var_1486, x = zero_mean_sq_9_cast_fp16)[name = tensor("op_1501_cast_fp16")]; + tensor var_1502_to_fp16 = const()[name = tensor("op_1502_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1503_cast_fp16 = add(x = var_1501_cast_fp16, y = var_1502_to_fp16)[name = tensor("op_1503_cast_fp16")]; + tensor denom_9_epsilon_0_to_fp16 = const()[name = tensor("denom_9_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_1503_cast_fp16)[name = tensor("denom_9_cast_fp16")]; + tensor out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15968640)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15969728)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_1518 = const()[name = tensor("op_1518"), val = tensor([1, 1])]; + tensor var_1520 = const()[name = tensor("op_1520"), val = tensor([1, 1])]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("custom")]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15970816)))]; + tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16495168)))]; + tensor query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_1520, groups = var_1485, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_1518, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_1524 = const()[name = tensor("op_1524"), val = tensor([1, 1])]; + tensor var_1526 = const()[name = tensor("op_1526"), val = tensor([1, 1])]; + tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("custom")]; + tensor key_5_pad_0 = const()[name = tensor("key_5_pad_0"), 
val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16496256)))]; + tensor key_5_cast_fp16 = conv(dilations = var_1526, groups = var_1485, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = var_1524, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_1531 = const()[name = tensor("op_1531"), val = tensor([1, 1])]; + tensor var_1533 = const()[name = tensor("op_1533"), val = tensor([1, 1])]; + tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("custom")]; + tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17020608)))]; + tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17544960)))]; + tensor value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_1533, groups = var_1485, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = var_1531, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_1540_begin_0 = const()[name = tensor("op_1540_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1540_end_0 = const()[name = tensor("op_1540_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1540_end_mask_0 = const()[name = tensor("op_1540_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1540_cast_fp16 = slice_by_index(begin = var_1540_begin_0, end = var_1540_end_0, end_mask = var_1540_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1544_begin_0 = const()[name = tensor("op_1544_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1544_end_0 = const()[name = tensor("op_1544_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_1544_end_mask_0 = const()[name = tensor("op_1544_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1544_cast_fp16 = slice_by_index(begin = var_1544_begin_0, end = var_1544_end_0, end_mask = var_1544_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1548_begin_0 = const()[name = tensor("op_1548_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1548_end_0 = const()[name = tensor("op_1548_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_1548_end_mask_0 = const()[name = tensor("op_1548_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1548_cast_fp16 = slice_by_index(begin = var_1548_begin_0, end = var_1548_end_0, end_mask = var_1548_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1552_begin_0 = const()[name = tensor("op_1552_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_1552_end_0 = const()[name = tensor("op_1552_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_1552_end_mask_0 = const()[name = tensor("op_1552_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1552_cast_fp16 = slice_by_index(begin = var_1552_begin_0, end = var_1552_end_0, end_mask = var_1552_end_mask_0, x = 
query_5_cast_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1556_begin_0 = const()[name = tensor("op_1556_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1556_end_0 = const()[name = tensor("op_1556_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_1556_end_mask_0 = const()[name = tensor("op_1556_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1556_cast_fp16 = slice_by_index(begin = var_1556_begin_0, end = var_1556_end_0, end_mask = var_1556_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1560_begin_0 = const()[name = tensor("op_1560_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1560_end_0 = const()[name = tensor("op_1560_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_1560_end_mask_0 = const()[name = tensor("op_1560_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1560_cast_fp16 = slice_by_index(begin = var_1560_begin_0, end = var_1560_end_0, end_mask = var_1560_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1564_begin_0 = const()[name = tensor("op_1564_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1564_end_0 = const()[name = tensor("op_1564_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_1564_end_mask_0 = const()[name = tensor("op_1564_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1564_cast_fp16 = slice_by_index(begin = var_1564_begin_0, end = var_1564_end_0, end_mask = var_1564_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1568_begin_0 = const()[name = tensor("op_1568_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1568_end_0 = const()[name = tensor("op_1568_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_1568_end_mask_0 = const()[name = tensor("op_1568_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1568_cast_fp16 = slice_by_index(begin = var_1568_begin_0, end = var_1568_end_0, end_mask = var_1568_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1577_begin_0 = const()[name = tensor("op_1577_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1577_end_0 = const()[name = tensor("op_1577_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1577_end_mask_0 = const()[name = tensor("op_1577_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1577_cast_fp16 = slice_by_index(begin = var_1577_begin_0, end = var_1577_end_0, end_mask = var_1577_end_mask_0, x = var_1540_cast_fp16)[name = tensor("op_1577_cast_fp16")]; + tensor var_1584_begin_0 = const()[name = tensor("op_1584_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1584_end_0 = const()[name = tensor("op_1584_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1584_end_mask_0 = const()[name = tensor("op_1584_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1584_cast_fp16 = slice_by_index(begin = var_1584_begin_0, end = var_1584_end_0, end_mask = var_1584_end_mask_0, x = var_1540_cast_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1591_begin_0 = const()[name = tensor("op_1591_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1591_end_0 = const()[name = tensor("op_1591_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1591_end_mask_0 = const()[name = tensor("op_1591_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1591_cast_fp16 = slice_by_index(begin = var_1591_begin_0, end = var_1591_end_0, end_mask = var_1591_end_mask_0, x = var_1540_cast_fp16)[name = 
tensor("op_1591_cast_fp16")]; + tensor var_1598_begin_0 = const()[name = tensor("op_1598_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1598_end_0 = const()[name = tensor("op_1598_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1598_end_mask_0 = const()[name = tensor("op_1598_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1598_cast_fp16 = slice_by_index(begin = var_1598_begin_0, end = var_1598_end_0, end_mask = var_1598_end_mask_0, x = var_1540_cast_fp16)[name = tensor("op_1598_cast_fp16")]; + tensor var_1605_begin_0 = const()[name = tensor("op_1605_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1605_end_0 = const()[name = tensor("op_1605_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1605_end_mask_0 = const()[name = tensor("op_1605_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1605_cast_fp16 = slice_by_index(begin = var_1605_begin_0, end = var_1605_end_0, end_mask = var_1605_end_mask_0, x = var_1544_cast_fp16)[name = tensor("op_1605_cast_fp16")]; + tensor var_1612_begin_0 = const()[name = tensor("op_1612_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1612_end_0 = const()[name = tensor("op_1612_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1612_end_mask_0 = const()[name = tensor("op_1612_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1612_cast_fp16 = slice_by_index(begin = var_1612_begin_0, end = var_1612_end_0, end_mask = var_1612_end_mask_0, x = var_1544_cast_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1619_begin_0 = const()[name = tensor("op_1619_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1619_end_0 = const()[name = tensor("op_1619_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1619_end_mask_0 = const()[name = tensor("op_1619_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1619_cast_fp16 = slice_by_index(begin = var_1619_begin_0, end = var_1619_end_0, end_mask = var_1619_end_mask_0, x = var_1544_cast_fp16)[name = tensor("op_1619_cast_fp16")]; + tensor var_1626_begin_0 = const()[name = tensor("op_1626_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1626_end_0 = const()[name = tensor("op_1626_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1626_end_mask_0 = const()[name = tensor("op_1626_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1626_cast_fp16 = slice_by_index(begin = var_1626_begin_0, end = var_1626_end_0, end_mask = var_1626_end_mask_0, x = var_1544_cast_fp16)[name = tensor("op_1626_cast_fp16")]; + tensor var_1633_begin_0 = const()[name = tensor("op_1633_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1633_end_0 = const()[name = tensor("op_1633_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1633_end_mask_0 = const()[name = tensor("op_1633_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1633_cast_fp16 = slice_by_index(begin = var_1633_begin_0, end = var_1633_end_0, end_mask = var_1633_end_mask_0, x = var_1548_cast_fp16)[name = tensor("op_1633_cast_fp16")]; + tensor var_1640_begin_0 = const()[name = tensor("op_1640_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1640_end_0 = const()[name = tensor("op_1640_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1640_end_mask_0 = const()[name = tensor("op_1640_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1640_cast_fp16 = slice_by_index(begin = var_1640_begin_0, end = var_1640_end_0, end_mask = var_1640_end_mask_0, x = var_1548_cast_fp16)[name = tensor("op_1640_cast_fp16")]; + 
tensor var_1647_begin_0 = const()[name = tensor("op_1647_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1647_end_0 = const()[name = tensor("op_1647_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1647_end_mask_0 = const()[name = tensor("op_1647_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1647_cast_fp16 = slice_by_index(begin = var_1647_begin_0, end = var_1647_end_0, end_mask = var_1647_end_mask_0, x = var_1548_cast_fp16)[name = tensor("op_1647_cast_fp16")]; + tensor var_1654_begin_0 = const()[name = tensor("op_1654_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1654_end_0 = const()[name = tensor("op_1654_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1654_end_mask_0 = const()[name = tensor("op_1654_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1654_cast_fp16 = slice_by_index(begin = var_1654_begin_0, end = var_1654_end_0, end_mask = var_1654_end_mask_0, x = var_1548_cast_fp16)[name = tensor("op_1654_cast_fp16")]; + tensor var_1661_begin_0 = const()[name = tensor("op_1661_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1661_end_0 = const()[name = tensor("op_1661_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1661_end_mask_0 = const()[name = tensor("op_1661_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1661_cast_fp16 = slice_by_index(begin = var_1661_begin_0, end = var_1661_end_0, end_mask = var_1661_end_mask_0, x = var_1552_cast_fp16)[name = tensor("op_1661_cast_fp16")]; + tensor var_1668_begin_0 = const()[name = tensor("op_1668_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1668_end_0 = const()[name = tensor("op_1668_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1668_end_mask_0 = const()[name = tensor("op_1668_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1668_cast_fp16 = slice_by_index(begin = var_1668_begin_0, end = var_1668_end_0, end_mask = var_1668_end_mask_0, x = var_1552_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1675_begin_0 = const()[name = tensor("op_1675_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1675_end_0 = const()[name = tensor("op_1675_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1675_end_mask_0 = const()[name = tensor("op_1675_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1675_cast_fp16 = slice_by_index(begin = var_1675_begin_0, end = var_1675_end_0, end_mask = var_1675_end_mask_0, x = var_1552_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1682_begin_0 = const()[name = tensor("op_1682_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1682_end_0 = const()[name = tensor("op_1682_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1682_end_mask_0 = const()[name = tensor("op_1682_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1682_cast_fp16 = slice_by_index(begin = var_1682_begin_0, end = var_1682_end_0, end_mask = var_1682_end_mask_0, x = var_1552_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1689_begin_0 = const()[name = tensor("op_1689_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1689_end_0 = const()[name = tensor("op_1689_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1689_end_mask_0 = const()[name = tensor("op_1689_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1689_cast_fp16 = slice_by_index(begin = var_1689_begin_0, end = var_1689_end_0, end_mask = var_1689_end_mask_0, x = var_1556_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1696_begin_0 = 
const()[name = tensor("op_1696_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1696_end_0 = const()[name = tensor("op_1696_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1696_end_mask_0 = const()[name = tensor("op_1696_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1696_cast_fp16 = slice_by_index(begin = var_1696_begin_0, end = var_1696_end_0, end_mask = var_1696_end_mask_0, x = var_1556_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1703_begin_0 = const()[name = tensor("op_1703_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1703_end_0 = const()[name = tensor("op_1703_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1703_end_mask_0 = const()[name = tensor("op_1703_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1703_cast_fp16 = slice_by_index(begin = var_1703_begin_0, end = var_1703_end_0, end_mask = var_1703_end_mask_0, x = var_1556_cast_fp16)[name = tensor("op_1703_cast_fp16")]; + tensor var_1710_begin_0 = const()[name = tensor("op_1710_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1710_end_0 = const()[name = tensor("op_1710_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1710_end_mask_0 = const()[name = tensor("op_1710_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1710_cast_fp16 = slice_by_index(begin = var_1710_begin_0, end = var_1710_end_0, end_mask = var_1710_end_mask_0, x = var_1556_cast_fp16)[name = tensor("op_1710_cast_fp16")]; + tensor var_1717_begin_0 = const()[name = tensor("op_1717_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1717_end_0 = const()[name = tensor("op_1717_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1717_end_mask_0 = const()[name = tensor("op_1717_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1717_cast_fp16 = slice_by_index(begin = var_1717_begin_0, end = var_1717_end_0, end_mask = var_1717_end_mask_0, x = var_1560_cast_fp16)[name = tensor("op_1717_cast_fp16")]; + tensor var_1724_begin_0 = const()[name = tensor("op_1724_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1724_end_0 = const()[name = tensor("op_1724_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1724_end_mask_0 = const()[name = tensor("op_1724_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1724_cast_fp16 = slice_by_index(begin = var_1724_begin_0, end = var_1724_end_0, end_mask = var_1724_end_mask_0, x = var_1560_cast_fp16)[name = tensor("op_1724_cast_fp16")]; + tensor var_1731_begin_0 = const()[name = tensor("op_1731_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1731_end_0 = const()[name = tensor("op_1731_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1731_end_mask_0 = const()[name = tensor("op_1731_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1731_cast_fp16 = slice_by_index(begin = var_1731_begin_0, end = var_1731_end_0, end_mask = var_1731_end_mask_0, x = var_1560_cast_fp16)[name = tensor("op_1731_cast_fp16")]; + tensor var_1738_begin_0 = const()[name = tensor("op_1738_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1738_end_0 = const()[name = tensor("op_1738_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1738_end_mask_0 = const()[name = tensor("op_1738_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1738_cast_fp16 = slice_by_index(begin = var_1738_begin_0, end = var_1738_end_0, end_mask = var_1738_end_mask_0, x = var_1560_cast_fp16)[name = tensor("op_1738_cast_fp16")]; + tensor var_1745_begin_0 = const()[name = 
tensor("op_1745_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1745_end_0 = const()[name = tensor("op_1745_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1745_end_mask_0 = const()[name = tensor("op_1745_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1745_cast_fp16 = slice_by_index(begin = var_1745_begin_0, end = var_1745_end_0, end_mask = var_1745_end_mask_0, x = var_1564_cast_fp16)[name = tensor("op_1745_cast_fp16")]; + tensor var_1752_begin_0 = const()[name = tensor("op_1752_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1752_end_0 = const()[name = tensor("op_1752_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1752_end_mask_0 = const()[name = tensor("op_1752_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1752_cast_fp16 = slice_by_index(begin = var_1752_begin_0, end = var_1752_end_0, end_mask = var_1752_end_mask_0, x = var_1564_cast_fp16)[name = tensor("op_1752_cast_fp16")]; + tensor var_1759_begin_0 = const()[name = tensor("op_1759_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1759_end_0 = const()[name = tensor("op_1759_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1759_end_mask_0 = const()[name = tensor("op_1759_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1759_cast_fp16 = slice_by_index(begin = var_1759_begin_0, end = var_1759_end_0, end_mask = var_1759_end_mask_0, x = var_1564_cast_fp16)[name = tensor("op_1759_cast_fp16")]; + tensor var_1766_begin_0 = const()[name = tensor("op_1766_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1766_end_0 = const()[name = tensor("op_1766_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1766_end_mask_0 = const()[name = tensor("op_1766_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1766_cast_fp16 = slice_by_index(begin = var_1766_begin_0, end = var_1766_end_0, end_mask = var_1766_end_mask_0, x = var_1564_cast_fp16)[name = tensor("op_1766_cast_fp16")]; + tensor var_1773_begin_0 = const()[name = tensor("op_1773_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_end_0 = const()[name = tensor("op_1773_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1773_end_mask_0 = const()[name = tensor("op_1773_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1773_cast_fp16 = slice_by_index(begin = var_1773_begin_0, end = var_1773_end_0, end_mask = var_1773_end_mask_0, x = var_1568_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor var_1780_begin_0 = const()[name = tensor("op_1780_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1780_end_0 = const()[name = tensor("op_1780_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1780_end_mask_0 = const()[name = tensor("op_1780_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1780_cast_fp16 = slice_by_index(begin = var_1780_begin_0, end = var_1780_end_0, end_mask = var_1780_end_mask_0, x = var_1568_cast_fp16)[name = tensor("op_1780_cast_fp16")]; + tensor var_1787_begin_0 = const()[name = tensor("op_1787_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1787_end_0 = const()[name = tensor("op_1787_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1787_end_mask_0 = const()[name = tensor("op_1787_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1787_cast_fp16 = slice_by_index(begin = var_1787_begin_0, end = var_1787_end_0, end_mask = var_1787_end_mask_0, x = var_1568_cast_fp16)[name = tensor("op_1787_cast_fp16")]; + tensor var_1794_begin_0 = const()[name = tensor("op_1794_begin_0"), val = 
tensor([0, 0, 0, 1125])]; + tensor var_1794_end_0 = const()[name = tensor("op_1794_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1794_end_mask_0 = const()[name = tensor("op_1794_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1794_cast_fp16 = slice_by_index(begin = var_1794_begin_0, end = var_1794_end_0, end_mask = var_1794_end_mask_0, x = var_1568_cast_fp16)[name = tensor("op_1794_cast_fp16")]; + tensor k_5_perm_0 = const()[name = tensor("k_5_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1799_begin_0 = const()[name = tensor("op_1799_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1799_end_0 = const()[name = tensor("op_1799_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_1799_end_mask_0 = const()[name = tensor("op_1799_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_3 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor("transpose_3")]; + tensor var_1799_cast_fp16 = slice_by_index(begin = var_1799_begin_0, end = var_1799_end_0, end_mask = var_1799_end_mask_0, x = transpose_3)[name = tensor("op_1799_cast_fp16")]; + tensor var_1803_begin_0 = const()[name = tensor("op_1803_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_1803_end_0 = const()[name = tensor("op_1803_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_1803_end_mask_0 = const()[name = tensor("op_1803_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1803_cast_fp16 = slice_by_index(begin = var_1803_begin_0, end = var_1803_end_0, end_mask = var_1803_end_mask_0, x = transpose_3)[name = tensor("op_1803_cast_fp16")]; + tensor var_1807_begin_0 = const()[name = tensor("op_1807_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1807_end_0 = const()[name = tensor("op_1807_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_1807_end_mask_0 = const()[name = tensor("op_1807_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1807_cast_fp16 = slice_by_index(begin = var_1807_begin_0, end = var_1807_end_0, end_mask = var_1807_end_mask_0, x = transpose_3)[name = tensor("op_1807_cast_fp16")]; + tensor var_1811_begin_0 = const()[name = tensor("op_1811_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_1811_end_0 = const()[name = tensor("op_1811_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_1811_end_mask_0 = const()[name = tensor("op_1811_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1811_cast_fp16 = slice_by_index(begin = var_1811_begin_0, end = var_1811_end_0, end_mask = var_1811_end_mask_0, x = transpose_3)[name = tensor("op_1811_cast_fp16")]; + tensor var_1815_begin_0 = const()[name = tensor("op_1815_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1815_end_0 = const()[name = tensor("op_1815_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_1815_end_mask_0 = const()[name = tensor("op_1815_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1815_cast_fp16 = slice_by_index(begin = var_1815_begin_0, end = var_1815_end_0, end_mask = var_1815_end_mask_0, x = transpose_3)[name = tensor("op_1815_cast_fp16")]; + tensor var_1819_begin_0 = const()[name = tensor("op_1819_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_1819_end_0 = const()[name = tensor("op_1819_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_1819_end_mask_0 = const()[name = tensor("op_1819_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1819_cast_fp16 = slice_by_index(begin = var_1819_begin_0, end = var_1819_end_0, end_mask = var_1819_end_mask_0, x = 
transpose_3)[name = tensor("op_1819_cast_fp16")]; + tensor var_1823_begin_0 = const()[name = tensor("op_1823_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1823_end_0 = const()[name = tensor("op_1823_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_1823_end_mask_0 = const()[name = tensor("op_1823_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1823_cast_fp16 = slice_by_index(begin = var_1823_begin_0, end = var_1823_end_0, end_mask = var_1823_end_mask_0, x = transpose_3)[name = tensor("op_1823_cast_fp16")]; + tensor var_1827_begin_0 = const()[name = tensor("op_1827_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_1827_end_0 = const()[name = tensor("op_1827_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_1827_end_mask_0 = const()[name = tensor("op_1827_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1827_cast_fp16 = slice_by_index(begin = var_1827_begin_0, end = var_1827_end_0, end_mask = var_1827_end_mask_0, x = transpose_3)[name = tensor("op_1827_cast_fp16")]; + tensor var_1829_begin_0 = const()[name = tensor("op_1829_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1829_end_0 = const()[name = tensor("op_1829_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1829_end_mask_0 = const()[name = tensor("op_1829_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1829_cast_fp16 = slice_by_index(begin = var_1829_begin_0, end = var_1829_end_0, end_mask = var_1829_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1829_cast_fp16")]; + tensor var_1833_begin_0 = const()[name = tensor("op_1833_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1833_end_0 = const()[name = tensor("op_1833_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_1833_end_mask_0 = const()[name = tensor("op_1833_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1833_cast_fp16 = slice_by_index(begin = var_1833_begin_0, end = var_1833_end_0, end_mask = var_1833_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor var_1837_begin_0 = const()[name = tensor("op_1837_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1837_end_0 = const()[name = tensor("op_1837_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_1837_end_mask_0 = const()[name = tensor("op_1837_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1837_cast_fp16 = slice_by_index(begin = var_1837_begin_0, end = var_1837_end_0, end_mask = var_1837_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1837_cast_fp16")]; + tensor var_1841_begin_0 = const()[name = tensor("op_1841_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_1841_end_0 = const()[name = tensor("op_1841_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_1841_end_mask_0 = const()[name = tensor("op_1841_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1841_cast_fp16 = slice_by_index(begin = var_1841_begin_0, end = var_1841_end_0, end_mask = var_1841_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1841_cast_fp16")]; + tensor var_1845_begin_0 = const()[name = tensor("op_1845_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1845_end_0 = const()[name = tensor("op_1845_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_1845_end_mask_0 = const()[name = tensor("op_1845_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1845_cast_fp16 = slice_by_index(begin = var_1845_begin_0, end = var_1845_end_0, end_mask = var_1845_end_mask_0, x = value_5_cast_fp16)[name = 
tensor("op_1845_cast_fp16")]; + tensor var_1849_begin_0 = const()[name = tensor("op_1849_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1849_end_0 = const()[name = tensor("op_1849_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_1849_end_mask_0 = const()[name = tensor("op_1849_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1849_cast_fp16 = slice_by_index(begin = var_1849_begin_0, end = var_1849_end_0, end_mask = var_1849_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1849_cast_fp16")]; + tensor var_1853_begin_0 = const()[name = tensor("op_1853_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1853_end_0 = const()[name = tensor("op_1853_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_1853_end_mask_0 = const()[name = tensor("op_1853_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1853_cast_fp16 = slice_by_index(begin = var_1853_begin_0, end = var_1853_end_0, end_mask = var_1853_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1853_cast_fp16")]; + tensor var_1857_begin_0 = const()[name = tensor("op_1857_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1857_end_0 = const()[name = tensor("op_1857_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_1857_end_mask_0 = const()[name = tensor("op_1857_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1857_cast_fp16 = slice_by_index(begin = var_1857_begin_0, end = var_1857_end_0, end_mask = var_1857_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1857_cast_fp16")]; + tensor var_1861_equation_0 = const()[name = tensor("op_1861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1861_cast_fp16 = einsum(equation = var_1861_equation_0, values = (var_1799_cast_fp16, var_1577_cast_fp16))[name = tensor("op_1861_cast_fp16")]; + tensor var_1862_to_fp16 = const()[name = tensor("op_1862_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_129_cast_fp16 = mul(x = var_1861_cast_fp16, y = var_1862_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; + tensor var_1865_equation_0 = const()[name = tensor("op_1865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1865_cast_fp16 = einsum(equation = var_1865_equation_0, values = (var_1799_cast_fp16, var_1584_cast_fp16))[name = tensor("op_1865_cast_fp16")]; + tensor var_1866_to_fp16 = const()[name = tensor("op_1866_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_131_cast_fp16 = mul(x = var_1865_cast_fp16, y = var_1866_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; + tensor var_1869_equation_0 = const()[name = tensor("op_1869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1869_cast_fp16 = einsum(equation = var_1869_equation_0, values = (var_1799_cast_fp16, var_1591_cast_fp16))[name = tensor("op_1869_cast_fp16")]; + tensor var_1870_to_fp16 = const()[name = tensor("op_1870_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_133_cast_fp16 = mul(x = var_1869_cast_fp16, y = var_1870_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; + tensor var_1873_equation_0 = const()[name = tensor("op_1873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1873_cast_fp16 = einsum(equation = var_1873_equation_0, values = (var_1799_cast_fp16, var_1598_cast_fp16))[name = tensor("op_1873_cast_fp16")]; + tensor var_1874_to_fp16 = const()[name = tensor("op_1874_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_135_cast_fp16 = mul(x = var_1873_cast_fp16, y = var_1874_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; + tensor var_1877_equation_0 = const()[name = 
tensor("op_1877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1877_cast_fp16 = einsum(equation = var_1877_equation_0, values = (var_1803_cast_fp16, var_1605_cast_fp16))[name = tensor("op_1877_cast_fp16")]; + tensor var_1878_to_fp16 = const()[name = tensor("op_1878_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_137_cast_fp16 = mul(x = var_1877_cast_fp16, y = var_1878_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; + tensor var_1881_equation_0 = const()[name = tensor("op_1881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1881_cast_fp16 = einsum(equation = var_1881_equation_0, values = (var_1803_cast_fp16, var_1612_cast_fp16))[name = tensor("op_1881_cast_fp16")]; + tensor var_1882_to_fp16 = const()[name = tensor("op_1882_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_139_cast_fp16 = mul(x = var_1881_cast_fp16, y = var_1882_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; + tensor var_1885_equation_0 = const()[name = tensor("op_1885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1885_cast_fp16 = einsum(equation = var_1885_equation_0, values = (var_1803_cast_fp16, var_1619_cast_fp16))[name = tensor("op_1885_cast_fp16")]; + tensor var_1886_to_fp16 = const()[name = tensor("op_1886_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_141_cast_fp16 = mul(x = var_1885_cast_fp16, y = var_1886_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; + tensor var_1889_equation_0 = const()[name = tensor("op_1889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1889_cast_fp16 = einsum(equation = var_1889_equation_0, values = (var_1803_cast_fp16, var_1626_cast_fp16))[name = tensor("op_1889_cast_fp16")]; + tensor var_1890_to_fp16 = const()[name = tensor("op_1890_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_143_cast_fp16 = mul(x = var_1889_cast_fp16, y = var_1890_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; + tensor var_1893_equation_0 = const()[name = tensor("op_1893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1893_cast_fp16 = einsum(equation = var_1893_equation_0, values = (var_1807_cast_fp16, var_1633_cast_fp16))[name = tensor("op_1893_cast_fp16")]; + tensor var_1894_to_fp16 = const()[name = tensor("op_1894_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_145_cast_fp16 = mul(x = var_1893_cast_fp16, y = var_1894_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; + tensor var_1897_equation_0 = const()[name = tensor("op_1897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1897_cast_fp16 = einsum(equation = var_1897_equation_0, values = (var_1807_cast_fp16, var_1640_cast_fp16))[name = tensor("op_1897_cast_fp16")]; + tensor var_1898_to_fp16 = const()[name = tensor("op_1898_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_147_cast_fp16 = mul(x = var_1897_cast_fp16, y = var_1898_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; + tensor var_1901_equation_0 = const()[name = tensor("op_1901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1901_cast_fp16 = einsum(equation = var_1901_equation_0, values = (var_1807_cast_fp16, var_1647_cast_fp16))[name = tensor("op_1901_cast_fp16")]; + tensor var_1902_to_fp16 = const()[name = tensor("op_1902_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_149_cast_fp16 = mul(x = var_1901_cast_fp16, y = var_1902_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; + tensor var_1905_equation_0 = const()[name = tensor("op_1905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1905_cast_fp16 = einsum(equation = var_1905_equation_0, values = 
(var_1807_cast_fp16, var_1654_cast_fp16))[name = tensor("op_1905_cast_fp16")]; + tensor var_1906_to_fp16 = const()[name = tensor("op_1906_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_151_cast_fp16 = mul(x = var_1905_cast_fp16, y = var_1906_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; + tensor var_1909_equation_0 = const()[name = tensor("op_1909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1909_cast_fp16 = einsum(equation = var_1909_equation_0, values = (var_1811_cast_fp16, var_1661_cast_fp16))[name = tensor("op_1909_cast_fp16")]; + tensor var_1910_to_fp16 = const()[name = tensor("op_1910_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_153_cast_fp16 = mul(x = var_1909_cast_fp16, y = var_1910_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; + tensor var_1913_equation_0 = const()[name = tensor("op_1913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1913_cast_fp16 = einsum(equation = var_1913_equation_0, values = (var_1811_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1913_cast_fp16")]; + tensor var_1914_to_fp16 = const()[name = tensor("op_1914_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_155_cast_fp16 = mul(x = var_1913_cast_fp16, y = var_1914_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; + tensor var_1917_equation_0 = const()[name = tensor("op_1917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1917_cast_fp16 = einsum(equation = var_1917_equation_0, values = (var_1811_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1917_cast_fp16")]; + tensor var_1918_to_fp16 = const()[name = tensor("op_1918_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_157_cast_fp16 = mul(x = var_1917_cast_fp16, y = var_1918_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; + tensor var_1921_equation_0 = const()[name = tensor("op_1921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1921_cast_fp16 = einsum(equation = var_1921_equation_0, values = (var_1811_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1921_cast_fp16")]; + tensor var_1922_to_fp16 = const()[name = tensor("op_1922_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_159_cast_fp16 = mul(x = var_1921_cast_fp16, y = var_1922_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; + tensor var_1925_equation_0 = const()[name = tensor("op_1925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1925_cast_fp16 = einsum(equation = var_1925_equation_0, values = (var_1815_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1925_cast_fp16")]; + tensor var_1926_to_fp16 = const()[name = tensor("op_1926_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_161_cast_fp16 = mul(x = var_1925_cast_fp16, y = var_1926_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; + tensor var_1929_equation_0 = const()[name = tensor("op_1929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1929_cast_fp16 = einsum(equation = var_1929_equation_0, values = (var_1815_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1929_cast_fp16")]; + tensor var_1930_to_fp16 = const()[name = tensor("op_1930_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_163_cast_fp16 = mul(x = var_1929_cast_fp16, y = var_1930_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; + tensor var_1933_equation_0 = const()[name = tensor("op_1933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1933_cast_fp16 = einsum(equation = var_1933_equation_0, values = (var_1815_cast_fp16, var_1703_cast_fp16))[name = tensor("op_1933_cast_fp16")]; + tensor var_1934_to_fp16 = const()[name = tensor("op_1934_to_fp16"), val 
= tensor(0x1p-3)]; + tensor aw_chunk_165_cast_fp16 = mul(x = var_1933_cast_fp16, y = var_1934_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; + tensor var_1937_equation_0 = const()[name = tensor("op_1937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1937_cast_fp16 = einsum(equation = var_1937_equation_0, values = (var_1815_cast_fp16, var_1710_cast_fp16))[name = tensor("op_1937_cast_fp16")]; + tensor var_1938_to_fp16 = const()[name = tensor("op_1938_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_167_cast_fp16 = mul(x = var_1937_cast_fp16, y = var_1938_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; + tensor var_1941_equation_0 = const()[name = tensor("op_1941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1941_cast_fp16 = einsum(equation = var_1941_equation_0, values = (var_1819_cast_fp16, var_1717_cast_fp16))[name = tensor("op_1941_cast_fp16")]; + tensor var_1942_to_fp16 = const()[name = tensor("op_1942_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_169_cast_fp16 = mul(x = var_1941_cast_fp16, y = var_1942_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; + tensor var_1945_equation_0 = const()[name = tensor("op_1945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1945_cast_fp16 = einsum(equation = var_1945_equation_0, values = (var_1819_cast_fp16, var_1724_cast_fp16))[name = tensor("op_1945_cast_fp16")]; + tensor var_1946_to_fp16 = const()[name = tensor("op_1946_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_171_cast_fp16 = mul(x = var_1945_cast_fp16, y = var_1946_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; + tensor var_1949_equation_0 = const()[name = tensor("op_1949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1949_cast_fp16 = einsum(equation = var_1949_equation_0, values = (var_1819_cast_fp16, var_1731_cast_fp16))[name = tensor("op_1949_cast_fp16")]; + tensor var_1950_to_fp16 = const()[name = tensor("op_1950_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_173_cast_fp16 = mul(x = var_1949_cast_fp16, y = var_1950_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; + tensor var_1953_equation_0 = const()[name = tensor("op_1953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1953_cast_fp16 = einsum(equation = var_1953_equation_0, values = (var_1819_cast_fp16, var_1738_cast_fp16))[name = tensor("op_1953_cast_fp16")]; + tensor var_1954_to_fp16 = const()[name = tensor("op_1954_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_175_cast_fp16 = mul(x = var_1953_cast_fp16, y = var_1954_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; + tensor var_1957_equation_0 = const()[name = tensor("op_1957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1957_cast_fp16 = einsum(equation = var_1957_equation_0, values = (var_1823_cast_fp16, var_1745_cast_fp16))[name = tensor("op_1957_cast_fp16")]; + tensor var_1958_to_fp16 = const()[name = tensor("op_1958_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_177_cast_fp16 = mul(x = var_1957_cast_fp16, y = var_1958_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; + tensor var_1961_equation_0 = const()[name = tensor("op_1961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1961_cast_fp16 = einsum(equation = var_1961_equation_0, values = (var_1823_cast_fp16, var_1752_cast_fp16))[name = tensor("op_1961_cast_fp16")]; + tensor var_1962_to_fp16 = const()[name = tensor("op_1962_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_179_cast_fp16 = mul(x = var_1961_cast_fp16, y = var_1962_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; + 
tensor var_1965_equation_0 = const()[name = tensor("op_1965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1965_cast_fp16 = einsum(equation = var_1965_equation_0, values = (var_1823_cast_fp16, var_1759_cast_fp16))[name = tensor("op_1965_cast_fp16")]; + tensor var_1966_to_fp16 = const()[name = tensor("op_1966_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_181_cast_fp16 = mul(x = var_1965_cast_fp16, y = var_1966_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; + tensor var_1969_equation_0 = const()[name = tensor("op_1969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1969_cast_fp16 = einsum(equation = var_1969_equation_0, values = (var_1823_cast_fp16, var_1766_cast_fp16))[name = tensor("op_1969_cast_fp16")]; + tensor var_1970_to_fp16 = const()[name = tensor("op_1970_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_183_cast_fp16 = mul(x = var_1969_cast_fp16, y = var_1970_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; + tensor var_1973_equation_0 = const()[name = tensor("op_1973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1973_cast_fp16 = einsum(equation = var_1973_equation_0, values = (var_1827_cast_fp16, var_1773_cast_fp16))[name = tensor("op_1973_cast_fp16")]; + tensor var_1974_to_fp16 = const()[name = tensor("op_1974_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_185_cast_fp16 = mul(x = var_1973_cast_fp16, y = var_1974_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; + tensor var_1977_equation_0 = const()[name = tensor("op_1977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1977_cast_fp16 = einsum(equation = var_1977_equation_0, values = (var_1827_cast_fp16, var_1780_cast_fp16))[name = tensor("op_1977_cast_fp16")]; + tensor var_1978_to_fp16 = const()[name = tensor("op_1978_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_187_cast_fp16 = mul(x = var_1977_cast_fp16, y = var_1978_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; + tensor var_1981_equation_0 = const()[name = tensor("op_1981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1981_cast_fp16 = einsum(equation = var_1981_equation_0, values = (var_1827_cast_fp16, var_1787_cast_fp16))[name = tensor("op_1981_cast_fp16")]; + tensor var_1982_to_fp16 = const()[name = tensor("op_1982_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_189_cast_fp16 = mul(x = var_1981_cast_fp16, y = var_1982_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; + tensor var_1985_equation_0 = const()[name = tensor("op_1985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1985_cast_fp16 = einsum(equation = var_1985_equation_0, values = (var_1827_cast_fp16, var_1794_cast_fp16))[name = tensor("op_1985_cast_fp16")]; + tensor var_1986_to_fp16 = const()[name = tensor("op_1986_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_191_cast_fp16 = mul(x = var_1985_cast_fp16, y = var_1986_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; + tensor var_1988_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_129_cast_fp16)[name = tensor("op_1988_cast_fp16")]; + tensor var_1989_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_131_cast_fp16)[name = tensor("op_1989_cast_fp16")]; + tensor var_1990_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_133_cast_fp16)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_135_cast_fp16)[name = tensor("op_1991_cast_fp16")]; + tensor var_1992_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_137_cast_fp16)[name = tensor("op_1992_cast_fp16")]; + tensor var_1993_cast_fp16 = 
softmax(axis = var_1485, x = aw_chunk_139_cast_fp16)[name = tensor("op_1993_cast_fp16")]; + tensor var_1994_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_141_cast_fp16)[name = tensor("op_1994_cast_fp16")]; + tensor var_1995_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_143_cast_fp16)[name = tensor("op_1995_cast_fp16")]; + tensor var_1996_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_145_cast_fp16)[name = tensor("op_1996_cast_fp16")]; + tensor var_1997_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_147_cast_fp16)[name = tensor("op_1997_cast_fp16")]; + tensor var_1998_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_149_cast_fp16)[name = tensor("op_1998_cast_fp16")]; + tensor var_1999_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_151_cast_fp16)[name = tensor("op_1999_cast_fp16")]; + tensor var_2000_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_153_cast_fp16)[name = tensor("op_2000_cast_fp16")]; + tensor var_2001_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_155_cast_fp16)[name = tensor("op_2001_cast_fp16")]; + tensor var_2002_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_157_cast_fp16)[name = tensor("op_2002_cast_fp16")]; + tensor var_2003_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_159_cast_fp16)[name = tensor("op_2003_cast_fp16")]; + tensor var_2004_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_161_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2005_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_163_cast_fp16)[name = tensor("op_2005_cast_fp16")]; + tensor var_2006_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_165_cast_fp16)[name = tensor("op_2006_cast_fp16")]; + tensor var_2007_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_167_cast_fp16)[name = tensor("op_2007_cast_fp16")]; + tensor var_2008_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_169_cast_fp16)[name = tensor("op_2008_cast_fp16")]; + tensor var_2009_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_171_cast_fp16)[name = tensor("op_2009_cast_fp16")]; + tensor var_2010_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_173_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor var_2011_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_175_cast_fp16)[name = tensor("op_2011_cast_fp16")]; + tensor var_2012_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_177_cast_fp16)[name = tensor("op_2012_cast_fp16")]; + tensor var_2013_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_179_cast_fp16)[name = tensor("op_2013_cast_fp16")]; + tensor var_2014_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_181_cast_fp16)[name = tensor("op_2014_cast_fp16")]; + tensor var_2015_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_183_cast_fp16)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_185_cast_fp16)[name = tensor("op_2016_cast_fp16")]; + tensor var_2017_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_187_cast_fp16)[name = tensor("op_2017_cast_fp16")]; + tensor var_2018_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_189_cast_fp16)[name = tensor("op_2018_cast_fp16")]; + tensor var_2019_cast_fp16 = softmax(axis = var_1485, x = aw_chunk_191_cast_fp16)[name = tensor("op_2019_cast_fp16")]; + tensor var_2021_equation_0 = const()[name = tensor("op_2021_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2021_cast_fp16 = einsum(equation = var_2021_equation_0, values = (var_1829_cast_fp16, var_1988_cast_fp16))[name = tensor("op_2021_cast_fp16")]; + tensor var_2023_equation_0 = const()[name = tensor("op_2023_equation_0"), 
val = tensor("bchk,bkhq->bchq")]; + tensor var_2023_cast_fp16 = einsum(equation = var_2023_equation_0, values = (var_1829_cast_fp16, var_1989_cast_fp16))[name = tensor("op_2023_cast_fp16")]; + tensor var_2025_equation_0 = const()[name = tensor("op_2025_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2025_cast_fp16 = einsum(equation = var_2025_equation_0, values = (var_1829_cast_fp16, var_1990_cast_fp16))[name = tensor("op_2025_cast_fp16")]; + tensor var_2027_equation_0 = const()[name = tensor("op_2027_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2027_cast_fp16 = einsum(equation = var_2027_equation_0, values = (var_1829_cast_fp16, var_1991_cast_fp16))[name = tensor("op_2027_cast_fp16")]; + tensor var_2029_equation_0 = const()[name = tensor("op_2029_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2029_cast_fp16 = einsum(equation = var_2029_equation_0, values = (var_1833_cast_fp16, var_1992_cast_fp16))[name = tensor("op_2029_cast_fp16")]; + tensor var_2031_equation_0 = const()[name = tensor("op_2031_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2031_cast_fp16 = einsum(equation = var_2031_equation_0, values = (var_1833_cast_fp16, var_1993_cast_fp16))[name = tensor("op_2031_cast_fp16")]; + tensor var_2033_equation_0 = const()[name = tensor("op_2033_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2033_cast_fp16 = einsum(equation = var_2033_equation_0, values = (var_1833_cast_fp16, var_1994_cast_fp16))[name = tensor("op_2033_cast_fp16")]; + tensor var_2035_equation_0 = const()[name = tensor("op_2035_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2035_cast_fp16 = einsum(equation = var_2035_equation_0, values = (var_1833_cast_fp16, var_1995_cast_fp16))[name = tensor("op_2035_cast_fp16")]; + tensor var_2037_equation_0 = const()[name = tensor("op_2037_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2037_cast_fp16 = einsum(equation = var_2037_equation_0, values = (var_1837_cast_fp16, var_1996_cast_fp16))[name = tensor("op_2037_cast_fp16")]; + tensor var_2039_equation_0 = const()[name = tensor("op_2039_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2039_cast_fp16 = einsum(equation = var_2039_equation_0, values = (var_1837_cast_fp16, var_1997_cast_fp16))[name = tensor("op_2039_cast_fp16")]; + tensor var_2041_equation_0 = const()[name = tensor("op_2041_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2041_cast_fp16 = einsum(equation = var_2041_equation_0, values = (var_1837_cast_fp16, var_1998_cast_fp16))[name = tensor("op_2041_cast_fp16")]; + tensor var_2043_equation_0 = const()[name = tensor("op_2043_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2043_cast_fp16 = einsum(equation = var_2043_equation_0, values = (var_1837_cast_fp16, var_1999_cast_fp16))[name = tensor("op_2043_cast_fp16")]; + tensor var_2045_equation_0 = const()[name = tensor("op_2045_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2045_cast_fp16 = einsum(equation = var_2045_equation_0, values = (var_1841_cast_fp16, var_2000_cast_fp16))[name = tensor("op_2045_cast_fp16")]; + tensor var_2047_equation_0 = const()[name = tensor("op_2047_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2047_cast_fp16 = einsum(equation = var_2047_equation_0, values = (var_1841_cast_fp16, var_2001_cast_fp16))[name = tensor("op_2047_cast_fp16")]; + tensor var_2049_equation_0 = const()[name = tensor("op_2049_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2049_cast_fp16 = einsum(equation = 
var_2049_equation_0, values = (var_1841_cast_fp16, var_2002_cast_fp16))[name = tensor("op_2049_cast_fp16")]; + tensor var_2051_equation_0 = const()[name = tensor("op_2051_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2051_cast_fp16 = einsum(equation = var_2051_equation_0, values = (var_1841_cast_fp16, var_2003_cast_fp16))[name = tensor("op_2051_cast_fp16")]; + tensor var_2053_equation_0 = const()[name = tensor("op_2053_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2053_cast_fp16 = einsum(equation = var_2053_equation_0, values = (var_1845_cast_fp16, var_2004_cast_fp16))[name = tensor("op_2053_cast_fp16")]; + tensor var_2055_equation_0 = const()[name = tensor("op_2055_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2055_cast_fp16 = einsum(equation = var_2055_equation_0, values = (var_1845_cast_fp16, var_2005_cast_fp16))[name = tensor("op_2055_cast_fp16")]; + tensor var_2057_equation_0 = const()[name = tensor("op_2057_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2057_cast_fp16 = einsum(equation = var_2057_equation_0, values = (var_1845_cast_fp16, var_2006_cast_fp16))[name = tensor("op_2057_cast_fp16")]; + tensor var_2059_equation_0 = const()[name = tensor("op_2059_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2059_cast_fp16 = einsum(equation = var_2059_equation_0, values = (var_1845_cast_fp16, var_2007_cast_fp16))[name = tensor("op_2059_cast_fp16")]; + tensor var_2061_equation_0 = const()[name = tensor("op_2061_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2061_cast_fp16 = einsum(equation = var_2061_equation_0, values = (var_1849_cast_fp16, var_2008_cast_fp16))[name = tensor("op_2061_cast_fp16")]; + tensor var_2063_equation_0 = const()[name = tensor("op_2063_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2063_cast_fp16 = einsum(equation = var_2063_equation_0, values = (var_1849_cast_fp16, var_2009_cast_fp16))[name = tensor("op_2063_cast_fp16")]; + tensor var_2065_equation_0 = const()[name = tensor("op_2065_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2065_cast_fp16 = einsum(equation = var_2065_equation_0, values = (var_1849_cast_fp16, var_2010_cast_fp16))[name = tensor("op_2065_cast_fp16")]; + tensor var_2067_equation_0 = const()[name = tensor("op_2067_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2067_cast_fp16 = einsum(equation = var_2067_equation_0, values = (var_1849_cast_fp16, var_2011_cast_fp16))[name = tensor("op_2067_cast_fp16")]; + tensor var_2069_equation_0 = const()[name = tensor("op_2069_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2069_cast_fp16 = einsum(equation = var_2069_equation_0, values = (var_1853_cast_fp16, var_2012_cast_fp16))[name = tensor("op_2069_cast_fp16")]; + tensor var_2071_equation_0 = const()[name = tensor("op_2071_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2071_cast_fp16 = einsum(equation = var_2071_equation_0, values = (var_1853_cast_fp16, var_2013_cast_fp16))[name = tensor("op_2071_cast_fp16")]; + tensor var_2073_equation_0 = const()[name = tensor("op_2073_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2073_cast_fp16 = einsum(equation = var_2073_equation_0, values = (var_1853_cast_fp16, var_2014_cast_fp16))[name = tensor("op_2073_cast_fp16")]; + tensor var_2075_equation_0 = const()[name = tensor("op_2075_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2075_cast_fp16 = einsum(equation = var_2075_equation_0, values = (var_1853_cast_fp16, var_2015_cast_fp16))[name = 
tensor("op_2075_cast_fp16")]; + tensor var_2077_equation_0 = const()[name = tensor("op_2077_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2077_cast_fp16 = einsum(equation = var_2077_equation_0, values = (var_1857_cast_fp16, var_2016_cast_fp16))[name = tensor("op_2077_cast_fp16")]; + tensor var_2079_equation_0 = const()[name = tensor("op_2079_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2079_cast_fp16 = einsum(equation = var_2079_equation_0, values = (var_1857_cast_fp16, var_2017_cast_fp16))[name = tensor("op_2079_cast_fp16")]; + tensor var_2081_equation_0 = const()[name = tensor("op_2081_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2081_cast_fp16 = einsum(equation = var_2081_equation_0, values = (var_1857_cast_fp16, var_2018_cast_fp16))[name = tensor("op_2081_cast_fp16")]; + tensor var_2083_equation_0 = const()[name = tensor("op_2083_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2083_cast_fp16 = einsum(equation = var_2083_equation_0, values = (var_1857_cast_fp16, var_2019_cast_fp16))[name = tensor("op_2083_cast_fp16")]; + tensor var_2085_interleave_0 = const()[name = tensor("op_2085_interleave_0"), val = tensor(false)]; + tensor var_2085_cast_fp16 = concat(axis = var_1472, interleave = var_2085_interleave_0, values = (var_2021_cast_fp16, var_2023_cast_fp16, var_2025_cast_fp16, var_2027_cast_fp16))[name = tensor("op_2085_cast_fp16")]; + tensor var_2087_interleave_0 = const()[name = tensor("op_2087_interleave_0"), val = tensor(false)]; + tensor var_2087_cast_fp16 = concat(axis = var_1472, interleave = var_2087_interleave_0, values = (var_2029_cast_fp16, var_2031_cast_fp16, var_2033_cast_fp16, var_2035_cast_fp16))[name = tensor("op_2087_cast_fp16")]; + tensor var_2089_interleave_0 = const()[name = tensor("op_2089_interleave_0"), val = tensor(false)]; + tensor var_2089_cast_fp16 = concat(axis = var_1472, interleave = var_2089_interleave_0, values = (var_2037_cast_fp16, var_2039_cast_fp16, var_2041_cast_fp16, var_2043_cast_fp16))[name = tensor("op_2089_cast_fp16")]; + tensor var_2091_interleave_0 = const()[name = tensor("op_2091_interleave_0"), val = tensor(false)]; + tensor var_2091_cast_fp16 = concat(axis = var_1472, interleave = var_2091_interleave_0, values = (var_2045_cast_fp16, var_2047_cast_fp16, var_2049_cast_fp16, var_2051_cast_fp16))[name = tensor("op_2091_cast_fp16")]; + tensor var_2093_interleave_0 = const()[name = tensor("op_2093_interleave_0"), val = tensor(false)]; + tensor var_2093_cast_fp16 = concat(axis = var_1472, interleave = var_2093_interleave_0, values = (var_2053_cast_fp16, var_2055_cast_fp16, var_2057_cast_fp16, var_2059_cast_fp16))[name = tensor("op_2093_cast_fp16")]; + tensor var_2095_interleave_0 = const()[name = tensor("op_2095_interleave_0"), val = tensor(false)]; + tensor var_2095_cast_fp16 = concat(axis = var_1472, interleave = var_2095_interleave_0, values = (var_2061_cast_fp16, var_2063_cast_fp16, var_2065_cast_fp16, var_2067_cast_fp16))[name = tensor("op_2095_cast_fp16")]; + tensor var_2097_interleave_0 = const()[name = tensor("op_2097_interleave_0"), val = tensor(false)]; + tensor var_2097_cast_fp16 = concat(axis = var_1472, interleave = var_2097_interleave_0, values = (var_2069_cast_fp16, var_2071_cast_fp16, var_2073_cast_fp16, var_2075_cast_fp16))[name = tensor("op_2097_cast_fp16")]; + tensor var_2099_interleave_0 = const()[name = tensor("op_2099_interleave_0"), val = tensor(false)]; + tensor var_2099_cast_fp16 = concat(axis = var_1472, interleave = var_2099_interleave_0, values = 
(var_2077_cast_fp16, var_2079_cast_fp16, var_2081_cast_fp16, var_2083_cast_fp16))[name = tensor("op_2099_cast_fp16")]; + tensor input_17_interleave_0 = const()[name = tensor("input_17_interleave_0"), val = tensor(false)]; + tensor input_17_cast_fp16 = concat(axis = var_1485, interleave = input_17_interleave_0, values = (var_2085_cast_fp16, var_2087_cast_fp16, var_2089_cast_fp16, var_2091_cast_fp16, var_2093_cast_fp16, var_2095_cast_fp16, var_2097_cast_fp16, var_2099_cast_fp16))[name = tensor("input_17_cast_fp16")]; + tensor var_2104 = const()[name = tensor("op_2104"), val = tensor([1, 1])]; + tensor var_2106 = const()[name = tensor("op_2106"), val = tensor([1, 1])]; + tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("custom")]; + tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17546048)))]; + tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18070400)))]; + tensor obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_2106, groups = var_1485, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_2104, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor var_2112 = const()[name = tensor("op_2112"), val = tensor([1])]; + tensor channels_mean_11_cast_fp16 = reduce_mean(axes = var_2112, keep_dims = var_1486, x = inputs_11_cast_fp16)[name = tensor("channels_mean_11_cast_fp16")]; + tensor zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor("zero_mean_11_cast_fp16")]; + tensor zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor("zero_mean_sq_11_cast_fp16")]; + tensor var_2116 = const()[name = tensor("op_2116"), val = tensor([1])]; + tensor var_2117_cast_fp16 = reduce_mean(axes = var_2116, keep_dims = var_1486, x = zero_mean_sq_11_cast_fp16)[name = tensor("op_2117_cast_fp16")]; + tensor var_2118_to_fp16 = const()[name = tensor("op_2118_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2119_cast_fp16 = add(x = var_2117_cast_fp16, y = var_2118_to_fp16)[name = tensor("op_2119_cast_fp16")]; + tensor denom_11_epsilon_0_to_fp16 = const()[name = tensor("denom_11_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_2119_cast_fp16)[name = tensor("denom_11_cast_fp16")]; + tensor out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18071488)))]; + tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18072576)))]; + tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor 
input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_2130 = const()[name = tensor("op_2130"), val = tensor([1, 1])]; + tensor var_2132 = const()[name = tensor("op_2132"), val = tensor([1, 1])]; + tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("custom")]; + tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18073664)))]; + tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20170880)))]; + tensor input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_2132, groups = var_1485, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = var_2130, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2138 = const()[name = tensor("op_2138"), val = tensor([1, 1])]; + tensor var_2140 = const()[name = tensor("op_2140"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20175040)))]; + tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22272256)))]; + tensor hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_2140, groups = var_1485, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_2138, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_2147 = const()[name = tensor("op_2147"), val = tensor(3)]; + tensor var_2160 = const()[name = tensor("op_2160"), val = tensor(1)]; + tensor var_2161 = const()[name = tensor("op_2161"), val = tensor(true)]; + tensor var_2171 = const()[name = tensor("op_2171"), val = tensor([1])]; + tensor channels_mean_13_cast_fp16 = reduce_mean(axes = var_2171, keep_dims = var_2161, x = inputs_13_cast_fp16)[name = tensor("channels_mean_13_cast_fp16")]; + tensor zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor("zero_mean_13_cast_fp16")]; + tensor zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor("zero_mean_sq_13_cast_fp16")]; + tensor var_2175 = const()[name = tensor("op_2175"), val = tensor([1])]; + tensor var_2176_cast_fp16 = reduce_mean(axes 
= var_2175, keep_dims = var_2161, x = zero_mean_sq_13_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2177_to_fp16 = const()[name = tensor("op_2177_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2178_cast_fp16 = add(x = var_2176_cast_fp16, y = var_2177_to_fp16)[name = tensor("op_2178_cast_fp16")]; + tensor denom_13_epsilon_0_to_fp16 = const()[name = tensor("denom_13_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_2178_cast_fp16)[name = tensor("denom_13_cast_fp16")]; + tensor out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22273344)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22274432)))]; + tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_2193 = const()[name = tensor("op_2193"), val = tensor([1, 1])]; + tensor var_2195 = const()[name = tensor("op_2195"), val = tensor([1, 1])]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("custom")]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22275520)))]; + tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22799872)))]; + tensor query_7_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_2195, groups = var_2160, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_2193, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_2199 = const()[name = tensor("op_2199"), val = tensor([1, 1])]; + tensor var_2201 = const()[name = tensor("op_2201"), val = tensor([1, 1])]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("custom")]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22800960)))]; + tensor key_7_cast_fp16 = conv(dilations = var_2201, groups = var_2160, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_2199, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_2206 = const()[name = tensor("op_2206"), val = tensor([1, 1])]; + tensor var_2208 = const()[name = tensor("op_2208"), val = tensor([1, 1])]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), 
val = tensor("custom")]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23325312)))]; + tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23849664)))]; + tensor value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_2208, groups = var_2160, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_2206, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_2215_begin_0 = const()[name = tensor("op_2215_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2215_end_0 = const()[name = tensor("op_2215_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2215_end_mask_0 = const()[name = tensor("op_2215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2215_cast_fp16 = slice_by_index(begin = var_2215_begin_0, end = var_2215_end_0, end_mask = var_2215_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2215_cast_fp16")]; + tensor var_2219_begin_0 = const()[name = tensor("op_2219_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2219_end_0 = const()[name = tensor("op_2219_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_2219_end_mask_0 = const()[name = tensor("op_2219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2219_cast_fp16 = slice_by_index(begin = var_2219_begin_0, end = var_2219_end_0, end_mask = var_2219_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2219_cast_fp16")]; + tensor var_2223_begin_0 = const()[name = tensor("op_2223_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2223_end_0 = const()[name = tensor("op_2223_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_2223_end_mask_0 = const()[name = tensor("op_2223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2223_cast_fp16 = slice_by_index(begin = var_2223_begin_0, end = var_2223_end_0, end_mask = var_2223_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2223_cast_fp16")]; + tensor var_2227_begin_0 = const()[name = tensor("op_2227_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2227_end_0 = const()[name = tensor("op_2227_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_2227_end_mask_0 = const()[name = tensor("op_2227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2227_cast_fp16 = slice_by_index(begin = var_2227_begin_0, end = var_2227_end_0, end_mask = var_2227_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2227_cast_fp16")]; + tensor var_2231_begin_0 = const()[name = tensor("op_2231_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2231_end_0 = const()[name = tensor("op_2231_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_2231_end_mask_0 = const()[name = tensor("op_2231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2231_cast_fp16 = slice_by_index(begin = var_2231_begin_0, end = var_2231_end_0, end_mask = var_2231_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2231_cast_fp16")]; + tensor var_2235_begin_0 = const()[name = tensor("op_2235_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2235_end_0 = const()[name = tensor("op_2235_end_0"), val = 
tensor([1, 384, 1, 1500])]; + tensor var_2235_end_mask_0 = const()[name = tensor("op_2235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2235_cast_fp16 = slice_by_index(begin = var_2235_begin_0, end = var_2235_end_0, end_mask = var_2235_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2235_cast_fp16")]; + tensor var_2239_begin_0 = const()[name = tensor("op_2239_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2239_end_0 = const()[name = tensor("op_2239_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_2239_end_mask_0 = const()[name = tensor("op_2239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2239_cast_fp16 = slice_by_index(begin = var_2239_begin_0, end = var_2239_end_0, end_mask = var_2239_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2239_cast_fp16")]; + tensor var_2243_begin_0 = const()[name = tensor("op_2243_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2243_end_0 = const()[name = tensor("op_2243_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_2243_end_mask_0 = const()[name = tensor("op_2243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2243_cast_fp16 = slice_by_index(begin = var_2243_begin_0, end = var_2243_end_0, end_mask = var_2243_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2243_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = var_2215_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2259_begin_0 = const()[name = tensor("op_2259_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2259_end_0 = const()[name = tensor("op_2259_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2259_end_mask_0 = const()[name = tensor("op_2259_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2259_cast_fp16 = slice_by_index(begin = var_2259_begin_0, end = var_2259_end_0, end_mask = var_2259_end_mask_0, x = var_2215_cast_fp16)[name = tensor("op_2259_cast_fp16")]; + tensor var_2266_begin_0 = const()[name = tensor("op_2266_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2266_end_0 = const()[name = tensor("op_2266_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2266_end_mask_0 = const()[name = tensor("op_2266_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2266_cast_fp16 = slice_by_index(begin = var_2266_begin_0, end = var_2266_end_0, end_mask = var_2266_end_mask_0, x = var_2215_cast_fp16)[name = tensor("op_2266_cast_fp16")]; + tensor var_2273_begin_0 = const()[name = tensor("op_2273_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2273_end_0 = const()[name = tensor("op_2273_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2273_end_mask_0 = const()[name = tensor("op_2273_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2273_cast_fp16 = slice_by_index(begin = var_2273_begin_0, end = var_2273_end_0, end_mask = var_2273_end_mask_0, x = var_2215_cast_fp16)[name = tensor("op_2273_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 64, 1, 375])]; + 
tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = var_2219_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2287_begin_0 = const()[name = tensor("op_2287_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2287_end_0 = const()[name = tensor("op_2287_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2287_end_mask_0 = const()[name = tensor("op_2287_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2287_cast_fp16 = slice_by_index(begin = var_2287_begin_0, end = var_2287_end_0, end_mask = var_2287_end_mask_0, x = var_2219_cast_fp16)[name = tensor("op_2287_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = var_2219_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2301_begin_0 = const()[name = tensor("op_2301_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2301_end_0 = const()[name = tensor("op_2301_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2301_end_mask_0 = const()[name = tensor("op_2301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2301_cast_fp16 = slice_by_index(begin = var_2301_begin_0, end = var_2301_end_0, end_mask = var_2301_end_mask_0, x = var_2219_cast_fp16)[name = tensor("op_2301_cast_fp16")]; + tensor var_2308_begin_0 = const()[name = tensor("op_2308_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2308_end_0 = const()[name = tensor("op_2308_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2308_end_mask_0 = const()[name = tensor("op_2308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2308_cast_fp16 = slice_by_index(begin = var_2308_begin_0, end = var_2308_end_0, end_mask = var_2308_end_mask_0, x = var_2223_cast_fp16)[name = tensor("op_2308_cast_fp16")]; + tensor var_2315_begin_0 = const()[name = tensor("op_2315_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2315_end_0 = const()[name = tensor("op_2315_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2315_end_mask_0 = const()[name = tensor("op_2315_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2315_cast_fp16 = slice_by_index(begin = var_2315_begin_0, end = var_2315_end_0, end_mask = var_2315_end_mask_0, x = var_2223_cast_fp16)[name = tensor("op_2315_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = var_2223_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2329_begin_0 = const()[name = tensor("op_2329_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2329_end_0 = const()[name = tensor("op_2329_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2329_end_mask_0 = 
const()[name = tensor("op_2329_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2329_cast_fp16 = slice_by_index(begin = var_2329_begin_0, end = var_2329_end_0, end_mask = var_2329_end_mask_0, x = var_2223_cast_fp16)[name = tensor("op_2329_cast_fp16")]; + tensor var_2336_begin_0 = const()[name = tensor("op_2336_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2336_end_0 = const()[name = tensor("op_2336_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2336_end_mask_0 = const()[name = tensor("op_2336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2336_cast_fp16 = slice_by_index(begin = var_2336_begin_0, end = var_2336_end_0, end_mask = var_2336_end_mask_0, x = var_2227_cast_fp16)[name = tensor("op_2336_cast_fp16")]; + tensor var_2343_begin_0 = const()[name = tensor("op_2343_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2343_end_0 = const()[name = tensor("op_2343_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2343_end_mask_0 = const()[name = tensor("op_2343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2343_cast_fp16 = slice_by_index(begin = var_2343_begin_0, end = var_2343_end_0, end_mask = var_2343_end_mask_0, x = var_2227_cast_fp16)[name = tensor("op_2343_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = var_2227_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2357_begin_0 = const()[name = tensor("op_2357_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2357_end_0 = const()[name = tensor("op_2357_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2357_end_mask_0 = const()[name = tensor("op_2357_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2357_cast_fp16 = slice_by_index(begin = var_2357_begin_0, end = var_2357_end_0, end_mask = var_2357_end_mask_0, x = var_2227_cast_fp16)[name = tensor("op_2357_cast_fp16")]; + tensor var_2364_begin_0 = const()[name = tensor("op_2364_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2364_end_0 = const()[name = tensor("op_2364_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2364_end_mask_0 = const()[name = tensor("op_2364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2364_cast_fp16 = slice_by_index(begin = var_2364_begin_0, end = var_2364_end_0, end_mask = var_2364_end_mask_0, x = var_2231_cast_fp16)[name = tensor("op_2364_cast_fp16")]; + tensor var_2371_begin_0 = const()[name = tensor("op_2371_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2371_end_0 = const()[name = tensor("op_2371_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2371_end_mask_0 = const()[name = tensor("op_2371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2371_cast_fp16 = slice_by_index(begin = var_2371_begin_0, end = var_2371_end_0, end_mask = var_2371_end_mask_0, x = var_2231_cast_fp16)[name = tensor("op_2371_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2378_end_mask_0 = const()[name = 
tensor("op_2378_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = var_2231_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2385_begin_0 = const()[name = tensor("op_2385_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2385_end_0 = const()[name = tensor("op_2385_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2385_end_mask_0 = const()[name = tensor("op_2385_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2385_cast_fp16 = slice_by_index(begin = var_2385_begin_0, end = var_2385_end_0, end_mask = var_2385_end_mask_0, x = var_2231_cast_fp16)[name = tensor("op_2385_cast_fp16")]; + tensor var_2392_begin_0 = const()[name = tensor("op_2392_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2392_end_0 = const()[name = tensor("op_2392_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2392_end_mask_0 = const()[name = tensor("op_2392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2392_cast_fp16 = slice_by_index(begin = var_2392_begin_0, end = var_2392_end_0, end_mask = var_2392_end_mask_0, x = var_2235_cast_fp16)[name = tensor("op_2392_cast_fp16")]; + tensor var_2399_begin_0 = const()[name = tensor("op_2399_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2399_end_0 = const()[name = tensor("op_2399_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2399_end_mask_0 = const()[name = tensor("op_2399_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2399_cast_fp16 = slice_by_index(begin = var_2399_begin_0, end = var_2399_end_0, end_mask = var_2399_end_mask_0, x = var_2235_cast_fp16)[name = tensor("op_2399_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = var_2235_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2413_begin_0 = const()[name = tensor("op_2413_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2413_end_0 = const()[name = tensor("op_2413_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2413_end_mask_0 = const()[name = tensor("op_2413_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2413_cast_fp16 = slice_by_index(begin = var_2413_begin_0, end = var_2413_end_0, end_mask = var_2413_end_mask_0, x = var_2235_cast_fp16)[name = tensor("op_2413_cast_fp16")]; + tensor var_2420_begin_0 = const()[name = tensor("op_2420_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2420_end_0 = const()[name = tensor("op_2420_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2420_end_mask_0 = const()[name = tensor("op_2420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2420_cast_fp16 = slice_by_index(begin = var_2420_begin_0, end = var_2420_end_0, end_mask = var_2420_end_mask_0, x = var_2239_cast_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2427_begin_0 = const()[name = tensor("op_2427_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2427_end_0 = const()[name = tensor("op_2427_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2427_end_mask_0 = const()[name = tensor("op_2427_end_mask_0"), 
val = tensor([true, true, true, false])]; + tensor var_2427_cast_fp16 = slice_by_index(begin = var_2427_begin_0, end = var_2427_end_0, end_mask = var_2427_end_mask_0, x = var_2239_cast_fp16)[name = tensor("op_2427_cast_fp16")]; + tensor var_2434_begin_0 = const()[name = tensor("op_2434_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2434_end_0 = const()[name = tensor("op_2434_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2434_end_mask_0 = const()[name = tensor("op_2434_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2434_cast_fp16 = slice_by_index(begin = var_2434_begin_0, end = var_2434_end_0, end_mask = var_2434_end_mask_0, x = var_2239_cast_fp16)[name = tensor("op_2434_cast_fp16")]; + tensor var_2441_begin_0 = const()[name = tensor("op_2441_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2441_end_0 = const()[name = tensor("op_2441_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2441_end_mask_0 = const()[name = tensor("op_2441_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2441_cast_fp16 = slice_by_index(begin = var_2441_begin_0, end = var_2441_end_0, end_mask = var_2441_end_mask_0, x = var_2239_cast_fp16)[name = tensor("op_2441_cast_fp16")]; + tensor var_2448_begin_0 = const()[name = tensor("op_2448_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2448_end_0 = const()[name = tensor("op_2448_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2448_end_mask_0 = const()[name = tensor("op_2448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2448_cast_fp16 = slice_by_index(begin = var_2448_begin_0, end = var_2448_end_0, end_mask = var_2448_end_mask_0, x = var_2243_cast_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2455_begin_0 = const()[name = tensor("op_2455_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2455_end_0 = const()[name = tensor("op_2455_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2455_end_mask_0 = const()[name = tensor("op_2455_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2455_cast_fp16 = slice_by_index(begin = var_2455_begin_0, end = var_2455_end_0, end_mask = var_2455_end_mask_0, x = var_2243_cast_fp16)[name = tensor("op_2455_cast_fp16")]; + tensor var_2462_begin_0 = const()[name = tensor("op_2462_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2462_end_0 = const()[name = tensor("op_2462_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2462_end_mask_0 = const()[name = tensor("op_2462_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2462_cast_fp16 = slice_by_index(begin = var_2462_begin_0, end = var_2462_end_0, end_mask = var_2462_end_mask_0, x = var_2243_cast_fp16)[name = tensor("op_2462_cast_fp16")]; + tensor var_2469_begin_0 = const()[name = tensor("op_2469_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2469_end_0 = const()[name = tensor("op_2469_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2469_end_mask_0 = const()[name = tensor("op_2469_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2469_cast_fp16 = slice_by_index(begin = var_2469_begin_0, end = var_2469_end_0, end_mask = var_2469_end_mask_0, x = var_2243_cast_fp16)[name = tensor("op_2469_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2474_begin_0 = const()[name = tensor("op_2474_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2474_end_0 = const()[name = tensor("op_2474_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor 
var_2474_end_mask_0 = const()[name = tensor("op_2474_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_2 = transpose(perm = k_7_perm_0, x = key_7_cast_fp16)[name = tensor("transpose_2")]; + tensor var_2474_cast_fp16 = slice_by_index(begin = var_2474_begin_0, end = var_2474_end_0, end_mask = var_2474_end_mask_0, x = transpose_2)[name = tensor("op_2474_cast_fp16")]; + tensor var_2478_begin_0 = const()[name = tensor("op_2478_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_2478_end_0 = const()[name = tensor("op_2478_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_2478_end_mask_0 = const()[name = tensor("op_2478_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2478_cast_fp16 = slice_by_index(begin = var_2478_begin_0, end = var_2478_end_0, end_mask = var_2478_end_mask_0, x = transpose_2)[name = tensor("op_2478_cast_fp16")]; + tensor var_2482_begin_0 = const()[name = tensor("op_2482_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2482_end_0 = const()[name = tensor("op_2482_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_2482_end_mask_0 = const()[name = tensor("op_2482_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2482_cast_fp16 = slice_by_index(begin = var_2482_begin_0, end = var_2482_end_0, end_mask = var_2482_end_mask_0, x = transpose_2)[name = tensor("op_2482_cast_fp16")]; + tensor var_2486_begin_0 = const()[name = tensor("op_2486_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_2486_end_0 = const()[name = tensor("op_2486_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_2486_end_mask_0 = const()[name = tensor("op_2486_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2486_cast_fp16 = slice_by_index(begin = var_2486_begin_0, end = var_2486_end_0, end_mask = var_2486_end_mask_0, x = transpose_2)[name = tensor("op_2486_cast_fp16")]; + tensor var_2490_begin_0 = const()[name = tensor("op_2490_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2490_end_0 = const()[name = tensor("op_2490_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_2490_end_mask_0 = const()[name = tensor("op_2490_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2490_cast_fp16 = slice_by_index(begin = var_2490_begin_0, end = var_2490_end_0, end_mask = var_2490_end_mask_0, x = transpose_2)[name = tensor("op_2490_cast_fp16")]; + tensor var_2494_begin_0 = const()[name = tensor("op_2494_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_2494_end_0 = const()[name = tensor("op_2494_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_2494_end_mask_0 = const()[name = tensor("op_2494_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2494_cast_fp16 = slice_by_index(begin = var_2494_begin_0, end = var_2494_end_0, end_mask = var_2494_end_mask_0, x = transpose_2)[name = tensor("op_2494_cast_fp16")]; + tensor var_2498_begin_0 = const()[name = tensor("op_2498_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2498_end_0 = const()[name = tensor("op_2498_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_2498_end_mask_0 = const()[name = tensor("op_2498_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2498_cast_fp16 = slice_by_index(begin = var_2498_begin_0, end = var_2498_end_0, end_mask = var_2498_end_mask_0, x = transpose_2)[name = tensor("op_2498_cast_fp16")]; + tensor var_2502_begin_0 = const()[name = tensor("op_2502_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_2502_end_0 = const()[name = tensor("op_2502_end_0"), val = 
tensor([1, 1500, 1, 512])]; + tensor var_2502_end_mask_0 = const()[name = tensor("op_2502_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2502_cast_fp16 = slice_by_index(begin = var_2502_begin_0, end = var_2502_end_0, end_mask = var_2502_end_mask_0, x = transpose_2)[name = tensor("op_2502_cast_fp16")]; + tensor var_2504_begin_0 = const()[name = tensor("op_2504_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2504_end_0 = const()[name = tensor("op_2504_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2504_end_mask_0 = const()[name = tensor("op_2504_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2504_cast_fp16 = slice_by_index(begin = var_2504_begin_0, end = var_2504_end_0, end_mask = var_2504_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2508_begin_0 = const()[name = tensor("op_2508_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2508_end_0 = const()[name = tensor("op_2508_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_2508_end_mask_0 = const()[name = tensor("op_2508_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2508_cast_fp16 = slice_by_index(begin = var_2508_begin_0, end = var_2508_end_0, end_mask = var_2508_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2512_begin_0 = const()[name = tensor("op_2512_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2512_end_0 = const()[name = tensor("op_2512_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_2512_end_mask_0 = const()[name = tensor("op_2512_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2512_cast_fp16 = slice_by_index(begin = var_2512_begin_0, end = var_2512_end_0, end_mask = var_2512_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2516_begin_0 = const()[name = tensor("op_2516_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2516_end_0 = const()[name = tensor("op_2516_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_2516_end_mask_0 = const()[name = tensor("op_2516_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2516_cast_fp16 = slice_by_index(begin = var_2516_begin_0, end = var_2516_end_0, end_mask = var_2516_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2520_begin_0 = const()[name = tensor("op_2520_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2520_end_0 = const()[name = tensor("op_2520_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_2520_end_mask_0 = const()[name = tensor("op_2520_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2520_cast_fp16 = slice_by_index(begin = var_2520_begin_0, end = var_2520_end_0, end_mask = var_2520_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2524_begin_0 = const()[name = tensor("op_2524_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2524_end_0 = const()[name = tensor("op_2524_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_2524_end_mask_0 = const()[name = tensor("op_2524_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2524_cast_fp16 = slice_by_index(begin = var_2524_begin_0, end = var_2524_end_0, end_mask = var_2524_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2528_begin_0 = const()[name = tensor("op_2528_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2528_end_0 = const()[name = tensor("op_2528_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor 
var_2528_end_mask_0 = const()[name = tensor("op_2528_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2528_cast_fp16 = slice_by_index(begin = var_2528_begin_0, end = var_2528_end_0, end_mask = var_2528_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2532_begin_0 = const()[name = tensor("op_2532_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2532_end_0 = const()[name = tensor("op_2532_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_2532_end_mask_0 = const()[name = tensor("op_2532_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2532_cast_fp16 = slice_by_index(begin = var_2532_begin_0, end = var_2532_end_0, end_mask = var_2532_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2536_equation_0 = const()[name = tensor("op_2536_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2536_cast_fp16 = einsum(equation = var_2536_equation_0, values = (var_2474_cast_fp16, var_2252_cast_fp16))[name = tensor("op_2536_cast_fp16")]; + tensor var_2537_to_fp16 = const()[name = tensor("op_2537_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_193_cast_fp16 = mul(x = var_2536_cast_fp16, y = var_2537_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; + tensor var_2540_equation_0 = const()[name = tensor("op_2540_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2540_cast_fp16 = einsum(equation = var_2540_equation_0, values = (var_2474_cast_fp16, var_2259_cast_fp16))[name = tensor("op_2540_cast_fp16")]; + tensor var_2541_to_fp16 = const()[name = tensor("op_2541_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_195_cast_fp16 = mul(x = var_2540_cast_fp16, y = var_2541_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; + tensor var_2544_equation_0 = const()[name = tensor("op_2544_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2544_cast_fp16 = einsum(equation = var_2544_equation_0, values = (var_2474_cast_fp16, var_2266_cast_fp16))[name = tensor("op_2544_cast_fp16")]; + tensor var_2545_to_fp16 = const()[name = tensor("op_2545_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_197_cast_fp16 = mul(x = var_2544_cast_fp16, y = var_2545_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; + tensor var_2548_equation_0 = const()[name = tensor("op_2548_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2548_cast_fp16 = einsum(equation = var_2548_equation_0, values = (var_2474_cast_fp16, var_2273_cast_fp16))[name = tensor("op_2548_cast_fp16")]; + tensor var_2549_to_fp16 = const()[name = tensor("op_2549_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_199_cast_fp16 = mul(x = var_2548_cast_fp16, y = var_2549_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; + tensor var_2552_equation_0 = const()[name = tensor("op_2552_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2552_cast_fp16 = einsum(equation = var_2552_equation_0, values = (var_2478_cast_fp16, var_2280_cast_fp16))[name = tensor("op_2552_cast_fp16")]; + tensor var_2553_to_fp16 = const()[name = tensor("op_2553_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_201_cast_fp16 = mul(x = var_2552_cast_fp16, y = var_2553_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; + tensor var_2556_equation_0 = const()[name = tensor("op_2556_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2556_cast_fp16 = einsum(equation = var_2556_equation_0, values = (var_2478_cast_fp16, var_2287_cast_fp16))[name = tensor("op_2556_cast_fp16")]; + tensor var_2557_to_fp16 = const()[name = 
tensor("op_2557_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_203_cast_fp16 = mul(x = var_2556_cast_fp16, y = var_2557_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; + tensor var_2560_equation_0 = const()[name = tensor("op_2560_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2560_cast_fp16 = einsum(equation = var_2560_equation_0, values = (var_2478_cast_fp16, var_2294_cast_fp16))[name = tensor("op_2560_cast_fp16")]; + tensor var_2561_to_fp16 = const()[name = tensor("op_2561_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_205_cast_fp16 = mul(x = var_2560_cast_fp16, y = var_2561_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; + tensor var_2564_equation_0 = const()[name = tensor("op_2564_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2564_cast_fp16 = einsum(equation = var_2564_equation_0, values = (var_2478_cast_fp16, var_2301_cast_fp16))[name = tensor("op_2564_cast_fp16")]; + tensor var_2565_to_fp16 = const()[name = tensor("op_2565_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_207_cast_fp16 = mul(x = var_2564_cast_fp16, y = var_2565_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; + tensor var_2568_equation_0 = const()[name = tensor("op_2568_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2568_cast_fp16 = einsum(equation = var_2568_equation_0, values = (var_2482_cast_fp16, var_2308_cast_fp16))[name = tensor("op_2568_cast_fp16")]; + tensor var_2569_to_fp16 = const()[name = tensor("op_2569_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_209_cast_fp16 = mul(x = var_2568_cast_fp16, y = var_2569_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; + tensor var_2572_equation_0 = const()[name = tensor("op_2572_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2572_cast_fp16 = einsum(equation = var_2572_equation_0, values = (var_2482_cast_fp16, var_2315_cast_fp16))[name = tensor("op_2572_cast_fp16")]; + tensor var_2573_to_fp16 = const()[name = tensor("op_2573_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_211_cast_fp16 = mul(x = var_2572_cast_fp16, y = var_2573_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; + tensor var_2576_equation_0 = const()[name = tensor("op_2576_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2576_cast_fp16 = einsum(equation = var_2576_equation_0, values = (var_2482_cast_fp16, var_2322_cast_fp16))[name = tensor("op_2576_cast_fp16")]; + tensor var_2577_to_fp16 = const()[name = tensor("op_2577_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_213_cast_fp16 = mul(x = var_2576_cast_fp16, y = var_2577_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; + tensor var_2580_equation_0 = const()[name = tensor("op_2580_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2580_cast_fp16 = einsum(equation = var_2580_equation_0, values = (var_2482_cast_fp16, var_2329_cast_fp16))[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_to_fp16 = const()[name = tensor("op_2581_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_215_cast_fp16 = mul(x = var_2580_cast_fp16, y = var_2581_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; + tensor var_2584_equation_0 = const()[name = tensor("op_2584_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2584_cast_fp16 = einsum(equation = var_2584_equation_0, values = (var_2486_cast_fp16, var_2336_cast_fp16))[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_to_fp16 = const()[name = tensor("op_2585_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_217_cast_fp16 = mul(x = var_2584_cast_fp16, y = var_2585_to_fp16)[name = 
tensor("aw_chunk_217_cast_fp16")]; + tensor var_2588_equation_0 = const()[name = tensor("op_2588_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2588_cast_fp16 = einsum(equation = var_2588_equation_0, values = (var_2486_cast_fp16, var_2343_cast_fp16))[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_to_fp16 = const()[name = tensor("op_2589_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_219_cast_fp16 = mul(x = var_2588_cast_fp16, y = var_2589_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; + tensor var_2592_equation_0 = const()[name = tensor("op_2592_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2592_cast_fp16 = einsum(equation = var_2592_equation_0, values = (var_2486_cast_fp16, var_2350_cast_fp16))[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_to_fp16 = const()[name = tensor("op_2593_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_221_cast_fp16 = mul(x = var_2592_cast_fp16, y = var_2593_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; + tensor var_2596_equation_0 = const()[name = tensor("op_2596_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2596_cast_fp16 = einsum(equation = var_2596_equation_0, values = (var_2486_cast_fp16, var_2357_cast_fp16))[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_to_fp16 = const()[name = tensor("op_2597_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_223_cast_fp16 = mul(x = var_2596_cast_fp16, y = var_2597_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; + tensor var_2600_equation_0 = const()[name = tensor("op_2600_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2600_cast_fp16 = einsum(equation = var_2600_equation_0, values = (var_2490_cast_fp16, var_2364_cast_fp16))[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_to_fp16 = const()[name = tensor("op_2601_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_225_cast_fp16 = mul(x = var_2600_cast_fp16, y = var_2601_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; + tensor var_2604_equation_0 = const()[name = tensor("op_2604_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2604_cast_fp16 = einsum(equation = var_2604_equation_0, values = (var_2490_cast_fp16, var_2371_cast_fp16))[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_to_fp16 = const()[name = tensor("op_2605_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_227_cast_fp16 = mul(x = var_2604_cast_fp16, y = var_2605_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; + tensor var_2608_equation_0 = const()[name = tensor("op_2608_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2608_cast_fp16 = einsum(equation = var_2608_equation_0, values = (var_2490_cast_fp16, var_2378_cast_fp16))[name = tensor("op_2608_cast_fp16")]; + tensor var_2609_to_fp16 = const()[name = tensor("op_2609_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_229_cast_fp16 = mul(x = var_2608_cast_fp16, y = var_2609_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2490_cast_fp16, var_2385_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2613_to_fp16 = const()[name = tensor("op_2613_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_231_cast_fp16 = mul(x = var_2612_cast_fp16, y = var_2613_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + 
tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2494_cast_fp16, var_2392_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2617_to_fp16 = const()[name = tensor("op_2617_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_233_cast_fp16 = mul(x = var_2616_cast_fp16, y = var_2617_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2494_cast_fp16, var_2399_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2621_to_fp16 = const()[name = tensor("op_2621_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_235_cast_fp16 = mul(x = var_2620_cast_fp16, y = var_2621_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2494_cast_fp16, var_2406_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2625_to_fp16 = const()[name = tensor("op_2625_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_237_cast_fp16 = mul(x = var_2624_cast_fp16, y = var_2625_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2494_cast_fp16, var_2413_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2629_to_fp16 = const()[name = tensor("op_2629_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_239_cast_fp16 = mul(x = var_2628_cast_fp16, y = var_2629_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2498_cast_fp16, var_2420_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2633_to_fp16 = const()[name = tensor("op_2633_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_241_cast_fp16 = mul(x = var_2632_cast_fp16, y = var_2633_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2498_cast_fp16, var_2427_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2637_to_fp16 = const()[name = tensor("op_2637_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_243_cast_fp16 = mul(x = var_2636_cast_fp16, y = var_2637_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2498_cast_fp16, var_2434_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2641_to_fp16 = const()[name = tensor("op_2641_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_245_cast_fp16 = mul(x = var_2640_cast_fp16, y = var_2641_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2498_cast_fp16, var_2441_cast_fp16))[name = 
tensor("op_2644_cast_fp16")]; + tensor var_2645_to_fp16 = const()[name = tensor("op_2645_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_247_cast_fp16 = mul(x = var_2644_cast_fp16, y = var_2645_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2502_cast_fp16, var_2448_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2649_to_fp16 = const()[name = tensor("op_2649_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_249_cast_fp16 = mul(x = var_2648_cast_fp16, y = var_2649_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2502_cast_fp16, var_2455_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2653_to_fp16 = const()[name = tensor("op_2653_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_251_cast_fp16 = mul(x = var_2652_cast_fp16, y = var_2653_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2502_cast_fp16, var_2462_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2657_to_fp16 = const()[name = tensor("op_2657_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_253_cast_fp16 = mul(x = var_2656_cast_fp16, y = var_2657_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2502_cast_fp16, var_2469_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2661_to_fp16 = const()[name = tensor("op_2661_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_255_cast_fp16 = mul(x = var_2660_cast_fp16, y = var_2661_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; + tensor var_2663_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_193_cast_fp16)[name = tensor("op_2663_cast_fp16")]; + tensor var_2664_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_195_cast_fp16)[name = tensor("op_2664_cast_fp16")]; + tensor var_2665_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_197_cast_fp16)[name = tensor("op_2665_cast_fp16")]; + tensor var_2666_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_199_cast_fp16)[name = tensor("op_2666_cast_fp16")]; + tensor var_2667_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_201_cast_fp16)[name = tensor("op_2667_cast_fp16")]; + tensor var_2668_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_203_cast_fp16)[name = tensor("op_2668_cast_fp16")]; + tensor var_2669_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_205_cast_fp16)[name = tensor("op_2669_cast_fp16")]; + tensor var_2670_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_207_cast_fp16)[name = tensor("op_2670_cast_fp16")]; + tensor var_2671_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_209_cast_fp16)[name = tensor("op_2671_cast_fp16")]; + tensor var_2672_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_211_cast_fp16)[name = tensor("op_2672_cast_fp16")]; + tensor var_2673_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_213_cast_fp16)[name = tensor("op_2673_cast_fp16")]; + tensor var_2674_cast_fp16 = 
softmax(axis = var_2160, x = aw_chunk_215_cast_fp16)[name = tensor("op_2674_cast_fp16")]; + tensor var_2675_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_217_cast_fp16)[name = tensor("op_2675_cast_fp16")]; + tensor var_2676_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_219_cast_fp16)[name = tensor("op_2676_cast_fp16")]; + tensor var_2677_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_221_cast_fp16)[name = tensor("op_2677_cast_fp16")]; + tensor var_2678_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_223_cast_fp16)[name = tensor("op_2678_cast_fp16")]; + tensor var_2679_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_225_cast_fp16)[name = tensor("op_2679_cast_fp16")]; + tensor var_2680_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_227_cast_fp16)[name = tensor("op_2680_cast_fp16")]; + tensor var_2681_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_229_cast_fp16)[name = tensor("op_2681_cast_fp16")]; + tensor var_2682_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_231_cast_fp16)[name = tensor("op_2682_cast_fp16")]; + tensor var_2683_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_233_cast_fp16)[name = tensor("op_2683_cast_fp16")]; + tensor var_2684_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_235_cast_fp16)[name = tensor("op_2684_cast_fp16")]; + tensor var_2685_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_237_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor var_2686_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_239_cast_fp16)[name = tensor("op_2686_cast_fp16")]; + tensor var_2687_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_241_cast_fp16)[name = tensor("op_2687_cast_fp16")]; + tensor var_2688_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_243_cast_fp16)[name = tensor("op_2688_cast_fp16")]; + tensor var_2689_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_245_cast_fp16)[name = tensor("op_2689_cast_fp16")]; + tensor var_2690_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_247_cast_fp16)[name = tensor("op_2690_cast_fp16")]; + tensor var_2691_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_249_cast_fp16)[name = tensor("op_2691_cast_fp16")]; + tensor var_2692_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_251_cast_fp16)[name = tensor("op_2692_cast_fp16")]; + tensor var_2693_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_253_cast_fp16)[name = tensor("op_2693_cast_fp16")]; + tensor var_2694_cast_fp16 = softmax(axis = var_2160, x = aw_chunk_255_cast_fp16)[name = tensor("op_2694_cast_fp16")]; + tensor var_2696_equation_0 = const()[name = tensor("op_2696_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2696_cast_fp16 = einsum(equation = var_2696_equation_0, values = (var_2504_cast_fp16, var_2663_cast_fp16))[name = tensor("op_2696_cast_fp16")]; + tensor var_2698_equation_0 = const()[name = tensor("op_2698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2698_cast_fp16 = einsum(equation = var_2698_equation_0, values = (var_2504_cast_fp16, var_2664_cast_fp16))[name = tensor("op_2698_cast_fp16")]; + tensor var_2700_equation_0 = const()[name = tensor("op_2700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2700_cast_fp16 = einsum(equation = var_2700_equation_0, values = (var_2504_cast_fp16, var_2665_cast_fp16))[name = tensor("op_2700_cast_fp16")]; + tensor var_2702_equation_0 = const()[name = tensor("op_2702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2702_cast_fp16 = einsum(equation = var_2702_equation_0, values = (var_2504_cast_fp16, var_2666_cast_fp16))[name = 
tensor("op_2702_cast_fp16")]; + tensor var_2704_equation_0 = const()[name = tensor("op_2704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2704_cast_fp16 = einsum(equation = var_2704_equation_0, values = (var_2508_cast_fp16, var_2667_cast_fp16))[name = tensor("op_2704_cast_fp16")]; + tensor var_2706_equation_0 = const()[name = tensor("op_2706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2706_cast_fp16 = einsum(equation = var_2706_equation_0, values = (var_2508_cast_fp16, var_2668_cast_fp16))[name = tensor("op_2706_cast_fp16")]; + tensor var_2708_equation_0 = const()[name = tensor("op_2708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2708_cast_fp16 = einsum(equation = var_2708_equation_0, values = (var_2508_cast_fp16, var_2669_cast_fp16))[name = tensor("op_2708_cast_fp16")]; + tensor var_2710_equation_0 = const()[name = tensor("op_2710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2710_cast_fp16 = einsum(equation = var_2710_equation_0, values = (var_2508_cast_fp16, var_2670_cast_fp16))[name = tensor("op_2710_cast_fp16")]; + tensor var_2712_equation_0 = const()[name = tensor("op_2712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2712_cast_fp16 = einsum(equation = var_2712_equation_0, values = (var_2512_cast_fp16, var_2671_cast_fp16))[name = tensor("op_2712_cast_fp16")]; + tensor var_2714_equation_0 = const()[name = tensor("op_2714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2714_cast_fp16 = einsum(equation = var_2714_equation_0, values = (var_2512_cast_fp16, var_2672_cast_fp16))[name = tensor("op_2714_cast_fp16")]; + tensor var_2716_equation_0 = const()[name = tensor("op_2716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2716_cast_fp16 = einsum(equation = var_2716_equation_0, values = (var_2512_cast_fp16, var_2673_cast_fp16))[name = tensor("op_2716_cast_fp16")]; + tensor var_2718_equation_0 = const()[name = tensor("op_2718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2718_cast_fp16 = einsum(equation = var_2718_equation_0, values = (var_2512_cast_fp16, var_2674_cast_fp16))[name = tensor("op_2718_cast_fp16")]; + tensor var_2720_equation_0 = const()[name = tensor("op_2720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2720_cast_fp16 = einsum(equation = var_2720_equation_0, values = (var_2516_cast_fp16, var_2675_cast_fp16))[name = tensor("op_2720_cast_fp16")]; + tensor var_2722_equation_0 = const()[name = tensor("op_2722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2516_cast_fp16, var_2676_cast_fp16))[name = tensor("op_2722_cast_fp16")]; + tensor var_2724_equation_0 = const()[name = tensor("op_2724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2724_cast_fp16 = einsum(equation = var_2724_equation_0, values = (var_2516_cast_fp16, var_2677_cast_fp16))[name = tensor("op_2724_cast_fp16")]; + tensor var_2726_equation_0 = const()[name = tensor("op_2726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2516_cast_fp16, var_2678_cast_fp16))[name = tensor("op_2726_cast_fp16")]; + tensor var_2728_equation_0 = const()[name = tensor("op_2728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2728_cast_fp16 = einsum(equation = var_2728_equation_0, values = (var_2520_cast_fp16, var_2679_cast_fp16))[name = tensor("op_2728_cast_fp16")]; + tensor var_2730_equation_0 = const()[name = 
tensor("op_2730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2730_cast_fp16 = einsum(equation = var_2730_equation_0, values = (var_2520_cast_fp16, var_2680_cast_fp16))[name = tensor("op_2730_cast_fp16")]; + tensor var_2732_equation_0 = const()[name = tensor("op_2732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2732_cast_fp16 = einsum(equation = var_2732_equation_0, values = (var_2520_cast_fp16, var_2681_cast_fp16))[name = tensor("op_2732_cast_fp16")]; + tensor var_2734_equation_0 = const()[name = tensor("op_2734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2734_cast_fp16 = einsum(equation = var_2734_equation_0, values = (var_2520_cast_fp16, var_2682_cast_fp16))[name = tensor("op_2734_cast_fp16")]; + tensor var_2736_equation_0 = const()[name = tensor("op_2736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2736_cast_fp16 = einsum(equation = var_2736_equation_0, values = (var_2524_cast_fp16, var_2683_cast_fp16))[name = tensor("op_2736_cast_fp16")]; + tensor var_2738_equation_0 = const()[name = tensor("op_2738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2738_cast_fp16 = einsum(equation = var_2738_equation_0, values = (var_2524_cast_fp16, var_2684_cast_fp16))[name = tensor("op_2738_cast_fp16")]; + tensor var_2740_equation_0 = const()[name = tensor("op_2740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2740_cast_fp16 = einsum(equation = var_2740_equation_0, values = (var_2524_cast_fp16, var_2685_cast_fp16))[name = tensor("op_2740_cast_fp16")]; + tensor var_2742_equation_0 = const()[name = tensor("op_2742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2742_cast_fp16 = einsum(equation = var_2742_equation_0, values = (var_2524_cast_fp16, var_2686_cast_fp16))[name = tensor("op_2742_cast_fp16")]; + tensor var_2744_equation_0 = const()[name = tensor("op_2744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2744_cast_fp16 = einsum(equation = var_2744_equation_0, values = (var_2528_cast_fp16, var_2687_cast_fp16))[name = tensor("op_2744_cast_fp16")]; + tensor var_2746_equation_0 = const()[name = tensor("op_2746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2746_cast_fp16 = einsum(equation = var_2746_equation_0, values = (var_2528_cast_fp16, var_2688_cast_fp16))[name = tensor("op_2746_cast_fp16")]; + tensor var_2748_equation_0 = const()[name = tensor("op_2748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2748_cast_fp16 = einsum(equation = var_2748_equation_0, values = (var_2528_cast_fp16, var_2689_cast_fp16))[name = tensor("op_2748_cast_fp16")]; + tensor var_2750_equation_0 = const()[name = tensor("op_2750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2750_cast_fp16 = einsum(equation = var_2750_equation_0, values = (var_2528_cast_fp16, var_2690_cast_fp16))[name = tensor("op_2750_cast_fp16")]; + tensor var_2752_equation_0 = const()[name = tensor("op_2752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2752_cast_fp16 = einsum(equation = var_2752_equation_0, values = (var_2532_cast_fp16, var_2691_cast_fp16))[name = tensor("op_2752_cast_fp16")]; + tensor var_2754_equation_0 = const()[name = tensor("op_2754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2754_cast_fp16 = einsum(equation = var_2754_equation_0, values = (var_2532_cast_fp16, var_2692_cast_fp16))[name = tensor("op_2754_cast_fp16")]; + tensor var_2756_equation_0 = const()[name = tensor("op_2756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_2756_cast_fp16 = einsum(equation = var_2756_equation_0, values = (var_2532_cast_fp16, var_2693_cast_fp16))[name = tensor("op_2756_cast_fp16")]; + tensor var_2758_equation_0 = const()[name = tensor("op_2758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2758_cast_fp16 = einsum(equation = var_2758_equation_0, values = (var_2532_cast_fp16, var_2694_cast_fp16))[name = tensor("op_2758_cast_fp16")]; + tensor var_2760_interleave_0 = const()[name = tensor("op_2760_interleave_0"), val = tensor(false)]; + tensor var_2760_cast_fp16 = concat(axis = var_2147, interleave = var_2760_interleave_0, values = (var_2696_cast_fp16, var_2698_cast_fp16, var_2700_cast_fp16, var_2702_cast_fp16))[name = tensor("op_2760_cast_fp16")]; + tensor var_2762_interleave_0 = const()[name = tensor("op_2762_interleave_0"), val = tensor(false)]; + tensor var_2762_cast_fp16 = concat(axis = var_2147, interleave = var_2762_interleave_0, values = (var_2704_cast_fp16, var_2706_cast_fp16, var_2708_cast_fp16, var_2710_cast_fp16))[name = tensor("op_2762_cast_fp16")]; + tensor var_2764_interleave_0 = const()[name = tensor("op_2764_interleave_0"), val = tensor(false)]; + tensor var_2764_cast_fp16 = concat(axis = var_2147, interleave = var_2764_interleave_0, values = (var_2712_cast_fp16, var_2714_cast_fp16, var_2716_cast_fp16, var_2718_cast_fp16))[name = tensor("op_2764_cast_fp16")]; + tensor var_2766_interleave_0 = const()[name = tensor("op_2766_interleave_0"), val = tensor(false)]; + tensor var_2766_cast_fp16 = concat(axis = var_2147, interleave = var_2766_interleave_0, values = (var_2720_cast_fp16, var_2722_cast_fp16, var_2724_cast_fp16, var_2726_cast_fp16))[name = tensor("op_2766_cast_fp16")]; + tensor var_2768_interleave_0 = const()[name = tensor("op_2768_interleave_0"), val = tensor(false)]; + tensor var_2768_cast_fp16 = concat(axis = var_2147, interleave = var_2768_interleave_0, values = (var_2728_cast_fp16, var_2730_cast_fp16, var_2732_cast_fp16, var_2734_cast_fp16))[name = tensor("op_2768_cast_fp16")]; + tensor var_2770_interleave_0 = const()[name = tensor("op_2770_interleave_0"), val = tensor(false)]; + tensor var_2770_cast_fp16 = concat(axis = var_2147, interleave = var_2770_interleave_0, values = (var_2736_cast_fp16, var_2738_cast_fp16, var_2740_cast_fp16, var_2742_cast_fp16))[name = tensor("op_2770_cast_fp16")]; + tensor var_2772_interleave_0 = const()[name = tensor("op_2772_interleave_0"), val = tensor(false)]; + tensor var_2772_cast_fp16 = concat(axis = var_2147, interleave = var_2772_interleave_0, values = (var_2744_cast_fp16, var_2746_cast_fp16, var_2748_cast_fp16, var_2750_cast_fp16))[name = tensor("op_2772_cast_fp16")]; + tensor var_2774_interleave_0 = const()[name = tensor("op_2774_interleave_0"), val = tensor(false)]; + tensor var_2774_cast_fp16 = concat(axis = var_2147, interleave = var_2774_interleave_0, values = (var_2752_cast_fp16, var_2754_cast_fp16, var_2756_cast_fp16, var_2758_cast_fp16))[name = tensor("op_2774_cast_fp16")]; + tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), val = tensor(false)]; + tensor input_25_cast_fp16 = concat(axis = var_2160, interleave = input_25_interleave_0, values = (var_2760_cast_fp16, var_2762_cast_fp16, var_2764_cast_fp16, var_2766_cast_fp16, var_2768_cast_fp16, var_2770_cast_fp16, var_2772_cast_fp16, var_2774_cast_fp16))[name = tensor("input_25_cast_fp16")]; + tensor var_2779 = const()[name = tensor("op_2779"), val = tensor([1, 1])]; + tensor var_2781 = const()[name = tensor("op_2781"), val = tensor([1, 1])]; + tensor 
obj_15_pad_type_0 = const()[name = tensor("obj_15_pad_type_0"), val = tensor("custom")]; + tensor obj_15_pad_0 = const()[name = tensor("obj_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23850752)))]; + tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24375104)))]; + tensor obj_15_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_2781, groups = var_2160, pad = obj_15_pad_0, pad_type = obj_15_pad_type_0, strides = var_2779, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor var_2787 = const()[name = tensor("op_2787"), val = tensor([1])]; + tensor channels_mean_15_cast_fp16 = reduce_mean(axes = var_2787, keep_dims = var_2161, x = inputs_15_cast_fp16)[name = tensor("channels_mean_15_cast_fp16")]; + tensor zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor("zero_mean_15_cast_fp16")]; + tensor zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor("zero_mean_sq_15_cast_fp16")]; + tensor var_2791 = const()[name = tensor("op_2791"), val = tensor([1])]; + tensor var_2792_cast_fp16 = reduce_mean(axes = var_2791, keep_dims = var_2161, x = zero_mean_sq_15_cast_fp16)[name = tensor("op_2792_cast_fp16")]; + tensor var_2793_to_fp16 = const()[name = tensor("op_2793_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2794_cast_fp16 = add(x = var_2792_cast_fp16, y = var_2793_to_fp16)[name = tensor("op_2794_cast_fp16")]; + tensor denom_15_epsilon_0_to_fp16 = const()[name = tensor("denom_15_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_2794_cast_fp16)[name = tensor("denom_15_cast_fp16")]; + tensor out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24376192)))]; + tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24377280)))]; + tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2805 = const()[name = tensor("op_2805"), val = tensor([1, 1])]; + tensor var_2807 = const()[name = tensor("op_2807"), val = tensor([1, 1])]; + tensor input_29_pad_type_0 = const()[name = tensor("input_29_pad_type_0"), val = tensor("custom")]; + tensor input_29_pad_0 = const()[name = tensor("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc1_weight_to_fp16 = const()[name = 
tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24378368)))]; + tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26475584)))]; + tensor input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_2807, groups = var_2160, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = var_2805, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_2813 = const()[name = tensor("op_2813"), val = tensor([1, 1])]; + tensor var_2815 = const()[name = tensor("op_2815"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26479744)))]; + tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28576960)))]; + tensor hidden_states_11_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_2815, groups = var_2160, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = var_2813, weight = layers_3_fc2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_2822 = const()[name = tensor("op_2822"), val = tensor(3)]; + tensor var_2835 = const()[name = tensor("op_2835"), val = tensor(1)]; + tensor var_2836 = const()[name = tensor("op_2836"), val = tensor(true)]; + tensor var_2846 = const()[name = tensor("op_2846"), val = tensor([1])]; + tensor channels_mean_17_cast_fp16 = reduce_mean(axes = var_2846, keep_dims = var_2836, x = inputs_17_cast_fp16)[name = tensor("channels_mean_17_cast_fp16")]; + tensor zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor("zero_mean_17_cast_fp16")]; + tensor zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor("zero_mean_sq_17_cast_fp16")]; + tensor var_2850 = const()[name = tensor("op_2850"), val = tensor([1])]; + tensor var_2851_cast_fp16 = reduce_mean(axes = var_2850, keep_dims = var_2836, x = zero_mean_sq_17_cast_fp16)[name = tensor("op_2851_cast_fp16")]; + tensor var_2852_to_fp16 = const()[name = tensor("op_2852_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2853_cast_fp16 = add(x = var_2851_cast_fp16, y = var_2852_to_fp16)[name = tensor("op_2853_cast_fp16")]; + tensor denom_17_epsilon_0_to_fp16 = const()[name = tensor("denom_17_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_2853_cast_fp16)[name = tensor("denom_17_cast_fp16")]; + tensor out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = 
tensor("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28578048)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28579136)))]; + tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor var_2868 = const()[name = tensor("op_2868"), val = tensor([1, 1])]; + tensor var_2870 = const()[name = tensor("op_2870"), val = tensor([1, 1])]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("custom")]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28580224)))]; + tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29104576)))]; + tensor query_9_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = var_2870, groups = var_2835, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_2868, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_2874 = const()[name = tensor("op_2874"), val = tensor([1, 1])]; + tensor var_2876 = const()[name = tensor("op_2876"), val = tensor([1, 1])]; + tensor key_9_pad_type_0 = const()[name = tensor("key_9_pad_type_0"), val = tensor("custom")]; + tensor key_9_pad_0 = const()[name = tensor("key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29105664)))]; + tensor key_9_cast_fp16 = conv(dilations = var_2876, groups = var_2835, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = var_2874, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_2881 = const()[name = tensor("op_2881"), val = tensor([1, 1])]; + tensor var_2883 = const()[name = tensor("op_2883"), val = tensor([1, 1])]; + tensor value_9_pad_type_0 = const()[name = tensor("value_9_pad_type_0"), val = tensor("custom")]; + tensor value_9_pad_0 = const()[name = tensor("value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29630016)))]; + tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30154368)))]; + tensor value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = var_2883, 
groups = var_2835, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = var_2881, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_2890_begin_0 = const()[name = tensor("op_2890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2890_end_0 = const()[name = tensor("op_2890_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2890_end_mask_0 = const()[name = tensor("op_2890_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2890_cast_fp16 = slice_by_index(begin = var_2890_begin_0, end = var_2890_end_0, end_mask = var_2890_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2890_cast_fp16")]; + tensor var_2894_begin_0 = const()[name = tensor("op_2894_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2894_end_0 = const()[name = tensor("op_2894_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_2894_end_mask_0 = const()[name = tensor("op_2894_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2894_cast_fp16 = slice_by_index(begin = var_2894_begin_0, end = var_2894_end_0, end_mask = var_2894_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2894_cast_fp16")]; + tensor var_2898_begin_0 = const()[name = tensor("op_2898_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2898_end_0 = const()[name = tensor("op_2898_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_2898_end_mask_0 = const()[name = tensor("op_2898_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2898_cast_fp16 = slice_by_index(begin = var_2898_begin_0, end = var_2898_end_0, end_mask = var_2898_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2898_cast_fp16")]; + tensor var_2902_begin_0 = const()[name = tensor("op_2902_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2902_end_0 = const()[name = tensor("op_2902_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_2902_end_mask_0 = const()[name = tensor("op_2902_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2902_cast_fp16 = slice_by_index(begin = var_2902_begin_0, end = var_2902_end_0, end_mask = var_2902_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2902_cast_fp16")]; + tensor var_2906_begin_0 = const()[name = tensor("op_2906_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2906_end_0 = const()[name = tensor("op_2906_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_2906_end_mask_0 = const()[name = tensor("op_2906_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2906_cast_fp16 = slice_by_index(begin = var_2906_begin_0, end = var_2906_end_0, end_mask = var_2906_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2906_cast_fp16")]; + tensor var_2910_begin_0 = const()[name = tensor("op_2910_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2910_end_0 = const()[name = tensor("op_2910_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_2910_end_mask_0 = const()[name = tensor("op_2910_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2910_cast_fp16 = slice_by_index(begin = var_2910_begin_0, end = var_2910_end_0, end_mask = var_2910_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2910_cast_fp16")]; + tensor var_2914_begin_0 = const()[name = tensor("op_2914_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2914_end_0 = const()[name = tensor("op_2914_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_2914_end_mask_0 = const()[name = tensor("op_2914_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2914_cast_fp16")]; + tensor var_2918_begin_0 = const()[name = tensor("op_2918_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2918_end_0 = const()[name = tensor("op_2918_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_2918_end_mask_0 = const()[name = tensor("op_2918_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2918_cast_fp16 = slice_by_index(begin = var_2918_begin_0, end = var_2918_end_0, end_mask = var_2918_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2918_cast_fp16")]; + tensor var_2927_begin_0 = const()[name = tensor("op_2927_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2927_end_0 = const()[name = tensor("op_2927_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2927_end_mask_0 = const()[name = tensor("op_2927_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2927_cast_fp16 = slice_by_index(begin = var_2927_begin_0, end = var_2927_end_0, end_mask = var_2927_end_mask_0, x = var_2890_cast_fp16)[name = tensor("op_2927_cast_fp16")]; + tensor var_2934_begin_0 = const()[name = tensor("op_2934_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2934_end_0 = const()[name = tensor("op_2934_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2934_end_mask_0 = const()[name = tensor("op_2934_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2934_cast_fp16 = slice_by_index(begin = var_2934_begin_0, end = var_2934_end_0, end_mask = var_2934_end_mask_0, x = var_2890_cast_fp16)[name = tensor("op_2934_cast_fp16")]; + tensor var_2941_begin_0 = const()[name = tensor("op_2941_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2941_end_0 = const()[name = tensor("op_2941_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2941_end_mask_0 = const()[name = tensor("op_2941_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2941_cast_fp16 = slice_by_index(begin = var_2941_begin_0, end = var_2941_end_0, end_mask = var_2941_end_mask_0, x = var_2890_cast_fp16)[name = tensor("op_2941_cast_fp16")]; + tensor var_2948_begin_0 = const()[name = tensor("op_2948_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2948_end_0 = const()[name = tensor("op_2948_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2948_end_mask_0 = const()[name = tensor("op_2948_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2948_cast_fp16 = slice_by_index(begin = var_2948_begin_0, end = var_2948_end_0, end_mask = var_2948_end_mask_0, x = var_2890_cast_fp16)[name = tensor("op_2948_cast_fp16")]; + tensor var_2955_begin_0 = const()[name = tensor("op_2955_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2955_end_0 = const()[name = tensor("op_2955_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2955_end_mask_0 = const()[name = tensor("op_2955_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2955_cast_fp16 = slice_by_index(begin = var_2955_begin_0, end = var_2955_end_0, end_mask = var_2955_end_mask_0, x = var_2894_cast_fp16)[name = tensor("op_2955_cast_fp16")]; + tensor var_2962_begin_0 = const()[name = tensor("op_2962_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2962_end_0 = const()[name = tensor("op_2962_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2962_end_mask_0 = const()[name = tensor("op_2962_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2962_cast_fp16 = 
slice_by_index(begin = var_2962_begin_0, end = var_2962_end_0, end_mask = var_2962_end_mask_0, x = var_2894_cast_fp16)[name = tensor("op_2962_cast_fp16")]; + tensor var_2969_begin_0 = const()[name = tensor("op_2969_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2969_end_0 = const()[name = tensor("op_2969_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2969_end_mask_0 = const()[name = tensor("op_2969_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2969_cast_fp16 = slice_by_index(begin = var_2969_begin_0, end = var_2969_end_0, end_mask = var_2969_end_mask_0, x = var_2894_cast_fp16)[name = tensor("op_2969_cast_fp16")]; + tensor var_2976_begin_0 = const()[name = tensor("op_2976_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2976_end_0 = const()[name = tensor("op_2976_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2976_end_mask_0 = const()[name = tensor("op_2976_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2976_cast_fp16 = slice_by_index(begin = var_2976_begin_0, end = var_2976_end_0, end_mask = var_2976_end_mask_0, x = var_2894_cast_fp16)[name = tensor("op_2976_cast_fp16")]; + tensor var_2983_begin_0 = const()[name = tensor("op_2983_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2983_end_0 = const()[name = tensor("op_2983_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2983_end_mask_0 = const()[name = tensor("op_2983_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2983_cast_fp16 = slice_by_index(begin = var_2983_begin_0, end = var_2983_end_0, end_mask = var_2983_end_mask_0, x = var_2898_cast_fp16)[name = tensor("op_2983_cast_fp16")]; + tensor var_2990_begin_0 = const()[name = tensor("op_2990_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2990_end_0 = const()[name = tensor("op_2990_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2990_end_mask_0 = const()[name = tensor("op_2990_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2990_cast_fp16 = slice_by_index(begin = var_2990_begin_0, end = var_2990_end_0, end_mask = var_2990_end_mask_0, x = var_2898_cast_fp16)[name = tensor("op_2990_cast_fp16")]; + tensor var_2997_begin_0 = const()[name = tensor("op_2997_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2997_end_0 = const()[name = tensor("op_2997_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2997_end_mask_0 = const()[name = tensor("op_2997_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2997_cast_fp16 = slice_by_index(begin = var_2997_begin_0, end = var_2997_end_0, end_mask = var_2997_end_mask_0, x = var_2898_cast_fp16)[name = tensor("op_2997_cast_fp16")]; + tensor var_3004_begin_0 = const()[name = tensor("op_3004_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3004_end_0 = const()[name = tensor("op_3004_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3004_end_mask_0 = const()[name = tensor("op_3004_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3004_cast_fp16 = slice_by_index(begin = var_3004_begin_0, end = var_3004_end_0, end_mask = var_3004_end_mask_0, x = var_2898_cast_fp16)[name = tensor("op_3004_cast_fp16")]; + tensor var_3011_begin_0 = const()[name = tensor("op_3011_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3011_end_0 = const()[name = tensor("op_3011_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3011_end_mask_0 = const()[name = tensor("op_3011_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3011_cast_fp16 = slice_by_index(begin = 
var_3011_begin_0, end = var_3011_end_0, end_mask = var_3011_end_mask_0, x = var_2902_cast_fp16)[name = tensor("op_3011_cast_fp16")]; + tensor var_3018_begin_0 = const()[name = tensor("op_3018_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3018_end_0 = const()[name = tensor("op_3018_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3018_end_mask_0 = const()[name = tensor("op_3018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3018_cast_fp16 = slice_by_index(begin = var_3018_begin_0, end = var_3018_end_0, end_mask = var_3018_end_mask_0, x = var_2902_cast_fp16)[name = tensor("op_3018_cast_fp16")]; + tensor var_3025_begin_0 = const()[name = tensor("op_3025_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3025_end_0 = const()[name = tensor("op_3025_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3025_end_mask_0 = const()[name = tensor("op_3025_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3025_cast_fp16 = slice_by_index(begin = var_3025_begin_0, end = var_3025_end_0, end_mask = var_3025_end_mask_0, x = var_2902_cast_fp16)[name = tensor("op_3025_cast_fp16")]; + tensor var_3032_begin_0 = const()[name = tensor("op_3032_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3032_end_0 = const()[name = tensor("op_3032_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3032_end_mask_0 = const()[name = tensor("op_3032_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3032_cast_fp16 = slice_by_index(begin = var_3032_begin_0, end = var_3032_end_0, end_mask = var_3032_end_mask_0, x = var_2902_cast_fp16)[name = tensor("op_3032_cast_fp16")]; + tensor var_3039_begin_0 = const()[name = tensor("op_3039_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3039_end_0 = const()[name = tensor("op_3039_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3039_end_mask_0 = const()[name = tensor("op_3039_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3039_cast_fp16 = slice_by_index(begin = var_3039_begin_0, end = var_3039_end_0, end_mask = var_3039_end_mask_0, x = var_2906_cast_fp16)[name = tensor("op_3039_cast_fp16")]; + tensor var_3046_begin_0 = const()[name = tensor("op_3046_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3046_end_0 = const()[name = tensor("op_3046_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3046_end_mask_0 = const()[name = tensor("op_3046_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3046_cast_fp16 = slice_by_index(begin = var_3046_begin_0, end = var_3046_end_0, end_mask = var_3046_end_mask_0, x = var_2906_cast_fp16)[name = tensor("op_3046_cast_fp16")]; + tensor var_3053_begin_0 = const()[name = tensor("op_3053_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3053_end_0 = const()[name = tensor("op_3053_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3053_end_mask_0 = const()[name = tensor("op_3053_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3053_cast_fp16 = slice_by_index(begin = var_3053_begin_0, end = var_3053_end_0, end_mask = var_3053_end_mask_0, x = var_2906_cast_fp16)[name = tensor("op_3053_cast_fp16")]; + tensor var_3060_begin_0 = const()[name = tensor("op_3060_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3060_end_0 = const()[name = tensor("op_3060_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3060_end_mask_0 = const()[name = tensor("op_3060_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3060_cast_fp16 = slice_by_index(begin = var_3060_begin_0, end = 
var_3060_end_0, end_mask = var_3060_end_mask_0, x = var_2906_cast_fp16)[name = tensor("op_3060_cast_fp16")]; + tensor var_3067_begin_0 = const()[name = tensor("op_3067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3067_end_0 = const()[name = tensor("op_3067_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3067_end_mask_0 = const()[name = tensor("op_3067_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3067_cast_fp16 = slice_by_index(begin = var_3067_begin_0, end = var_3067_end_0, end_mask = var_3067_end_mask_0, x = var_2910_cast_fp16)[name = tensor("op_3067_cast_fp16")]; + tensor var_3074_begin_0 = const()[name = tensor("op_3074_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3074_end_0 = const()[name = tensor("op_3074_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3074_end_mask_0 = const()[name = tensor("op_3074_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3074_cast_fp16 = slice_by_index(begin = var_3074_begin_0, end = var_3074_end_0, end_mask = var_3074_end_mask_0, x = var_2910_cast_fp16)[name = tensor("op_3074_cast_fp16")]; + tensor var_3081_begin_0 = const()[name = tensor("op_3081_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3081_end_0 = const()[name = tensor("op_3081_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3081_end_mask_0 = const()[name = tensor("op_3081_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3081_cast_fp16 = slice_by_index(begin = var_3081_begin_0, end = var_3081_end_0, end_mask = var_3081_end_mask_0, x = var_2910_cast_fp16)[name = tensor("op_3081_cast_fp16")]; + tensor var_3088_begin_0 = const()[name = tensor("op_3088_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3088_end_0 = const()[name = tensor("op_3088_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3088_end_mask_0 = const()[name = tensor("op_3088_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3088_cast_fp16 = slice_by_index(begin = var_3088_begin_0, end = var_3088_end_0, end_mask = var_3088_end_mask_0, x = var_2910_cast_fp16)[name = tensor("op_3088_cast_fp16")]; + tensor var_3095_begin_0 = const()[name = tensor("op_3095_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3095_end_0 = const()[name = tensor("op_3095_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3095_end_mask_0 = const()[name = tensor("op_3095_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3095_cast_fp16 = slice_by_index(begin = var_3095_begin_0, end = var_3095_end_0, end_mask = var_3095_end_mask_0, x = var_2914_cast_fp16)[name = tensor("op_3095_cast_fp16")]; + tensor var_3102_begin_0 = const()[name = tensor("op_3102_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3102_end_0 = const()[name = tensor("op_3102_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3102_end_mask_0 = const()[name = tensor("op_3102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3102_cast_fp16 = slice_by_index(begin = var_3102_begin_0, end = var_3102_end_0, end_mask = var_3102_end_mask_0, x = var_2914_cast_fp16)[name = tensor("op_3102_cast_fp16")]; + tensor var_3109_begin_0 = const()[name = tensor("op_3109_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3109_end_0 = const()[name = tensor("op_3109_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3109_end_mask_0 = const()[name = tensor("op_3109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3109_cast_fp16 = slice_by_index(begin = var_3109_begin_0, end = var_3109_end_0, end_mask = 
var_3109_end_mask_0, x = var_2914_cast_fp16)[name = tensor("op_3109_cast_fp16")]; + tensor var_3116_begin_0 = const()[name = tensor("op_3116_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3116_end_0 = const()[name = tensor("op_3116_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3116_end_mask_0 = const()[name = tensor("op_3116_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3116_cast_fp16 = slice_by_index(begin = var_3116_begin_0, end = var_3116_end_0, end_mask = var_3116_end_mask_0, x = var_2914_cast_fp16)[name = tensor("op_3116_cast_fp16")]; + tensor var_3123_begin_0 = const()[name = tensor("op_3123_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3123_end_0 = const()[name = tensor("op_3123_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3123_end_mask_0 = const()[name = tensor("op_3123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3123_cast_fp16 = slice_by_index(begin = var_3123_begin_0, end = var_3123_end_0, end_mask = var_3123_end_mask_0, x = var_2918_cast_fp16)[name = tensor("op_3123_cast_fp16")]; + tensor var_3130_begin_0 = const()[name = tensor("op_3130_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3130_end_0 = const()[name = tensor("op_3130_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3130_end_mask_0 = const()[name = tensor("op_3130_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3130_cast_fp16 = slice_by_index(begin = var_3130_begin_0, end = var_3130_end_0, end_mask = var_3130_end_mask_0, x = var_2918_cast_fp16)[name = tensor("op_3130_cast_fp16")]; + tensor var_3137_begin_0 = const()[name = tensor("op_3137_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3137_end_0 = const()[name = tensor("op_3137_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3137_end_mask_0 = const()[name = tensor("op_3137_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3137_cast_fp16 = slice_by_index(begin = var_3137_begin_0, end = var_3137_end_0, end_mask = var_3137_end_mask_0, x = var_2918_cast_fp16)[name = tensor("op_3137_cast_fp16")]; + tensor var_3144_begin_0 = const()[name = tensor("op_3144_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3144_end_0 = const()[name = tensor("op_3144_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3144_end_mask_0 = const()[name = tensor("op_3144_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3144_cast_fp16 = slice_by_index(begin = var_3144_begin_0, end = var_3144_end_0, end_mask = var_3144_end_mask_0, x = var_2918_cast_fp16)[name = tensor("op_3144_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_3149_begin_0 = const()[name = tensor("op_3149_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3149_end_0 = const()[name = tensor("op_3149_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_3149_end_mask_0 = const()[name = tensor("op_3149_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_1 = transpose(perm = k_9_perm_0, x = key_9_cast_fp16)[name = tensor("transpose_1")]; + tensor var_3149_cast_fp16 = slice_by_index(begin = var_3149_begin_0, end = var_3149_end_0, end_mask = var_3149_end_mask_0, x = transpose_1)[name = tensor("op_3149_cast_fp16")]; + tensor var_3153_begin_0 = const()[name = tensor("op_3153_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_3153_end_0 = const()[name = tensor("op_3153_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_3153_end_mask_0 = const()[name = 
tensor("op_3153_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3153_cast_fp16 = slice_by_index(begin = var_3153_begin_0, end = var_3153_end_0, end_mask = var_3153_end_mask_0, x = transpose_1)[name = tensor("op_3153_cast_fp16")]; + tensor var_3157_begin_0 = const()[name = tensor("op_3157_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_3157_end_0 = const()[name = tensor("op_3157_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_3157_end_mask_0 = const()[name = tensor("op_3157_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3157_cast_fp16 = slice_by_index(begin = var_3157_begin_0, end = var_3157_end_0, end_mask = var_3157_end_mask_0, x = transpose_1)[name = tensor("op_3157_cast_fp16")]; + tensor var_3161_begin_0 = const()[name = tensor("op_3161_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_3161_end_0 = const()[name = tensor("op_3161_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_3161_end_mask_0 = const()[name = tensor("op_3161_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3161_cast_fp16 = slice_by_index(begin = var_3161_begin_0, end = var_3161_end_0, end_mask = var_3161_end_mask_0, x = transpose_1)[name = tensor("op_3161_cast_fp16")]; + tensor var_3165_begin_0 = const()[name = tensor("op_3165_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_3165_end_0 = const()[name = tensor("op_3165_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_3165_end_mask_0 = const()[name = tensor("op_3165_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3165_cast_fp16 = slice_by_index(begin = var_3165_begin_0, end = var_3165_end_0, end_mask = var_3165_end_mask_0, x = transpose_1)[name = tensor("op_3165_cast_fp16")]; + tensor var_3169_begin_0 = const()[name = tensor("op_3169_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_3169_end_0 = const()[name = tensor("op_3169_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_3169_end_mask_0 = const()[name = tensor("op_3169_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3169_cast_fp16 = slice_by_index(begin = var_3169_begin_0, end = var_3169_end_0, end_mask = var_3169_end_mask_0, x = transpose_1)[name = tensor("op_3169_cast_fp16")]; + tensor var_3173_begin_0 = const()[name = tensor("op_3173_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_3173_end_0 = const()[name = tensor("op_3173_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_3173_end_mask_0 = const()[name = tensor("op_3173_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3173_cast_fp16 = slice_by_index(begin = var_3173_begin_0, end = var_3173_end_0, end_mask = var_3173_end_mask_0, x = transpose_1)[name = tensor("op_3173_cast_fp16")]; + tensor var_3177_begin_0 = const()[name = tensor("op_3177_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_3177_end_0 = const()[name = tensor("op_3177_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_3177_end_mask_0 = const()[name = tensor("op_3177_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3177_cast_fp16 = slice_by_index(begin = var_3177_begin_0, end = var_3177_end_0, end_mask = var_3177_end_mask_0, x = transpose_1)[name = tensor("op_3177_cast_fp16")]; + tensor var_3179_begin_0 = const()[name = tensor("op_3179_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3179_end_0 = const()[name = tensor("op_3179_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3179_end_mask_0 = const()[name = tensor("op_3179_end_mask_0"), val = tensor([true, false, true, true])]; 
+ tensor var_3179_cast_fp16 = slice_by_index(begin = var_3179_begin_0, end = var_3179_end_0, end_mask = var_3179_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3179_cast_fp16")]; + tensor var_3183_begin_0 = const()[name = tensor("op_3183_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_3183_end_0 = const()[name = tensor("op_3183_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_3183_end_mask_0 = const()[name = tensor("op_3183_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3183_cast_fp16 = slice_by_index(begin = var_3183_begin_0, end = var_3183_end_0, end_mask = var_3183_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3183_cast_fp16")]; + tensor var_3187_begin_0 = const()[name = tensor("op_3187_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_3187_end_0 = const()[name = tensor("op_3187_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_3187_end_mask_0 = const()[name = tensor("op_3187_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3187_cast_fp16 = slice_by_index(begin = var_3187_begin_0, end = var_3187_end_0, end_mask = var_3187_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3187_cast_fp16")]; + tensor var_3191_begin_0 = const()[name = tensor("op_3191_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_3191_end_0 = const()[name = tensor("op_3191_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_3191_end_mask_0 = const()[name = tensor("op_3191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3191_cast_fp16 = slice_by_index(begin = var_3191_begin_0, end = var_3191_end_0, end_mask = var_3191_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3191_cast_fp16")]; + tensor var_3195_begin_0 = const()[name = tensor("op_3195_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_3195_end_0 = const()[name = tensor("op_3195_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_3195_end_mask_0 = const()[name = tensor("op_3195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3195_cast_fp16 = slice_by_index(begin = var_3195_begin_0, end = var_3195_end_0, end_mask = var_3195_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3195_cast_fp16")]; + tensor var_3199_begin_0 = const()[name = tensor("op_3199_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_3199_end_0 = const()[name = tensor("op_3199_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_3199_end_mask_0 = const()[name = tensor("op_3199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3199_cast_fp16 = slice_by_index(begin = var_3199_begin_0, end = var_3199_end_0, end_mask = var_3199_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3199_cast_fp16")]; + tensor var_3203_begin_0 = const()[name = tensor("op_3203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_3203_end_0 = const()[name = tensor("op_3203_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_3203_end_mask_0 = const()[name = tensor("op_3203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3203_cast_fp16 = slice_by_index(begin = var_3203_begin_0, end = var_3203_end_0, end_mask = var_3203_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3203_cast_fp16")]; + tensor var_3207_begin_0 = const()[name = tensor("op_3207_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_3207_end_0 = const()[name = tensor("op_3207_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_3207_end_mask_0 = const()[name = tensor("op_3207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3207_cast_fp16 
= slice_by_index(begin = var_3207_begin_0, end = var_3207_end_0, end_mask = var_3207_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3207_cast_fp16")]; + tensor var_3211_equation_0 = const()[name = tensor("op_3211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3211_cast_fp16 = einsum(equation = var_3211_equation_0, values = (var_3149_cast_fp16, var_2927_cast_fp16))[name = tensor("op_3211_cast_fp16")]; + tensor var_3212_to_fp16 = const()[name = tensor("op_3212_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_257_cast_fp16 = mul(x = var_3211_cast_fp16, y = var_3212_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; + tensor var_3215_equation_0 = const()[name = tensor("op_3215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3215_cast_fp16 = einsum(equation = var_3215_equation_0, values = (var_3149_cast_fp16, var_2934_cast_fp16))[name = tensor("op_3215_cast_fp16")]; + tensor var_3216_to_fp16 = const()[name = tensor("op_3216_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_259_cast_fp16 = mul(x = var_3215_cast_fp16, y = var_3216_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; + tensor var_3219_equation_0 = const()[name = tensor("op_3219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3219_cast_fp16 = einsum(equation = var_3219_equation_0, values = (var_3149_cast_fp16, var_2941_cast_fp16))[name = tensor("op_3219_cast_fp16")]; + tensor var_3220_to_fp16 = const()[name = tensor("op_3220_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_261_cast_fp16 = mul(x = var_3219_cast_fp16, y = var_3220_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; + tensor var_3223_equation_0 = const()[name = tensor("op_3223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3223_cast_fp16 = einsum(equation = var_3223_equation_0, values = (var_3149_cast_fp16, var_2948_cast_fp16))[name = tensor("op_3223_cast_fp16")]; + tensor var_3224_to_fp16 = const()[name = tensor("op_3224_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_263_cast_fp16 = mul(x = var_3223_cast_fp16, y = var_3224_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; + tensor var_3227_equation_0 = const()[name = tensor("op_3227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3227_cast_fp16 = einsum(equation = var_3227_equation_0, values = (var_3153_cast_fp16, var_2955_cast_fp16))[name = tensor("op_3227_cast_fp16")]; + tensor var_3228_to_fp16 = const()[name = tensor("op_3228_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_265_cast_fp16 = mul(x = var_3227_cast_fp16, y = var_3228_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; + tensor var_3231_equation_0 = const()[name = tensor("op_3231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3231_cast_fp16 = einsum(equation = var_3231_equation_0, values = (var_3153_cast_fp16, var_2962_cast_fp16))[name = tensor("op_3231_cast_fp16")]; + tensor var_3232_to_fp16 = const()[name = tensor("op_3232_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_267_cast_fp16 = mul(x = var_3231_cast_fp16, y = var_3232_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; + tensor var_3235_equation_0 = const()[name = tensor("op_3235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3235_cast_fp16 = einsum(equation = var_3235_equation_0, values = (var_3153_cast_fp16, var_2969_cast_fp16))[name = tensor("op_3235_cast_fp16")]; + tensor var_3236_to_fp16 = const()[name = tensor("op_3236_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_269_cast_fp16 = mul(x = var_3235_cast_fp16, y = var_3236_to_fp16)[name = 
tensor("aw_chunk_269_cast_fp16")]; + tensor var_3239_equation_0 = const()[name = tensor("op_3239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3239_cast_fp16 = einsum(equation = var_3239_equation_0, values = (var_3153_cast_fp16, var_2976_cast_fp16))[name = tensor("op_3239_cast_fp16")]; + tensor var_3240_to_fp16 = const()[name = tensor("op_3240_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_271_cast_fp16 = mul(x = var_3239_cast_fp16, y = var_3240_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; + tensor var_3243_equation_0 = const()[name = tensor("op_3243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3243_cast_fp16 = einsum(equation = var_3243_equation_0, values = (var_3157_cast_fp16, var_2983_cast_fp16))[name = tensor("op_3243_cast_fp16")]; + tensor var_3244_to_fp16 = const()[name = tensor("op_3244_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_273_cast_fp16 = mul(x = var_3243_cast_fp16, y = var_3244_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; + tensor var_3247_equation_0 = const()[name = tensor("op_3247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3247_cast_fp16 = einsum(equation = var_3247_equation_0, values = (var_3157_cast_fp16, var_2990_cast_fp16))[name = tensor("op_3247_cast_fp16")]; + tensor var_3248_to_fp16 = const()[name = tensor("op_3248_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_275_cast_fp16 = mul(x = var_3247_cast_fp16, y = var_3248_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; + tensor var_3251_equation_0 = const()[name = tensor("op_3251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3251_cast_fp16 = einsum(equation = var_3251_equation_0, values = (var_3157_cast_fp16, var_2997_cast_fp16))[name = tensor("op_3251_cast_fp16")]; + tensor var_3252_to_fp16 = const()[name = tensor("op_3252_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_277_cast_fp16 = mul(x = var_3251_cast_fp16, y = var_3252_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; + tensor var_3255_equation_0 = const()[name = tensor("op_3255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3255_cast_fp16 = einsum(equation = var_3255_equation_0, values = (var_3157_cast_fp16, var_3004_cast_fp16))[name = tensor("op_3255_cast_fp16")]; + tensor var_3256_to_fp16 = const()[name = tensor("op_3256_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_279_cast_fp16 = mul(x = var_3255_cast_fp16, y = var_3256_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; + tensor var_3259_equation_0 = const()[name = tensor("op_3259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3259_cast_fp16 = einsum(equation = var_3259_equation_0, values = (var_3161_cast_fp16, var_3011_cast_fp16))[name = tensor("op_3259_cast_fp16")]; + tensor var_3260_to_fp16 = const()[name = tensor("op_3260_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_281_cast_fp16 = mul(x = var_3259_cast_fp16, y = var_3260_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; + tensor var_3263_equation_0 = const()[name = tensor("op_3263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3263_cast_fp16 = einsum(equation = var_3263_equation_0, values = (var_3161_cast_fp16, var_3018_cast_fp16))[name = tensor("op_3263_cast_fp16")]; + tensor var_3264_to_fp16 = const()[name = tensor("op_3264_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_283_cast_fp16 = mul(x = var_3263_cast_fp16, y = var_3264_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; + tensor var_3267_equation_0 = const()[name = tensor("op_3267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + 
tensor var_3267_cast_fp16 = einsum(equation = var_3267_equation_0, values = (var_3161_cast_fp16, var_3025_cast_fp16))[name = tensor("op_3267_cast_fp16")]; + tensor var_3268_to_fp16 = const()[name = tensor("op_3268_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_285_cast_fp16 = mul(x = var_3267_cast_fp16, y = var_3268_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; + tensor var_3271_equation_0 = const()[name = tensor("op_3271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3271_cast_fp16 = einsum(equation = var_3271_equation_0, values = (var_3161_cast_fp16, var_3032_cast_fp16))[name = tensor("op_3271_cast_fp16")]; + tensor var_3272_to_fp16 = const()[name = tensor("op_3272_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_287_cast_fp16 = mul(x = var_3271_cast_fp16, y = var_3272_to_fp16)[name = tensor("aw_chunk_287_cast_fp16")]; + tensor var_3275_equation_0 = const()[name = tensor("op_3275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3275_cast_fp16 = einsum(equation = var_3275_equation_0, values = (var_3165_cast_fp16, var_3039_cast_fp16))[name = tensor("op_3275_cast_fp16")]; + tensor var_3276_to_fp16 = const()[name = tensor("op_3276_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_289_cast_fp16 = mul(x = var_3275_cast_fp16, y = var_3276_to_fp16)[name = tensor("aw_chunk_289_cast_fp16")]; + tensor var_3279_equation_0 = const()[name = tensor("op_3279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3279_cast_fp16 = einsum(equation = var_3279_equation_0, values = (var_3165_cast_fp16, var_3046_cast_fp16))[name = tensor("op_3279_cast_fp16")]; + tensor var_3280_to_fp16 = const()[name = tensor("op_3280_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_291_cast_fp16 = mul(x = var_3279_cast_fp16, y = var_3280_to_fp16)[name = tensor("aw_chunk_291_cast_fp16")]; + tensor var_3283_equation_0 = const()[name = tensor("op_3283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3283_cast_fp16 = einsum(equation = var_3283_equation_0, values = (var_3165_cast_fp16, var_3053_cast_fp16))[name = tensor("op_3283_cast_fp16")]; + tensor var_3284_to_fp16 = const()[name = tensor("op_3284_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_293_cast_fp16 = mul(x = var_3283_cast_fp16, y = var_3284_to_fp16)[name = tensor("aw_chunk_293_cast_fp16")]; + tensor var_3287_equation_0 = const()[name = tensor("op_3287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3287_cast_fp16 = einsum(equation = var_3287_equation_0, values = (var_3165_cast_fp16, var_3060_cast_fp16))[name = tensor("op_3287_cast_fp16")]; + tensor var_3288_to_fp16 = const()[name = tensor("op_3288_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_295_cast_fp16 = mul(x = var_3287_cast_fp16, y = var_3288_to_fp16)[name = tensor("aw_chunk_295_cast_fp16")]; + tensor var_3291_equation_0 = const()[name = tensor("op_3291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3291_cast_fp16 = einsum(equation = var_3291_equation_0, values = (var_3169_cast_fp16, var_3067_cast_fp16))[name = tensor("op_3291_cast_fp16")]; + tensor var_3292_to_fp16 = const()[name = tensor("op_3292_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_297_cast_fp16 = mul(x = var_3291_cast_fp16, y = var_3292_to_fp16)[name = tensor("aw_chunk_297_cast_fp16")]; + tensor var_3295_equation_0 = const()[name = tensor("op_3295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3295_cast_fp16 = einsum(equation = var_3295_equation_0, values = (var_3169_cast_fp16, var_3074_cast_fp16))[name = 
tensor("op_3295_cast_fp16")]; + tensor var_3296_to_fp16 = const()[name = tensor("op_3296_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_299_cast_fp16 = mul(x = var_3295_cast_fp16, y = var_3296_to_fp16)[name = tensor("aw_chunk_299_cast_fp16")]; + tensor var_3299_equation_0 = const()[name = tensor("op_3299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3299_cast_fp16 = einsum(equation = var_3299_equation_0, values = (var_3169_cast_fp16, var_3081_cast_fp16))[name = tensor("op_3299_cast_fp16")]; + tensor var_3300_to_fp16 = const()[name = tensor("op_3300_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_301_cast_fp16 = mul(x = var_3299_cast_fp16, y = var_3300_to_fp16)[name = tensor("aw_chunk_301_cast_fp16")]; + tensor var_3303_equation_0 = const()[name = tensor("op_3303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3303_cast_fp16 = einsum(equation = var_3303_equation_0, values = (var_3169_cast_fp16, var_3088_cast_fp16))[name = tensor("op_3303_cast_fp16")]; + tensor var_3304_to_fp16 = const()[name = tensor("op_3304_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_303_cast_fp16 = mul(x = var_3303_cast_fp16, y = var_3304_to_fp16)[name = tensor("aw_chunk_303_cast_fp16")]; + tensor var_3307_equation_0 = const()[name = tensor("op_3307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3307_cast_fp16 = einsum(equation = var_3307_equation_0, values = (var_3173_cast_fp16, var_3095_cast_fp16))[name = tensor("op_3307_cast_fp16")]; + tensor var_3308_to_fp16 = const()[name = tensor("op_3308_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_305_cast_fp16 = mul(x = var_3307_cast_fp16, y = var_3308_to_fp16)[name = tensor("aw_chunk_305_cast_fp16")]; + tensor var_3311_equation_0 = const()[name = tensor("op_3311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3311_cast_fp16 = einsum(equation = var_3311_equation_0, values = (var_3173_cast_fp16, var_3102_cast_fp16))[name = tensor("op_3311_cast_fp16")]; + tensor var_3312_to_fp16 = const()[name = tensor("op_3312_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_307_cast_fp16 = mul(x = var_3311_cast_fp16, y = var_3312_to_fp16)[name = tensor("aw_chunk_307_cast_fp16")]; + tensor var_3315_equation_0 = const()[name = tensor("op_3315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3315_cast_fp16 = einsum(equation = var_3315_equation_0, values = (var_3173_cast_fp16, var_3109_cast_fp16))[name = tensor("op_3315_cast_fp16")]; + tensor var_3316_to_fp16 = const()[name = tensor("op_3316_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_309_cast_fp16 = mul(x = var_3315_cast_fp16, y = var_3316_to_fp16)[name = tensor("aw_chunk_309_cast_fp16")]; + tensor var_3319_equation_0 = const()[name = tensor("op_3319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3319_cast_fp16 = einsum(equation = var_3319_equation_0, values = (var_3173_cast_fp16, var_3116_cast_fp16))[name = tensor("op_3319_cast_fp16")]; + tensor var_3320_to_fp16 = const()[name = tensor("op_3320_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_311_cast_fp16 = mul(x = var_3319_cast_fp16, y = var_3320_to_fp16)[name = tensor("aw_chunk_311_cast_fp16")]; + tensor var_3323_equation_0 = const()[name = tensor("op_3323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3323_cast_fp16 = einsum(equation = var_3323_equation_0, values = (var_3177_cast_fp16, var_3123_cast_fp16))[name = tensor("op_3323_cast_fp16")]; + tensor var_3324_to_fp16 = const()[name = tensor("op_3324_to_fp16"), val = tensor(0x1p-3)]; + tensor 
aw_chunk_313_cast_fp16 = mul(x = var_3323_cast_fp16, y = var_3324_to_fp16)[name = tensor("aw_chunk_313_cast_fp16")]; + tensor var_3327_equation_0 = const()[name = tensor("op_3327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3327_cast_fp16 = einsum(equation = var_3327_equation_0, values = (var_3177_cast_fp16, var_3130_cast_fp16))[name = tensor("op_3327_cast_fp16")]; + tensor var_3328_to_fp16 = const()[name = tensor("op_3328_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_315_cast_fp16 = mul(x = var_3327_cast_fp16, y = var_3328_to_fp16)[name = tensor("aw_chunk_315_cast_fp16")]; + tensor var_3331_equation_0 = const()[name = tensor("op_3331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3331_cast_fp16 = einsum(equation = var_3331_equation_0, values = (var_3177_cast_fp16, var_3137_cast_fp16))[name = tensor("op_3331_cast_fp16")]; + tensor var_3332_to_fp16 = const()[name = tensor("op_3332_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_317_cast_fp16 = mul(x = var_3331_cast_fp16, y = var_3332_to_fp16)[name = tensor("aw_chunk_317_cast_fp16")]; + tensor var_3335_equation_0 = const()[name = tensor("op_3335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3335_cast_fp16 = einsum(equation = var_3335_equation_0, values = (var_3177_cast_fp16, var_3144_cast_fp16))[name = tensor("op_3335_cast_fp16")]; + tensor var_3336_to_fp16 = const()[name = tensor("op_3336_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_319_cast_fp16 = mul(x = var_3335_cast_fp16, y = var_3336_to_fp16)[name = tensor("aw_chunk_319_cast_fp16")]; + tensor var_3338_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_257_cast_fp16)[name = tensor("op_3338_cast_fp16")]; + tensor var_3339_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_259_cast_fp16)[name = tensor("op_3339_cast_fp16")]; + tensor var_3340_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_261_cast_fp16)[name = tensor("op_3340_cast_fp16")]; + tensor var_3341_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_263_cast_fp16)[name = tensor("op_3341_cast_fp16")]; + tensor var_3342_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_265_cast_fp16)[name = tensor("op_3342_cast_fp16")]; + tensor var_3343_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_267_cast_fp16)[name = tensor("op_3343_cast_fp16")]; + tensor var_3344_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_269_cast_fp16)[name = tensor("op_3344_cast_fp16")]; + tensor var_3345_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_271_cast_fp16)[name = tensor("op_3345_cast_fp16")]; + tensor var_3346_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_273_cast_fp16)[name = tensor("op_3346_cast_fp16")]; + tensor var_3347_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_275_cast_fp16)[name = tensor("op_3347_cast_fp16")]; + tensor var_3348_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_277_cast_fp16)[name = tensor("op_3348_cast_fp16")]; + tensor var_3349_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_279_cast_fp16)[name = tensor("op_3349_cast_fp16")]; + tensor var_3350_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_281_cast_fp16)[name = tensor("op_3350_cast_fp16")]; + tensor var_3351_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_283_cast_fp16)[name = tensor("op_3351_cast_fp16")]; + tensor var_3352_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_285_cast_fp16)[name = tensor("op_3352_cast_fp16")]; + tensor var_3353_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_287_cast_fp16)[name = tensor("op_3353_cast_fp16")]; + tensor var_3354_cast_fp16 = softmax(axis = var_2835, x = 
aw_chunk_289_cast_fp16)[name = tensor("op_3354_cast_fp16")]; + tensor var_3355_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_291_cast_fp16)[name = tensor("op_3355_cast_fp16")]; + tensor var_3356_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_293_cast_fp16)[name = tensor("op_3356_cast_fp16")]; + tensor var_3357_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_295_cast_fp16)[name = tensor("op_3357_cast_fp16")]; + tensor var_3358_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_297_cast_fp16)[name = tensor("op_3358_cast_fp16")]; + tensor var_3359_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_299_cast_fp16)[name = tensor("op_3359_cast_fp16")]; + tensor var_3360_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_301_cast_fp16)[name = tensor("op_3360_cast_fp16")]; + tensor var_3361_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_303_cast_fp16)[name = tensor("op_3361_cast_fp16")]; + tensor var_3362_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_305_cast_fp16)[name = tensor("op_3362_cast_fp16")]; + tensor var_3363_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_307_cast_fp16)[name = tensor("op_3363_cast_fp16")]; + tensor var_3364_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_309_cast_fp16)[name = tensor("op_3364_cast_fp16")]; + tensor var_3365_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_311_cast_fp16)[name = tensor("op_3365_cast_fp16")]; + tensor var_3366_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_313_cast_fp16)[name = tensor("op_3366_cast_fp16")]; + tensor var_3367_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_315_cast_fp16)[name = tensor("op_3367_cast_fp16")]; + tensor var_3368_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_317_cast_fp16)[name = tensor("op_3368_cast_fp16")]; + tensor var_3369_cast_fp16 = softmax(axis = var_2835, x = aw_chunk_319_cast_fp16)[name = tensor("op_3369_cast_fp16")]; + tensor var_3371_equation_0 = const()[name = tensor("op_3371_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3371_cast_fp16 = einsum(equation = var_3371_equation_0, values = (var_3179_cast_fp16, var_3338_cast_fp16))[name = tensor("op_3371_cast_fp16")]; + tensor var_3373_equation_0 = const()[name = tensor("op_3373_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3373_cast_fp16 = einsum(equation = var_3373_equation_0, values = (var_3179_cast_fp16, var_3339_cast_fp16))[name = tensor("op_3373_cast_fp16")]; + tensor var_3375_equation_0 = const()[name = tensor("op_3375_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3375_cast_fp16 = einsum(equation = var_3375_equation_0, values = (var_3179_cast_fp16, var_3340_cast_fp16))[name = tensor("op_3375_cast_fp16")]; + tensor var_3377_equation_0 = const()[name = tensor("op_3377_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3377_cast_fp16 = einsum(equation = var_3377_equation_0, values = (var_3179_cast_fp16, var_3341_cast_fp16))[name = tensor("op_3377_cast_fp16")]; + tensor var_3379_equation_0 = const()[name = tensor("op_3379_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3379_cast_fp16 = einsum(equation = var_3379_equation_0, values = (var_3183_cast_fp16, var_3342_cast_fp16))[name = tensor("op_3379_cast_fp16")]; + tensor var_3381_equation_0 = const()[name = tensor("op_3381_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3381_cast_fp16 = einsum(equation = var_3381_equation_0, values = (var_3183_cast_fp16, var_3343_cast_fp16))[name = tensor("op_3381_cast_fp16")]; + tensor var_3383_equation_0 = const()[name = tensor("op_3383_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_3383_cast_fp16 = einsum(equation = var_3383_equation_0, values = (var_3183_cast_fp16, var_3344_cast_fp16))[name = tensor("op_3383_cast_fp16")]; + tensor var_3385_equation_0 = const()[name = tensor("op_3385_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3385_cast_fp16 = einsum(equation = var_3385_equation_0, values = (var_3183_cast_fp16, var_3345_cast_fp16))[name = tensor("op_3385_cast_fp16")]; + tensor var_3387_equation_0 = const()[name = tensor("op_3387_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3387_cast_fp16 = einsum(equation = var_3387_equation_0, values = (var_3187_cast_fp16, var_3346_cast_fp16))[name = tensor("op_3387_cast_fp16")]; + tensor var_3389_equation_0 = const()[name = tensor("op_3389_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3389_cast_fp16 = einsum(equation = var_3389_equation_0, values = (var_3187_cast_fp16, var_3347_cast_fp16))[name = tensor("op_3389_cast_fp16")]; + tensor var_3391_equation_0 = const()[name = tensor("op_3391_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3391_cast_fp16 = einsum(equation = var_3391_equation_0, values = (var_3187_cast_fp16, var_3348_cast_fp16))[name = tensor("op_3391_cast_fp16")]; + tensor var_3393_equation_0 = const()[name = tensor("op_3393_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3393_cast_fp16 = einsum(equation = var_3393_equation_0, values = (var_3187_cast_fp16, var_3349_cast_fp16))[name = tensor("op_3393_cast_fp16")]; + tensor var_3395_equation_0 = const()[name = tensor("op_3395_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3395_cast_fp16 = einsum(equation = var_3395_equation_0, values = (var_3191_cast_fp16, var_3350_cast_fp16))[name = tensor("op_3395_cast_fp16")]; + tensor var_3397_equation_0 = const()[name = tensor("op_3397_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3397_cast_fp16 = einsum(equation = var_3397_equation_0, values = (var_3191_cast_fp16, var_3351_cast_fp16))[name = tensor("op_3397_cast_fp16")]; + tensor var_3399_equation_0 = const()[name = tensor("op_3399_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3399_cast_fp16 = einsum(equation = var_3399_equation_0, values = (var_3191_cast_fp16, var_3352_cast_fp16))[name = tensor("op_3399_cast_fp16")]; + tensor var_3401_equation_0 = const()[name = tensor("op_3401_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3401_cast_fp16 = einsum(equation = var_3401_equation_0, values = (var_3191_cast_fp16, var_3353_cast_fp16))[name = tensor("op_3401_cast_fp16")]; + tensor var_3403_equation_0 = const()[name = tensor("op_3403_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3403_cast_fp16 = einsum(equation = var_3403_equation_0, values = (var_3195_cast_fp16, var_3354_cast_fp16))[name = tensor("op_3403_cast_fp16")]; + tensor var_3405_equation_0 = const()[name = tensor("op_3405_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3405_cast_fp16 = einsum(equation = var_3405_equation_0, values = (var_3195_cast_fp16, var_3355_cast_fp16))[name = tensor("op_3405_cast_fp16")]; + tensor var_3407_equation_0 = const()[name = tensor("op_3407_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3407_cast_fp16 = einsum(equation = var_3407_equation_0, values = (var_3195_cast_fp16, var_3356_cast_fp16))[name = tensor("op_3407_cast_fp16")]; + tensor var_3409_equation_0 = const()[name = tensor("op_3409_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3409_cast_fp16 = einsum(equation = 
var_3409_equation_0, values = (var_3195_cast_fp16, var_3357_cast_fp16))[name = tensor("op_3409_cast_fp16")]; + tensor var_3411_equation_0 = const()[name = tensor("op_3411_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3411_cast_fp16 = einsum(equation = var_3411_equation_0, values = (var_3199_cast_fp16, var_3358_cast_fp16))[name = tensor("op_3411_cast_fp16")]; + tensor var_3413_equation_0 = const()[name = tensor("op_3413_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3413_cast_fp16 = einsum(equation = var_3413_equation_0, values = (var_3199_cast_fp16, var_3359_cast_fp16))[name = tensor("op_3413_cast_fp16")]; + tensor var_3415_equation_0 = const()[name = tensor("op_3415_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3415_cast_fp16 = einsum(equation = var_3415_equation_0, values = (var_3199_cast_fp16, var_3360_cast_fp16))[name = tensor("op_3415_cast_fp16")]; + tensor var_3417_equation_0 = const()[name = tensor("op_3417_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3417_cast_fp16 = einsum(equation = var_3417_equation_0, values = (var_3199_cast_fp16, var_3361_cast_fp16))[name = tensor("op_3417_cast_fp16")]; + tensor var_3419_equation_0 = const()[name = tensor("op_3419_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3419_cast_fp16 = einsum(equation = var_3419_equation_0, values = (var_3203_cast_fp16, var_3362_cast_fp16))[name = tensor("op_3419_cast_fp16")]; + tensor var_3421_equation_0 = const()[name = tensor("op_3421_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3421_cast_fp16 = einsum(equation = var_3421_equation_0, values = (var_3203_cast_fp16, var_3363_cast_fp16))[name = tensor("op_3421_cast_fp16")]; + tensor var_3423_equation_0 = const()[name = tensor("op_3423_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3423_cast_fp16 = einsum(equation = var_3423_equation_0, values = (var_3203_cast_fp16, var_3364_cast_fp16))[name = tensor("op_3423_cast_fp16")]; + tensor var_3425_equation_0 = const()[name = tensor("op_3425_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3425_cast_fp16 = einsum(equation = var_3425_equation_0, values = (var_3203_cast_fp16, var_3365_cast_fp16))[name = tensor("op_3425_cast_fp16")]; + tensor var_3427_equation_0 = const()[name = tensor("op_3427_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3427_cast_fp16 = einsum(equation = var_3427_equation_0, values = (var_3207_cast_fp16, var_3366_cast_fp16))[name = tensor("op_3427_cast_fp16")]; + tensor var_3429_equation_0 = const()[name = tensor("op_3429_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3429_cast_fp16 = einsum(equation = var_3429_equation_0, values = (var_3207_cast_fp16, var_3367_cast_fp16))[name = tensor("op_3429_cast_fp16")]; + tensor var_3431_equation_0 = const()[name = tensor("op_3431_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3431_cast_fp16 = einsum(equation = var_3431_equation_0, values = (var_3207_cast_fp16, var_3368_cast_fp16))[name = tensor("op_3431_cast_fp16")]; + tensor var_3433_equation_0 = const()[name = tensor("op_3433_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3433_cast_fp16 = einsum(equation = var_3433_equation_0, values = (var_3207_cast_fp16, var_3369_cast_fp16))[name = tensor("op_3433_cast_fp16")]; + tensor var_3435_interleave_0 = const()[name = tensor("op_3435_interleave_0"), val = tensor(false)]; + tensor var_3435_cast_fp16 = concat(axis = var_2822, interleave = var_3435_interleave_0, values = (var_3371_cast_fp16, var_3373_cast_fp16, 
var_3375_cast_fp16, var_3377_cast_fp16))[name = tensor("op_3435_cast_fp16")]; + tensor var_3437_interleave_0 = const()[name = tensor("op_3437_interleave_0"), val = tensor(false)]; + tensor var_3437_cast_fp16 = concat(axis = var_2822, interleave = var_3437_interleave_0, values = (var_3379_cast_fp16, var_3381_cast_fp16, var_3383_cast_fp16, var_3385_cast_fp16))[name = tensor("op_3437_cast_fp16")]; + tensor var_3439_interleave_0 = const()[name = tensor("op_3439_interleave_0"), val = tensor(false)]; + tensor var_3439_cast_fp16 = concat(axis = var_2822, interleave = var_3439_interleave_0, values = (var_3387_cast_fp16, var_3389_cast_fp16, var_3391_cast_fp16, var_3393_cast_fp16))[name = tensor("op_3439_cast_fp16")]; + tensor var_3441_interleave_0 = const()[name = tensor("op_3441_interleave_0"), val = tensor(false)]; + tensor var_3441_cast_fp16 = concat(axis = var_2822, interleave = var_3441_interleave_0, values = (var_3395_cast_fp16, var_3397_cast_fp16, var_3399_cast_fp16, var_3401_cast_fp16))[name = tensor("op_3441_cast_fp16")]; + tensor var_3443_interleave_0 = const()[name = tensor("op_3443_interleave_0"), val = tensor(false)]; + tensor var_3443_cast_fp16 = concat(axis = var_2822, interleave = var_3443_interleave_0, values = (var_3403_cast_fp16, var_3405_cast_fp16, var_3407_cast_fp16, var_3409_cast_fp16))[name = tensor("op_3443_cast_fp16")]; + tensor var_3445_interleave_0 = const()[name = tensor("op_3445_interleave_0"), val = tensor(false)]; + tensor var_3445_cast_fp16 = concat(axis = var_2822, interleave = var_3445_interleave_0, values = (var_3411_cast_fp16, var_3413_cast_fp16, var_3415_cast_fp16, var_3417_cast_fp16))[name = tensor("op_3445_cast_fp16")]; + tensor var_3447_interleave_0 = const()[name = tensor("op_3447_interleave_0"), val = tensor(false)]; + tensor var_3447_cast_fp16 = concat(axis = var_2822, interleave = var_3447_interleave_0, values = (var_3419_cast_fp16, var_3421_cast_fp16, var_3423_cast_fp16, var_3425_cast_fp16))[name = tensor("op_3447_cast_fp16")]; + tensor var_3449_interleave_0 = const()[name = tensor("op_3449_interleave_0"), val = tensor(false)]; + tensor var_3449_cast_fp16 = concat(axis = var_2822, interleave = var_3449_interleave_0, values = (var_3427_cast_fp16, var_3429_cast_fp16, var_3431_cast_fp16, var_3433_cast_fp16))[name = tensor("op_3449_cast_fp16")]; + tensor input_33_interleave_0 = const()[name = tensor("input_33_interleave_0"), val = tensor(false)]; + tensor input_33_cast_fp16 = concat(axis = var_2835, interleave = input_33_interleave_0, values = (var_3435_cast_fp16, var_3437_cast_fp16, var_3439_cast_fp16, var_3441_cast_fp16, var_3443_cast_fp16, var_3445_cast_fp16, var_3447_cast_fp16, var_3449_cast_fp16))[name = tensor("input_33_cast_fp16")]; + tensor var_3454 = const()[name = tensor("op_3454"), val = tensor([1, 1])]; + tensor var_3456 = const()[name = tensor("op_3456"), val = tensor([1, 1])]; + tensor obj_19_pad_type_0 = const()[name = tensor("obj_19_pad_type_0"), val = tensor("custom")]; + tensor obj_19_pad_0 = const()[name = tensor("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30155456)))]; + tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30679808)))]; + tensor obj_19_cast_fp16 = conv(bias = 
layers_4_self_attn_o_proj_bias_to_fp16, dilations = var_3456, groups = var_2835, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = var_3454, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_3462 = const()[name = tensor("op_3462"), val = tensor([1])]; + tensor channels_mean_19_cast_fp16 = reduce_mean(axes = var_3462, keep_dims = var_2836, x = inputs_19_cast_fp16)[name = tensor("channels_mean_19_cast_fp16")]; + tensor zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor("zero_mean_19_cast_fp16")]; + tensor zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor("zero_mean_sq_19_cast_fp16")]; + tensor var_3466 = const()[name = tensor("op_3466"), val = tensor([1])]; + tensor var_3467_cast_fp16 = reduce_mean(axes = var_3466, keep_dims = var_2836, x = zero_mean_sq_19_cast_fp16)[name = tensor("op_3467_cast_fp16")]; + tensor var_3468_to_fp16 = const()[name = tensor("op_3468_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3469_cast_fp16 = add(x = var_3467_cast_fp16, y = var_3468_to_fp16)[name = tensor("op_3469_cast_fp16")]; + tensor denom_19_epsilon_0_to_fp16 = const()[name = tensor("denom_19_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_3469_cast_fp16)[name = tensor("denom_19_cast_fp16")]; + tensor out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30680896)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30681984)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_3480 = const()[name = tensor("op_3480"), val = tensor([1, 1])]; + tensor var_3482 = const()[name = tensor("op_3482"), val = tensor([1, 1])]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("custom")]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30683072)))]; + tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32780288)))]; + tensor input_37_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = var_3482, groups = var_2835, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_3480, weight = layers_4_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), 
val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_3488 = const()[name = tensor("op_3488"), val = tensor([1, 1])]; + tensor var_3490 = const()[name = tensor("op_3490"), val = tensor([1, 1])]; + tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32784448)))]; + tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34881664)))]; + tensor hidden_states_13_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = var_3490, groups = var_2835, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = var_3488, weight = layers_4_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_3497 = const()[name = tensor("op_3497"), val = tensor(3)]; + tensor var_3510 = const()[name = tensor("op_3510"), val = tensor(1)]; + tensor var_3511 = const()[name = tensor("op_3511"), val = tensor(true)]; + tensor var_3521 = const()[name = tensor("op_3521"), val = tensor([1])]; + tensor channels_mean_21_cast_fp16 = reduce_mean(axes = var_3521, keep_dims = var_3511, x = inputs_21_cast_fp16)[name = tensor("channels_mean_21_cast_fp16")]; + tensor zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = tensor("zero_mean_21_cast_fp16")]; + tensor zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor("zero_mean_sq_21_cast_fp16")]; + tensor var_3525 = const()[name = tensor("op_3525"), val = tensor([1])]; + tensor var_3526_cast_fp16 = reduce_mean(axes = var_3525, keep_dims = var_3511, x = zero_mean_sq_21_cast_fp16)[name = tensor("op_3526_cast_fp16")]; + tensor var_3527_to_fp16 = const()[name = tensor("op_3527_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3528_cast_fp16 = add(x = var_3526_cast_fp16, y = var_3527_to_fp16)[name = tensor("op_3528_cast_fp16")]; + tensor denom_21_epsilon_0_to_fp16 = const()[name = tensor("denom_21_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_3528_cast_fp16)[name = tensor("denom_21_cast_fp16")]; + tensor out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34882752)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34883840)))]; + tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = 
obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor var_3543 = const()[name = tensor("op_3543"), val = tensor([1, 1])]; + tensor var_3545 = const()[name = tensor("op_3545"), val = tensor([1, 1])]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("custom")]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34884928)))]; + tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35409280)))]; + tensor query_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = var_3545, groups = var_3510, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_3543, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_3549 = const()[name = tensor("op_3549"), val = tensor([1, 1])]; + tensor var_3551 = const()[name = tensor("op_3551"), val = tensor([1, 1])]; + tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("custom")]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35410368)))]; + tensor key_cast_fp16 = conv(dilations = var_3551, groups = var_3510, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_3549, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_3556 = const()[name = tensor("op_3556"), val = tensor([1, 1])]; + tensor var_3558 = const()[name = tensor("op_3558"), val = tensor([1, 1])]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("custom")]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35934720)))]; + tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36459072)))]; + tensor value_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = var_3558, groups = var_3510, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_3556, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_3565_begin_0 = const()[name = tensor("op_3565_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3565_end_0 = const()[name = tensor("op_3565_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3565_end_mask_0 = const()[name = tensor("op_3565_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3565_cast_fp16 = slice_by_index(begin = var_3565_begin_0, end = var_3565_end_0, end_mask = var_3565_end_mask_0, x = query_cast_fp16)[name = tensor("op_3565_cast_fp16")]; + tensor var_3569_begin_0 = 
const()[name = tensor("op_3569_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_3569_end_0 = const()[name = tensor("op_3569_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_3569_end_mask_0 = const()[name = tensor("op_3569_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3569_cast_fp16 = slice_by_index(begin = var_3569_begin_0, end = var_3569_end_0, end_mask = var_3569_end_mask_0, x = query_cast_fp16)[name = tensor("op_3569_cast_fp16")]; + tensor var_3573_begin_0 = const()[name = tensor("op_3573_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_3573_end_0 = const()[name = tensor("op_3573_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_3573_end_mask_0 = const()[name = tensor("op_3573_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3573_cast_fp16 = slice_by_index(begin = var_3573_begin_0, end = var_3573_end_0, end_mask = var_3573_end_mask_0, x = query_cast_fp16)[name = tensor("op_3573_cast_fp16")]; + tensor var_3577_begin_0 = const()[name = tensor("op_3577_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_3577_end_0 = const()[name = tensor("op_3577_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_3577_end_mask_0 = const()[name = tensor("op_3577_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3577_cast_fp16 = slice_by_index(begin = var_3577_begin_0, end = var_3577_end_0, end_mask = var_3577_end_mask_0, x = query_cast_fp16)[name = tensor("op_3577_cast_fp16")]; + tensor var_3581_begin_0 = const()[name = tensor("op_3581_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_3581_end_0 = const()[name = tensor("op_3581_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_3581_end_mask_0 = const()[name = tensor("op_3581_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3581_cast_fp16 = slice_by_index(begin = var_3581_begin_0, end = var_3581_end_0, end_mask = var_3581_end_mask_0, x = query_cast_fp16)[name = tensor("op_3581_cast_fp16")]; + tensor var_3585_begin_0 = const()[name = tensor("op_3585_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_3585_end_0 = const()[name = tensor("op_3585_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_3585_end_mask_0 = const()[name = tensor("op_3585_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3585_cast_fp16 = slice_by_index(begin = var_3585_begin_0, end = var_3585_end_0, end_mask = var_3585_end_mask_0, x = query_cast_fp16)[name = tensor("op_3585_cast_fp16")]; + tensor var_3589_begin_0 = const()[name = tensor("op_3589_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_3589_end_0 = const()[name = tensor("op_3589_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_3589_end_mask_0 = const()[name = tensor("op_3589_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3589_cast_fp16 = slice_by_index(begin = var_3589_begin_0, end = var_3589_end_0, end_mask = var_3589_end_mask_0, x = query_cast_fp16)[name = tensor("op_3589_cast_fp16")]; + tensor var_3593_begin_0 = const()[name = tensor("op_3593_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_3593_end_0 = const()[name = tensor("op_3593_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_3593_end_mask_0 = const()[name = tensor("op_3593_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3593_cast_fp16 = slice_by_index(begin = var_3593_begin_0, end = var_3593_end_0, end_mask = var_3593_end_mask_0, x = query_cast_fp16)[name = tensor("op_3593_cast_fp16")]; + tensor var_3602_begin_0 = const()[name = tensor("op_3602_begin_0"), 
val = tensor([0, 0, 0, 0])]; + tensor var_3602_end_0 = const()[name = tensor("op_3602_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3602_end_mask_0 = const()[name = tensor("op_3602_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3602_cast_fp16 = slice_by_index(begin = var_3602_begin_0, end = var_3602_end_0, end_mask = var_3602_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3602_cast_fp16")]; + tensor var_3609_begin_0 = const()[name = tensor("op_3609_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3609_end_0 = const()[name = tensor("op_3609_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3609_end_mask_0 = const()[name = tensor("op_3609_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3609_cast_fp16 = slice_by_index(begin = var_3609_begin_0, end = var_3609_end_0, end_mask = var_3609_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3609_cast_fp16")]; + tensor var_3616_begin_0 = const()[name = tensor("op_3616_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3616_end_0 = const()[name = tensor("op_3616_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3616_end_mask_0 = const()[name = tensor("op_3616_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3616_cast_fp16 = slice_by_index(begin = var_3616_begin_0, end = var_3616_end_0, end_mask = var_3616_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3616_cast_fp16")]; + tensor var_3623_begin_0 = const()[name = tensor("op_3623_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3623_end_0 = const()[name = tensor("op_3623_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3623_end_mask_0 = const()[name = tensor("op_3623_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3623_cast_fp16 = slice_by_index(begin = var_3623_begin_0, end = var_3623_end_0, end_mask = var_3623_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3623_cast_fp16")]; + tensor var_3630_begin_0 = const()[name = tensor("op_3630_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3630_end_0 = const()[name = tensor("op_3630_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3630_end_mask_0 = const()[name = tensor("op_3630_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3630_cast_fp16 = slice_by_index(begin = var_3630_begin_0, end = var_3630_end_0, end_mask = var_3630_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3630_cast_fp16")]; + tensor var_3637_begin_0 = const()[name = tensor("op_3637_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3637_end_0 = const()[name = tensor("op_3637_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3637_end_mask_0 = const()[name = tensor("op_3637_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3637_cast_fp16 = slice_by_index(begin = var_3637_begin_0, end = var_3637_end_0, end_mask = var_3637_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3637_cast_fp16")]; + tensor var_3644_begin_0 = const()[name = tensor("op_3644_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3644_end_0 = const()[name = tensor("op_3644_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3644_end_mask_0 = const()[name = tensor("op_3644_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3644_cast_fp16 = slice_by_index(begin = var_3644_begin_0, end = var_3644_end_0, end_mask = var_3644_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3644_cast_fp16")]; + tensor var_3651_begin_0 = const()[name = tensor("op_3651_begin_0"), val = tensor([0, 0, 0, 1125])]; + 
tensor var_3651_end_0 = const()[name = tensor("op_3651_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3651_end_mask_0 = const()[name = tensor("op_3651_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3651_cast_fp16 = slice_by_index(begin = var_3651_begin_0, end = var_3651_end_0, end_mask = var_3651_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3651_cast_fp16")]; + tensor var_3658_begin_0 = const()[name = tensor("op_3658_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3658_end_0 = const()[name = tensor("op_3658_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3658_end_mask_0 = const()[name = tensor("op_3658_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3658_cast_fp16 = slice_by_index(begin = var_3658_begin_0, end = var_3658_end_0, end_mask = var_3658_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3658_cast_fp16")]; + tensor var_3665_begin_0 = const()[name = tensor("op_3665_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3665_end_0 = const()[name = tensor("op_3665_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3665_end_mask_0 = const()[name = tensor("op_3665_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3665_cast_fp16 = slice_by_index(begin = var_3665_begin_0, end = var_3665_end_0, end_mask = var_3665_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3665_cast_fp16")]; + tensor var_3672_begin_0 = const()[name = tensor("op_3672_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3672_end_0 = const()[name = tensor("op_3672_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3672_end_mask_0 = const()[name = tensor("op_3672_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3672_cast_fp16 = slice_by_index(begin = var_3672_begin_0, end = var_3672_end_0, end_mask = var_3672_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3672_cast_fp16")]; + tensor var_3679_begin_0 = const()[name = tensor("op_3679_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3679_end_0 = const()[name = tensor("op_3679_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3679_end_mask_0 = const()[name = tensor("op_3679_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3679_cast_fp16 = slice_by_index(begin = var_3679_begin_0, end = var_3679_end_0, end_mask = var_3679_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3679_cast_fp16")]; + tensor var_3686_begin_0 = const()[name = tensor("op_3686_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3686_end_0 = const()[name = tensor("op_3686_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3686_end_mask_0 = const()[name = tensor("op_3686_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3686_cast_fp16 = slice_by_index(begin = var_3686_begin_0, end = var_3686_end_0, end_mask = var_3686_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3686_cast_fp16")]; + tensor var_3693_begin_0 = const()[name = tensor("op_3693_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3693_end_0 = const()[name = tensor("op_3693_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3693_end_mask_0 = const()[name = tensor("op_3693_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3693_cast_fp16 = slice_by_index(begin = var_3693_begin_0, end = var_3693_end_0, end_mask = var_3693_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3693_cast_fp16")]; + tensor var_3700_begin_0 = const()[name = tensor("op_3700_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3700_end_0 = 
const()[name = tensor("op_3700_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3700_end_mask_0 = const()[name = tensor("op_3700_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3700_cast_fp16 = slice_by_index(begin = var_3700_begin_0, end = var_3700_end_0, end_mask = var_3700_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3700_cast_fp16")]; + tensor var_3707_begin_0 = const()[name = tensor("op_3707_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3707_end_0 = const()[name = tensor("op_3707_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3707_end_mask_0 = const()[name = tensor("op_3707_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3707_cast_fp16 = slice_by_index(begin = var_3707_begin_0, end = var_3707_end_0, end_mask = var_3707_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3707_cast_fp16")]; + tensor var_3714_begin_0 = const()[name = tensor("op_3714_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3714_end_0 = const()[name = tensor("op_3714_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3714_end_mask_0 = const()[name = tensor("op_3714_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3714_cast_fp16 = slice_by_index(begin = var_3714_begin_0, end = var_3714_end_0, end_mask = var_3714_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3714_cast_fp16")]; + tensor var_3721_begin_0 = const()[name = tensor("op_3721_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3721_end_0 = const()[name = tensor("op_3721_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3721_end_mask_0 = const()[name = tensor("op_3721_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3721_cast_fp16 = slice_by_index(begin = var_3721_begin_0, end = var_3721_end_0, end_mask = var_3721_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3721_cast_fp16")]; + tensor var_3728_begin_0 = const()[name = tensor("op_3728_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3728_end_0 = const()[name = tensor("op_3728_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3728_end_mask_0 = const()[name = tensor("op_3728_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3728_cast_fp16 = slice_by_index(begin = var_3728_begin_0, end = var_3728_end_0, end_mask = var_3728_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3728_cast_fp16")]; + tensor var_3735_begin_0 = const()[name = tensor("op_3735_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3735_end_0 = const()[name = tensor("op_3735_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3735_end_mask_0 = const()[name = tensor("op_3735_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3735_cast_fp16 = slice_by_index(begin = var_3735_begin_0, end = var_3735_end_0, end_mask = var_3735_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3735_cast_fp16")]; + tensor var_3742_begin_0 = const()[name = tensor("op_3742_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3742_end_0 = const()[name = tensor("op_3742_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3742_end_mask_0 = const()[name = tensor("op_3742_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3742_cast_fp16 = slice_by_index(begin = var_3742_begin_0, end = var_3742_end_0, end_mask = var_3742_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3742_cast_fp16")]; + tensor var_3749_begin_0 = const()[name = tensor("op_3749_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3749_end_0 = const()[name = 
tensor("op_3749_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3749_end_mask_0 = const()[name = tensor("op_3749_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3749_cast_fp16 = slice_by_index(begin = var_3749_begin_0, end = var_3749_end_0, end_mask = var_3749_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3749_cast_fp16")]; + tensor var_3756_begin_0 = const()[name = tensor("op_3756_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3756_end_0 = const()[name = tensor("op_3756_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3756_end_mask_0 = const()[name = tensor("op_3756_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3756_cast_fp16 = slice_by_index(begin = var_3756_begin_0, end = var_3756_end_0, end_mask = var_3756_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3756_cast_fp16")]; + tensor var_3763_begin_0 = const()[name = tensor("op_3763_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3763_end_0 = const()[name = tensor("op_3763_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3763_end_mask_0 = const()[name = tensor("op_3763_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3763_cast_fp16 = slice_by_index(begin = var_3763_begin_0, end = var_3763_end_0, end_mask = var_3763_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3763_cast_fp16")]; + tensor var_3770_begin_0 = const()[name = tensor("op_3770_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3770_end_0 = const()[name = tensor("op_3770_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3770_end_mask_0 = const()[name = tensor("op_3770_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3770_cast_fp16 = slice_by_index(begin = var_3770_begin_0, end = var_3770_end_0, end_mask = var_3770_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3770_cast_fp16")]; + tensor var_3777_begin_0 = const()[name = tensor("op_3777_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3777_end_0 = const()[name = tensor("op_3777_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3777_end_mask_0 = const()[name = tensor("op_3777_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3777_cast_fp16 = slice_by_index(begin = var_3777_begin_0, end = var_3777_end_0, end_mask = var_3777_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3777_cast_fp16")]; + tensor var_3784_begin_0 = const()[name = tensor("op_3784_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3784_end_0 = const()[name = tensor("op_3784_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3784_end_mask_0 = const()[name = tensor("op_3784_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3784_cast_fp16 = slice_by_index(begin = var_3784_begin_0, end = var_3784_end_0, end_mask = var_3784_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3784_cast_fp16")]; + tensor var_3791_begin_0 = const()[name = tensor("op_3791_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3791_end_0 = const()[name = tensor("op_3791_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3791_end_mask_0 = const()[name = tensor("op_3791_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3791_cast_fp16 = slice_by_index(begin = var_3791_begin_0, end = var_3791_end_0, end_mask = var_3791_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3791_cast_fp16")]; + tensor var_3798_begin_0 = const()[name = tensor("op_3798_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3798_end_0 = const()[name = tensor("op_3798_end_0"), val = 
tensor([1, 64, 1, 375])]; + tensor var_3798_end_mask_0 = const()[name = tensor("op_3798_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3798_cast_fp16 = slice_by_index(begin = var_3798_begin_0, end = var_3798_end_0, end_mask = var_3798_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_3798_cast_fp16")]; + tensor var_3805_begin_0 = const()[name = tensor("op_3805_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3805_end_0 = const()[name = tensor("op_3805_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3805_end_mask_0 = const()[name = tensor("op_3805_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3805_cast_fp16 = slice_by_index(begin = var_3805_begin_0, end = var_3805_end_0, end_mask = var_3805_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_3805_cast_fp16")]; + tensor var_3812_begin_0 = const()[name = tensor("op_3812_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3812_end_0 = const()[name = tensor("op_3812_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3812_end_mask_0 = const()[name = tensor("op_3812_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3812_cast_fp16 = slice_by_index(begin = var_3812_begin_0, end = var_3812_end_0, end_mask = var_3812_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_3812_cast_fp16")]; + tensor var_3819_begin_0 = const()[name = tensor("op_3819_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3819_end_0 = const()[name = tensor("op_3819_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3819_end_mask_0 = const()[name = tensor("op_3819_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3819_cast_fp16 = slice_by_index(begin = var_3819_begin_0, end = var_3819_end_0, end_mask = var_3819_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_3819_cast_fp16")]; + tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_3824_begin_0 = const()[name = tensor("op_3824_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3824_end_0 = const()[name = tensor("op_3824_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_3824_end_mask_0 = const()[name = tensor("op_3824_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_0 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor("transpose_0")]; + tensor var_3824_cast_fp16 = slice_by_index(begin = var_3824_begin_0, end = var_3824_end_0, end_mask = var_3824_end_mask_0, x = transpose_0)[name = tensor("op_3824_cast_fp16")]; + tensor var_3828_begin_0 = const()[name = tensor("op_3828_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_3828_end_0 = const()[name = tensor("op_3828_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_3828_end_mask_0 = const()[name = tensor("op_3828_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3828_cast_fp16 = slice_by_index(begin = var_3828_begin_0, end = var_3828_end_0, end_mask = var_3828_end_mask_0, x = transpose_0)[name = tensor("op_3828_cast_fp16")]; + tensor var_3832_begin_0 = const()[name = tensor("op_3832_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_3832_end_0 = const()[name = tensor("op_3832_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_3832_end_mask_0 = const()[name = tensor("op_3832_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3832_cast_fp16 = slice_by_index(begin = var_3832_begin_0, end = var_3832_end_0, end_mask = var_3832_end_mask_0, x = transpose_0)[name = tensor("op_3832_cast_fp16")]; + tensor var_3836_begin_0 = 
const()[name = tensor("op_3836_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_3836_end_0 = const()[name = tensor("op_3836_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_3836_end_mask_0 = const()[name = tensor("op_3836_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3836_cast_fp16 = slice_by_index(begin = var_3836_begin_0, end = var_3836_end_0, end_mask = var_3836_end_mask_0, x = transpose_0)[name = tensor("op_3836_cast_fp16")]; + tensor var_3840_begin_0 = const()[name = tensor("op_3840_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_3840_end_0 = const()[name = tensor("op_3840_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_3840_end_mask_0 = const()[name = tensor("op_3840_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3840_cast_fp16 = slice_by_index(begin = var_3840_begin_0, end = var_3840_end_0, end_mask = var_3840_end_mask_0, x = transpose_0)[name = tensor("op_3840_cast_fp16")]; + tensor var_3844_begin_0 = const()[name = tensor("op_3844_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_3844_end_0 = const()[name = tensor("op_3844_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_3844_end_mask_0 = const()[name = tensor("op_3844_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3844_cast_fp16 = slice_by_index(begin = var_3844_begin_0, end = var_3844_end_0, end_mask = var_3844_end_mask_0, x = transpose_0)[name = tensor("op_3844_cast_fp16")]; + tensor var_3848_begin_0 = const()[name = tensor("op_3848_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_3848_end_0 = const()[name = tensor("op_3848_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_3848_end_mask_0 = const()[name = tensor("op_3848_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3848_cast_fp16 = slice_by_index(begin = var_3848_begin_0, end = var_3848_end_0, end_mask = var_3848_end_mask_0, x = transpose_0)[name = tensor("op_3848_cast_fp16")]; + tensor var_3852_begin_0 = const()[name = tensor("op_3852_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_3852_end_0 = const()[name = tensor("op_3852_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_3852_end_mask_0 = const()[name = tensor("op_3852_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3852_cast_fp16 = slice_by_index(begin = var_3852_begin_0, end = var_3852_end_0, end_mask = var_3852_end_mask_0, x = transpose_0)[name = tensor("op_3852_cast_fp16")]; + tensor var_3854_begin_0 = const()[name = tensor("op_3854_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3854_end_0 = const()[name = tensor("op_3854_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3854_end_mask_0 = const()[name = tensor("op_3854_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3854_cast_fp16 = slice_by_index(begin = var_3854_begin_0, end = var_3854_end_0, end_mask = var_3854_end_mask_0, x = value_cast_fp16)[name = tensor("op_3854_cast_fp16")]; + tensor var_3858_begin_0 = const()[name = tensor("op_3858_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_3858_end_0 = const()[name = tensor("op_3858_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_3858_end_mask_0 = const()[name = tensor("op_3858_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3858_cast_fp16 = slice_by_index(begin = var_3858_begin_0, end = var_3858_end_0, end_mask = var_3858_end_mask_0, x = value_cast_fp16)[name = tensor("op_3858_cast_fp16")]; + tensor var_3862_begin_0 = const()[name = tensor("op_3862_begin_0"), val = tensor([0, 128, 
0, 0])]; + tensor var_3862_end_0 = const()[name = tensor("op_3862_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_3862_end_mask_0 = const()[name = tensor("op_3862_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3862_cast_fp16 = slice_by_index(begin = var_3862_begin_0, end = var_3862_end_0, end_mask = var_3862_end_mask_0, x = value_cast_fp16)[name = tensor("op_3862_cast_fp16")]; + tensor var_3866_begin_0 = const()[name = tensor("op_3866_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_3866_end_0 = const()[name = tensor("op_3866_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_3866_end_mask_0 = const()[name = tensor("op_3866_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3866_cast_fp16 = slice_by_index(begin = var_3866_begin_0, end = var_3866_end_0, end_mask = var_3866_end_mask_0, x = value_cast_fp16)[name = tensor("op_3866_cast_fp16")]; + tensor var_3870_begin_0 = const()[name = tensor("op_3870_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_3870_end_0 = const()[name = tensor("op_3870_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_3870_end_mask_0 = const()[name = tensor("op_3870_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3870_cast_fp16 = slice_by_index(begin = var_3870_begin_0, end = var_3870_end_0, end_mask = var_3870_end_mask_0, x = value_cast_fp16)[name = tensor("op_3870_cast_fp16")]; + tensor var_3874_begin_0 = const()[name = tensor("op_3874_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_3874_end_0 = const()[name = tensor("op_3874_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_3874_end_mask_0 = const()[name = tensor("op_3874_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3874_cast_fp16 = slice_by_index(begin = var_3874_begin_0, end = var_3874_end_0, end_mask = var_3874_end_mask_0, x = value_cast_fp16)[name = tensor("op_3874_cast_fp16")]; + tensor var_3878_begin_0 = const()[name = tensor("op_3878_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_3878_end_0 = const()[name = tensor("op_3878_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_3878_end_mask_0 = const()[name = tensor("op_3878_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3878_cast_fp16 = slice_by_index(begin = var_3878_begin_0, end = var_3878_end_0, end_mask = var_3878_end_mask_0, x = value_cast_fp16)[name = tensor("op_3878_cast_fp16")]; + tensor var_3882_begin_0 = const()[name = tensor("op_3882_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_3882_end_0 = const()[name = tensor("op_3882_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_3882_end_mask_0 = const()[name = tensor("op_3882_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3882_cast_fp16 = slice_by_index(begin = var_3882_begin_0, end = var_3882_end_0, end_mask = var_3882_end_mask_0, x = value_cast_fp16)[name = tensor("op_3882_cast_fp16")]; + tensor var_3886_equation_0 = const()[name = tensor("op_3886_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3886_cast_fp16 = einsum(equation = var_3886_equation_0, values = (var_3824_cast_fp16, var_3602_cast_fp16))[name = tensor("op_3886_cast_fp16")]; + tensor var_3887_to_fp16 = const()[name = tensor("op_3887_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_321_cast_fp16 = mul(x = var_3886_cast_fp16, y = var_3887_to_fp16)[name = tensor("aw_chunk_321_cast_fp16")]; + tensor var_3890_equation_0 = const()[name = tensor("op_3890_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3890_cast_fp16 = 
einsum(equation = var_3890_equation_0, values = (var_3824_cast_fp16, var_3609_cast_fp16))[name = tensor("op_3890_cast_fp16")]; + tensor var_3891_to_fp16 = const()[name = tensor("op_3891_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_323_cast_fp16 = mul(x = var_3890_cast_fp16, y = var_3891_to_fp16)[name = tensor("aw_chunk_323_cast_fp16")]; + tensor var_3894_equation_0 = const()[name = tensor("op_3894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3894_cast_fp16 = einsum(equation = var_3894_equation_0, values = (var_3824_cast_fp16, var_3616_cast_fp16))[name = tensor("op_3894_cast_fp16")]; + tensor var_3895_to_fp16 = const()[name = tensor("op_3895_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_325_cast_fp16 = mul(x = var_3894_cast_fp16, y = var_3895_to_fp16)[name = tensor("aw_chunk_325_cast_fp16")]; + tensor var_3898_equation_0 = const()[name = tensor("op_3898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3898_cast_fp16 = einsum(equation = var_3898_equation_0, values = (var_3824_cast_fp16, var_3623_cast_fp16))[name = tensor("op_3898_cast_fp16")]; + tensor var_3899_to_fp16 = const()[name = tensor("op_3899_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_327_cast_fp16 = mul(x = var_3898_cast_fp16, y = var_3899_to_fp16)[name = tensor("aw_chunk_327_cast_fp16")]; + tensor var_3902_equation_0 = const()[name = tensor("op_3902_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3902_cast_fp16 = einsum(equation = var_3902_equation_0, values = (var_3828_cast_fp16, var_3630_cast_fp16))[name = tensor("op_3902_cast_fp16")]; + tensor var_3903_to_fp16 = const()[name = tensor("op_3903_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_329_cast_fp16 = mul(x = var_3902_cast_fp16, y = var_3903_to_fp16)[name = tensor("aw_chunk_329_cast_fp16")]; + tensor var_3906_equation_0 = const()[name = tensor("op_3906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3906_cast_fp16 = einsum(equation = var_3906_equation_0, values = (var_3828_cast_fp16, var_3637_cast_fp16))[name = tensor("op_3906_cast_fp16")]; + tensor var_3907_to_fp16 = const()[name = tensor("op_3907_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_331_cast_fp16 = mul(x = var_3906_cast_fp16, y = var_3907_to_fp16)[name = tensor("aw_chunk_331_cast_fp16")]; + tensor var_3910_equation_0 = const()[name = tensor("op_3910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3910_cast_fp16 = einsum(equation = var_3910_equation_0, values = (var_3828_cast_fp16, var_3644_cast_fp16))[name = tensor("op_3910_cast_fp16")]; + tensor var_3911_to_fp16 = const()[name = tensor("op_3911_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_333_cast_fp16 = mul(x = var_3910_cast_fp16, y = var_3911_to_fp16)[name = tensor("aw_chunk_333_cast_fp16")]; + tensor var_3914_equation_0 = const()[name = tensor("op_3914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3914_cast_fp16 = einsum(equation = var_3914_equation_0, values = (var_3828_cast_fp16, var_3651_cast_fp16))[name = tensor("op_3914_cast_fp16")]; + tensor var_3915_to_fp16 = const()[name = tensor("op_3915_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_335_cast_fp16 = mul(x = var_3914_cast_fp16, y = var_3915_to_fp16)[name = tensor("aw_chunk_335_cast_fp16")]; + tensor var_3918_equation_0 = const()[name = tensor("op_3918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3918_cast_fp16 = einsum(equation = var_3918_equation_0, values = (var_3832_cast_fp16, var_3658_cast_fp16))[name = tensor("op_3918_cast_fp16")]; + tensor var_3919_to_fp16 
= const()[name = tensor("op_3919_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_337_cast_fp16 = mul(x = var_3918_cast_fp16, y = var_3919_to_fp16)[name = tensor("aw_chunk_337_cast_fp16")]; + tensor var_3922_equation_0 = const()[name = tensor("op_3922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3922_cast_fp16 = einsum(equation = var_3922_equation_0, values = (var_3832_cast_fp16, var_3665_cast_fp16))[name = tensor("op_3922_cast_fp16")]; + tensor var_3923_to_fp16 = const()[name = tensor("op_3923_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_339_cast_fp16 = mul(x = var_3922_cast_fp16, y = var_3923_to_fp16)[name = tensor("aw_chunk_339_cast_fp16")]; + tensor var_3926_equation_0 = const()[name = tensor("op_3926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3926_cast_fp16 = einsum(equation = var_3926_equation_0, values = (var_3832_cast_fp16, var_3672_cast_fp16))[name = tensor("op_3926_cast_fp16")]; + tensor var_3927_to_fp16 = const()[name = tensor("op_3927_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_341_cast_fp16 = mul(x = var_3926_cast_fp16, y = var_3927_to_fp16)[name = tensor("aw_chunk_341_cast_fp16")]; + tensor var_3930_equation_0 = const()[name = tensor("op_3930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3930_cast_fp16 = einsum(equation = var_3930_equation_0, values = (var_3832_cast_fp16, var_3679_cast_fp16))[name = tensor("op_3930_cast_fp16")]; + tensor var_3931_to_fp16 = const()[name = tensor("op_3931_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_343_cast_fp16 = mul(x = var_3930_cast_fp16, y = var_3931_to_fp16)[name = tensor("aw_chunk_343_cast_fp16")]; + tensor var_3934_equation_0 = const()[name = tensor("op_3934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3934_cast_fp16 = einsum(equation = var_3934_equation_0, values = (var_3836_cast_fp16, var_3686_cast_fp16))[name = tensor("op_3934_cast_fp16")]; + tensor var_3935_to_fp16 = const()[name = tensor("op_3935_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_345_cast_fp16 = mul(x = var_3934_cast_fp16, y = var_3935_to_fp16)[name = tensor("aw_chunk_345_cast_fp16")]; + tensor var_3938_equation_0 = const()[name = tensor("op_3938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3938_cast_fp16 = einsum(equation = var_3938_equation_0, values = (var_3836_cast_fp16, var_3693_cast_fp16))[name = tensor("op_3938_cast_fp16")]; + tensor var_3939_to_fp16 = const()[name = tensor("op_3939_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_347_cast_fp16 = mul(x = var_3938_cast_fp16, y = var_3939_to_fp16)[name = tensor("aw_chunk_347_cast_fp16")]; + tensor var_3942_equation_0 = const()[name = tensor("op_3942_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3942_cast_fp16 = einsum(equation = var_3942_equation_0, values = (var_3836_cast_fp16, var_3700_cast_fp16))[name = tensor("op_3942_cast_fp16")]; + tensor var_3943_to_fp16 = const()[name = tensor("op_3943_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_349_cast_fp16 = mul(x = var_3942_cast_fp16, y = var_3943_to_fp16)[name = tensor("aw_chunk_349_cast_fp16")]; + tensor var_3946_equation_0 = const()[name = tensor("op_3946_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3946_cast_fp16 = einsum(equation = var_3946_equation_0, values = (var_3836_cast_fp16, var_3707_cast_fp16))[name = tensor("op_3946_cast_fp16")]; + tensor var_3947_to_fp16 = const()[name = tensor("op_3947_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_351_cast_fp16 = mul(x = var_3946_cast_fp16, y = 
var_3947_to_fp16)[name = tensor("aw_chunk_351_cast_fp16")]; + tensor var_3950_equation_0 = const()[name = tensor("op_3950_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3950_cast_fp16 = einsum(equation = var_3950_equation_0, values = (var_3840_cast_fp16, var_3714_cast_fp16))[name = tensor("op_3950_cast_fp16")]; + tensor var_3951_to_fp16 = const()[name = tensor("op_3951_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_353_cast_fp16 = mul(x = var_3950_cast_fp16, y = var_3951_to_fp16)[name = tensor("aw_chunk_353_cast_fp16")]; + tensor var_3954_equation_0 = const()[name = tensor("op_3954_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3954_cast_fp16 = einsum(equation = var_3954_equation_0, values = (var_3840_cast_fp16, var_3721_cast_fp16))[name = tensor("op_3954_cast_fp16")]; + tensor var_3955_to_fp16 = const()[name = tensor("op_3955_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_355_cast_fp16 = mul(x = var_3954_cast_fp16, y = var_3955_to_fp16)[name = tensor("aw_chunk_355_cast_fp16")]; + tensor var_3958_equation_0 = const()[name = tensor("op_3958_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3958_cast_fp16 = einsum(equation = var_3958_equation_0, values = (var_3840_cast_fp16, var_3728_cast_fp16))[name = tensor("op_3958_cast_fp16")]; + tensor var_3959_to_fp16 = const()[name = tensor("op_3959_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_357_cast_fp16 = mul(x = var_3958_cast_fp16, y = var_3959_to_fp16)[name = tensor("aw_chunk_357_cast_fp16")]; + tensor var_3962_equation_0 = const()[name = tensor("op_3962_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3962_cast_fp16 = einsum(equation = var_3962_equation_0, values = (var_3840_cast_fp16, var_3735_cast_fp16))[name = tensor("op_3962_cast_fp16")]; + tensor var_3963_to_fp16 = const()[name = tensor("op_3963_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_359_cast_fp16 = mul(x = var_3962_cast_fp16, y = var_3963_to_fp16)[name = tensor("aw_chunk_359_cast_fp16")]; + tensor var_3966_equation_0 = const()[name = tensor("op_3966_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3966_cast_fp16 = einsum(equation = var_3966_equation_0, values = (var_3844_cast_fp16, var_3742_cast_fp16))[name = tensor("op_3966_cast_fp16")]; + tensor var_3967_to_fp16 = const()[name = tensor("op_3967_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_361_cast_fp16 = mul(x = var_3966_cast_fp16, y = var_3967_to_fp16)[name = tensor("aw_chunk_361_cast_fp16")]; + tensor var_3970_equation_0 = const()[name = tensor("op_3970_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3970_cast_fp16 = einsum(equation = var_3970_equation_0, values = (var_3844_cast_fp16, var_3749_cast_fp16))[name = tensor("op_3970_cast_fp16")]; + tensor var_3971_to_fp16 = const()[name = tensor("op_3971_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_363_cast_fp16 = mul(x = var_3970_cast_fp16, y = var_3971_to_fp16)[name = tensor("aw_chunk_363_cast_fp16")]; + tensor var_3974_equation_0 = const()[name = tensor("op_3974_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3974_cast_fp16 = einsum(equation = var_3974_equation_0, values = (var_3844_cast_fp16, var_3756_cast_fp16))[name = tensor("op_3974_cast_fp16")]; + tensor var_3975_to_fp16 = const()[name = tensor("op_3975_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_365_cast_fp16 = mul(x = var_3974_cast_fp16, y = var_3975_to_fp16)[name = tensor("aw_chunk_365_cast_fp16")]; + tensor var_3978_equation_0 = const()[name = tensor("op_3978_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_3978_cast_fp16 = einsum(equation = var_3978_equation_0, values = (var_3844_cast_fp16, var_3763_cast_fp16))[name = tensor("op_3978_cast_fp16")]; + tensor var_3979_to_fp16 = const()[name = tensor("op_3979_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_367_cast_fp16 = mul(x = var_3978_cast_fp16, y = var_3979_to_fp16)[name = tensor("aw_chunk_367_cast_fp16")]; + tensor var_3982_equation_0 = const()[name = tensor("op_3982_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3982_cast_fp16 = einsum(equation = var_3982_equation_0, values = (var_3848_cast_fp16, var_3770_cast_fp16))[name = tensor("op_3982_cast_fp16")]; + tensor var_3983_to_fp16 = const()[name = tensor("op_3983_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_369_cast_fp16 = mul(x = var_3982_cast_fp16, y = var_3983_to_fp16)[name = tensor("aw_chunk_369_cast_fp16")]; + tensor var_3986_equation_0 = const()[name = tensor("op_3986_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3986_cast_fp16 = einsum(equation = var_3986_equation_0, values = (var_3848_cast_fp16, var_3777_cast_fp16))[name = tensor("op_3986_cast_fp16")]; + tensor var_3987_to_fp16 = const()[name = tensor("op_3987_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_371_cast_fp16 = mul(x = var_3986_cast_fp16, y = var_3987_to_fp16)[name = tensor("aw_chunk_371_cast_fp16")]; + tensor var_3990_equation_0 = const()[name = tensor("op_3990_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3990_cast_fp16 = einsum(equation = var_3990_equation_0, values = (var_3848_cast_fp16, var_3784_cast_fp16))[name = tensor("op_3990_cast_fp16")]; + tensor var_3991_to_fp16 = const()[name = tensor("op_3991_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_373_cast_fp16 = mul(x = var_3990_cast_fp16, y = var_3991_to_fp16)[name = tensor("aw_chunk_373_cast_fp16")]; + tensor var_3994_equation_0 = const()[name = tensor("op_3994_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3994_cast_fp16 = einsum(equation = var_3994_equation_0, values = (var_3848_cast_fp16, var_3791_cast_fp16))[name = tensor("op_3994_cast_fp16")]; + tensor var_3995_to_fp16 = const()[name = tensor("op_3995_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_375_cast_fp16 = mul(x = var_3994_cast_fp16, y = var_3995_to_fp16)[name = tensor("aw_chunk_375_cast_fp16")]; + tensor var_3998_equation_0 = const()[name = tensor("op_3998_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3998_cast_fp16 = einsum(equation = var_3998_equation_0, values = (var_3852_cast_fp16, var_3798_cast_fp16))[name = tensor("op_3998_cast_fp16")]; + tensor var_3999_to_fp16 = const()[name = tensor("op_3999_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_377_cast_fp16 = mul(x = var_3998_cast_fp16, y = var_3999_to_fp16)[name = tensor("aw_chunk_377_cast_fp16")]; + tensor var_4002_equation_0 = const()[name = tensor("op_4002_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4002_cast_fp16 = einsum(equation = var_4002_equation_0, values = (var_3852_cast_fp16, var_3805_cast_fp16))[name = tensor("op_4002_cast_fp16")]; + tensor var_4003_to_fp16 = const()[name = tensor("op_4003_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_379_cast_fp16 = mul(x = var_4002_cast_fp16, y = var_4003_to_fp16)[name = tensor("aw_chunk_379_cast_fp16")]; + tensor var_4006_equation_0 = const()[name = tensor("op_4006_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4006_cast_fp16 = einsum(equation = var_4006_equation_0, values = (var_3852_cast_fp16, var_3812_cast_fp16))[name 
= tensor("op_4006_cast_fp16")]; + tensor var_4007_to_fp16 = const()[name = tensor("op_4007_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_381_cast_fp16 = mul(x = var_4006_cast_fp16, y = var_4007_to_fp16)[name = tensor("aw_chunk_381_cast_fp16")]; + tensor var_4010_equation_0 = const()[name = tensor("op_4010_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4010_cast_fp16 = einsum(equation = var_4010_equation_0, values = (var_3852_cast_fp16, var_3819_cast_fp16))[name = tensor("op_4010_cast_fp16")]; + tensor var_4011_to_fp16 = const()[name = tensor("op_4011_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_cast_fp16 = mul(x = var_4010_cast_fp16, y = var_4011_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; + tensor var_4013_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_321_cast_fp16)[name = tensor("op_4013_cast_fp16")]; + tensor var_4014_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_323_cast_fp16)[name = tensor("op_4014_cast_fp16")]; + tensor var_4015_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_325_cast_fp16)[name = tensor("op_4015_cast_fp16")]; + tensor var_4016_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_327_cast_fp16)[name = tensor("op_4016_cast_fp16")]; + tensor var_4017_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_329_cast_fp16)[name = tensor("op_4017_cast_fp16")]; + tensor var_4018_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_331_cast_fp16)[name = tensor("op_4018_cast_fp16")]; + tensor var_4019_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_333_cast_fp16)[name = tensor("op_4019_cast_fp16")]; + tensor var_4020_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_335_cast_fp16)[name = tensor("op_4020_cast_fp16")]; + tensor var_4021_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_337_cast_fp16)[name = tensor("op_4021_cast_fp16")]; + tensor var_4022_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_339_cast_fp16)[name = tensor("op_4022_cast_fp16")]; + tensor var_4023_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_341_cast_fp16)[name = tensor("op_4023_cast_fp16")]; + tensor var_4024_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_343_cast_fp16)[name = tensor("op_4024_cast_fp16")]; + tensor var_4025_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_345_cast_fp16)[name = tensor("op_4025_cast_fp16")]; + tensor var_4026_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_347_cast_fp16)[name = tensor("op_4026_cast_fp16")]; + tensor var_4027_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_349_cast_fp16)[name = tensor("op_4027_cast_fp16")]; + tensor var_4028_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_351_cast_fp16)[name = tensor("op_4028_cast_fp16")]; + tensor var_4029_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_353_cast_fp16)[name = tensor("op_4029_cast_fp16")]; + tensor var_4030_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_355_cast_fp16)[name = tensor("op_4030_cast_fp16")]; + tensor var_4031_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_357_cast_fp16)[name = tensor("op_4031_cast_fp16")]; + tensor var_4032_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_359_cast_fp16)[name = tensor("op_4032_cast_fp16")]; + tensor var_4033_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_361_cast_fp16)[name = tensor("op_4033_cast_fp16")]; + tensor var_4034_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_363_cast_fp16)[name = tensor("op_4034_cast_fp16")]; + tensor var_4035_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_365_cast_fp16)[name = tensor("op_4035_cast_fp16")]; + tensor var_4036_cast_fp16 = softmax(axis = var_3510, x = 
aw_chunk_367_cast_fp16)[name = tensor("op_4036_cast_fp16")]; + tensor var_4037_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_369_cast_fp16)[name = tensor("op_4037_cast_fp16")]; + tensor var_4038_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_371_cast_fp16)[name = tensor("op_4038_cast_fp16")]; + tensor var_4039_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_373_cast_fp16)[name = tensor("op_4039_cast_fp16")]; + tensor var_4040_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_375_cast_fp16)[name = tensor("op_4040_cast_fp16")]; + tensor var_4041_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_377_cast_fp16)[name = tensor("op_4041_cast_fp16")]; + tensor var_4042_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_379_cast_fp16)[name = tensor("op_4042_cast_fp16")]; + tensor var_4043_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_381_cast_fp16)[name = tensor("op_4043_cast_fp16")]; + tensor var_4044_cast_fp16 = softmax(axis = var_3510, x = aw_chunk_cast_fp16)[name = tensor("op_4044_cast_fp16")]; + tensor var_4046_equation_0 = const()[name = tensor("op_4046_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4046_cast_fp16 = einsum(equation = var_4046_equation_0, values = (var_3854_cast_fp16, var_4013_cast_fp16))[name = tensor("op_4046_cast_fp16")]; + tensor var_4048_equation_0 = const()[name = tensor("op_4048_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4048_cast_fp16 = einsum(equation = var_4048_equation_0, values = (var_3854_cast_fp16, var_4014_cast_fp16))[name = tensor("op_4048_cast_fp16")]; + tensor var_4050_equation_0 = const()[name = tensor("op_4050_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4050_cast_fp16 = einsum(equation = var_4050_equation_0, values = (var_3854_cast_fp16, var_4015_cast_fp16))[name = tensor("op_4050_cast_fp16")]; + tensor var_4052_equation_0 = const()[name = tensor("op_4052_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4052_cast_fp16 = einsum(equation = var_4052_equation_0, values = (var_3854_cast_fp16, var_4016_cast_fp16))[name = tensor("op_4052_cast_fp16")]; + tensor var_4054_equation_0 = const()[name = tensor("op_4054_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4054_cast_fp16 = einsum(equation = var_4054_equation_0, values = (var_3858_cast_fp16, var_4017_cast_fp16))[name = tensor("op_4054_cast_fp16")]; + tensor var_4056_equation_0 = const()[name = tensor("op_4056_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4056_cast_fp16 = einsum(equation = var_4056_equation_0, values = (var_3858_cast_fp16, var_4018_cast_fp16))[name = tensor("op_4056_cast_fp16")]; + tensor var_4058_equation_0 = const()[name = tensor("op_4058_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4058_cast_fp16 = einsum(equation = var_4058_equation_0, values = (var_3858_cast_fp16, var_4019_cast_fp16))[name = tensor("op_4058_cast_fp16")]; + tensor var_4060_equation_0 = const()[name = tensor("op_4060_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4060_cast_fp16 = einsum(equation = var_4060_equation_0, values = (var_3858_cast_fp16, var_4020_cast_fp16))[name = tensor("op_4060_cast_fp16")]; + tensor var_4062_equation_0 = const()[name = tensor("op_4062_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4062_cast_fp16 = einsum(equation = var_4062_equation_0, values = (var_3862_cast_fp16, var_4021_cast_fp16))[name = tensor("op_4062_cast_fp16")]; + tensor var_4064_equation_0 = const()[name = tensor("op_4064_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_4064_cast_fp16 = einsum(equation = var_4064_equation_0, values = (var_3862_cast_fp16, var_4022_cast_fp16))[name = tensor("op_4064_cast_fp16")]; + tensor var_4066_equation_0 = const()[name = tensor("op_4066_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4066_cast_fp16 = einsum(equation = var_4066_equation_0, values = (var_3862_cast_fp16, var_4023_cast_fp16))[name = tensor("op_4066_cast_fp16")]; + tensor var_4068_equation_0 = const()[name = tensor("op_4068_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4068_cast_fp16 = einsum(equation = var_4068_equation_0, values = (var_3862_cast_fp16, var_4024_cast_fp16))[name = tensor("op_4068_cast_fp16")]; + tensor var_4070_equation_0 = const()[name = tensor("op_4070_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4070_cast_fp16 = einsum(equation = var_4070_equation_0, values = (var_3866_cast_fp16, var_4025_cast_fp16))[name = tensor("op_4070_cast_fp16")]; + tensor var_4072_equation_0 = const()[name = tensor("op_4072_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4072_cast_fp16 = einsum(equation = var_4072_equation_0, values = (var_3866_cast_fp16, var_4026_cast_fp16))[name = tensor("op_4072_cast_fp16")]; + tensor var_4074_equation_0 = const()[name = tensor("op_4074_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4074_cast_fp16 = einsum(equation = var_4074_equation_0, values = (var_3866_cast_fp16, var_4027_cast_fp16))[name = tensor("op_4074_cast_fp16")]; + tensor var_4076_equation_0 = const()[name = tensor("op_4076_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4076_cast_fp16 = einsum(equation = var_4076_equation_0, values = (var_3866_cast_fp16, var_4028_cast_fp16))[name = tensor("op_4076_cast_fp16")]; + tensor var_4078_equation_0 = const()[name = tensor("op_4078_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4078_cast_fp16 = einsum(equation = var_4078_equation_0, values = (var_3870_cast_fp16, var_4029_cast_fp16))[name = tensor("op_4078_cast_fp16")]; + tensor var_4080_equation_0 = const()[name = tensor("op_4080_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4080_cast_fp16 = einsum(equation = var_4080_equation_0, values = (var_3870_cast_fp16, var_4030_cast_fp16))[name = tensor("op_4080_cast_fp16")]; + tensor var_4082_equation_0 = const()[name = tensor("op_4082_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4082_cast_fp16 = einsum(equation = var_4082_equation_0, values = (var_3870_cast_fp16, var_4031_cast_fp16))[name = tensor("op_4082_cast_fp16")]; + tensor var_4084_equation_0 = const()[name = tensor("op_4084_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4084_cast_fp16 = einsum(equation = var_4084_equation_0, values = (var_3870_cast_fp16, var_4032_cast_fp16))[name = tensor("op_4084_cast_fp16")]; + tensor var_4086_equation_0 = const()[name = tensor("op_4086_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4086_cast_fp16 = einsum(equation = var_4086_equation_0, values = (var_3874_cast_fp16, var_4033_cast_fp16))[name = tensor("op_4086_cast_fp16")]; + tensor var_4088_equation_0 = const()[name = tensor("op_4088_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4088_cast_fp16 = einsum(equation = var_4088_equation_0, values = (var_3874_cast_fp16, var_4034_cast_fp16))[name = tensor("op_4088_cast_fp16")]; + tensor var_4090_equation_0 = const()[name = tensor("op_4090_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4090_cast_fp16 = einsum(equation = var_4090_equation_0, values = 
(var_3874_cast_fp16, var_4035_cast_fp16))[name = tensor("op_4090_cast_fp16")]; + tensor var_4092_equation_0 = const()[name = tensor("op_4092_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4092_cast_fp16 = einsum(equation = var_4092_equation_0, values = (var_3874_cast_fp16, var_4036_cast_fp16))[name = tensor("op_4092_cast_fp16")]; + tensor var_4094_equation_0 = const()[name = tensor("op_4094_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4094_cast_fp16 = einsum(equation = var_4094_equation_0, values = (var_3878_cast_fp16, var_4037_cast_fp16))[name = tensor("op_4094_cast_fp16")]; + tensor var_4096_equation_0 = const()[name = tensor("op_4096_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4096_cast_fp16 = einsum(equation = var_4096_equation_0, values = (var_3878_cast_fp16, var_4038_cast_fp16))[name = tensor("op_4096_cast_fp16")]; + tensor var_4098_equation_0 = const()[name = tensor("op_4098_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4098_cast_fp16 = einsum(equation = var_4098_equation_0, values = (var_3878_cast_fp16, var_4039_cast_fp16))[name = tensor("op_4098_cast_fp16")]; + tensor var_4100_equation_0 = const()[name = tensor("op_4100_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4100_cast_fp16 = einsum(equation = var_4100_equation_0, values = (var_3878_cast_fp16, var_4040_cast_fp16))[name = tensor("op_4100_cast_fp16")]; + tensor var_4102_equation_0 = const()[name = tensor("op_4102_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4102_cast_fp16 = einsum(equation = var_4102_equation_0, values = (var_3882_cast_fp16, var_4041_cast_fp16))[name = tensor("op_4102_cast_fp16")]; + tensor var_4104_equation_0 = const()[name = tensor("op_4104_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4104_cast_fp16 = einsum(equation = var_4104_equation_0, values = (var_3882_cast_fp16, var_4042_cast_fp16))[name = tensor("op_4104_cast_fp16")]; + tensor var_4106_equation_0 = const()[name = tensor("op_4106_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4106_cast_fp16 = einsum(equation = var_4106_equation_0, values = (var_3882_cast_fp16, var_4043_cast_fp16))[name = tensor("op_4106_cast_fp16")]; + tensor var_4108_equation_0 = const()[name = tensor("op_4108_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4108_cast_fp16 = einsum(equation = var_4108_equation_0, values = (var_3882_cast_fp16, var_4044_cast_fp16))[name = tensor("op_4108_cast_fp16")]; + tensor var_4110_interleave_0 = const()[name = tensor("op_4110_interleave_0"), val = tensor(false)]; + tensor var_4110_cast_fp16 = concat(axis = var_3497, interleave = var_4110_interleave_0, values = (var_4046_cast_fp16, var_4048_cast_fp16, var_4050_cast_fp16, var_4052_cast_fp16))[name = tensor("op_4110_cast_fp16")]; + tensor var_4112_interleave_0 = const()[name = tensor("op_4112_interleave_0"), val = tensor(false)]; + tensor var_4112_cast_fp16 = concat(axis = var_3497, interleave = var_4112_interleave_0, values = (var_4054_cast_fp16, var_4056_cast_fp16, var_4058_cast_fp16, var_4060_cast_fp16))[name = tensor("op_4112_cast_fp16")]; + tensor var_4114_interleave_0 = const()[name = tensor("op_4114_interleave_0"), val = tensor(false)]; + tensor var_4114_cast_fp16 = concat(axis = var_3497, interleave = var_4114_interleave_0, values = (var_4062_cast_fp16, var_4064_cast_fp16, var_4066_cast_fp16, var_4068_cast_fp16))[name = tensor("op_4114_cast_fp16")]; + tensor var_4116_interleave_0 = const()[name = tensor("op_4116_interleave_0"), val = tensor(false)]; + tensor 
var_4116_cast_fp16 = concat(axis = var_3497, interleave = var_4116_interleave_0, values = (var_4070_cast_fp16, var_4072_cast_fp16, var_4074_cast_fp16, var_4076_cast_fp16))[name = tensor("op_4116_cast_fp16")]; + tensor var_4118_interleave_0 = const()[name = tensor("op_4118_interleave_0"), val = tensor(false)]; + tensor var_4118_cast_fp16 = concat(axis = var_3497, interleave = var_4118_interleave_0, values = (var_4078_cast_fp16, var_4080_cast_fp16, var_4082_cast_fp16, var_4084_cast_fp16))[name = tensor("op_4118_cast_fp16")]; + tensor var_4120_interleave_0 = const()[name = tensor("op_4120_interleave_0"), val = tensor(false)]; + tensor var_4120_cast_fp16 = concat(axis = var_3497, interleave = var_4120_interleave_0, values = (var_4086_cast_fp16, var_4088_cast_fp16, var_4090_cast_fp16, var_4092_cast_fp16))[name = tensor("op_4120_cast_fp16")]; + tensor var_4122_interleave_0 = const()[name = tensor("op_4122_interleave_0"), val = tensor(false)]; + tensor var_4122_cast_fp16 = concat(axis = var_3497, interleave = var_4122_interleave_0, values = (var_4094_cast_fp16, var_4096_cast_fp16, var_4098_cast_fp16, var_4100_cast_fp16))[name = tensor("op_4122_cast_fp16")]; + tensor var_4124_interleave_0 = const()[name = tensor("op_4124_interleave_0"), val = tensor(false)]; + tensor var_4124_cast_fp16 = concat(axis = var_3497, interleave = var_4124_interleave_0, values = (var_4102_cast_fp16, var_4104_cast_fp16, var_4106_cast_fp16, var_4108_cast_fp16))[name = tensor("op_4124_cast_fp16")]; + tensor input_41_interleave_0 = const()[name = tensor("input_41_interleave_0"), val = tensor(false)]; + tensor input_41_cast_fp16 = concat(axis = var_3510, interleave = input_41_interleave_0, values = (var_4110_cast_fp16, var_4112_cast_fp16, var_4114_cast_fp16, var_4116_cast_fp16, var_4118_cast_fp16, var_4120_cast_fp16, var_4122_cast_fp16, var_4124_cast_fp16))[name = tensor("input_41_cast_fp16")]; + tensor var_4129 = const()[name = tensor("op_4129"), val = tensor([1, 1])]; + tensor var_4131 = const()[name = tensor("op_4131"), val = tensor([1, 1])]; + tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("custom")]; + tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36460160)))]; + tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36984512)))]; + tensor obj_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = var_4131, groups = var_3510, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = var_4129, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor var_4137 = const()[name = tensor("op_4137"), val = tensor([1])]; + tensor channels_mean_23_cast_fp16 = reduce_mean(axes = var_4137, keep_dims = var_3511, x = inputs_23_cast_fp16)[name = tensor("channels_mean_23_cast_fp16")]; + tensor zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor("zero_mean_23_cast_fp16")]; + tensor zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = 
tensor("zero_mean_sq_23_cast_fp16")]; + tensor var_4141 = const()[name = tensor("op_4141"), val = tensor([1])]; + tensor var_4142_cast_fp16 = reduce_mean(axes = var_4141, keep_dims = var_3511, x = zero_mean_sq_23_cast_fp16)[name = tensor("op_4142_cast_fp16")]; + tensor var_4143_to_fp16 = const()[name = tensor("op_4143_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_4144_cast_fp16 = add(x = var_4142_cast_fp16, y = var_4143_to_fp16)[name = tensor("op_4144_cast_fp16")]; + tensor denom_23_epsilon_0_to_fp16 = const()[name = tensor("denom_23_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = var_4144_cast_fp16)[name = tensor("denom_23_cast_fp16")]; + tensor out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36985600)))]; + tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36986688)))]; + tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_4155 = const()[name = tensor("op_4155"), val = tensor([1, 1])]; + tensor var_4157 = const()[name = tensor("op_4157"), val = tensor([1, 1])]; + tensor input_45_pad_type_0 = const()[name = tensor("input_45_pad_type_0"), val = tensor("custom")]; + tensor input_45_pad_0 = const()[name = tensor("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36987776)))]; + tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39084992)))]; + tensor input_45_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = var_4157, groups = var_3510, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = var_4155, weight = layers_5_fc1_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_45_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_4163 = const()[name = tensor("op_4163"), val = tensor([1, 1])]; + tensor var_4165 = const()[name = tensor("op_4165"), val = tensor([1, 1])]; + tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39089152)))]; + tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(41186368)))]; + tensor hidden_states_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = var_4165, groups = var_3510, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = var_4163, weight = layers_5_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor var_4171 = const()[name = tensor("op_4171"), val = tensor(true)]; + tensor var_4175 = const()[name = tensor("op_4175"), val = tensor([1])]; + tensor channels_mean_cast_fp16 = reduce_mean(axes = var_4175, keep_dims = var_4171, x = inputs_cast_fp16)[name = tensor("channels_mean_cast_fp16")]; + tensor zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor("zero_mean_cast_fp16")]; + tensor zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor("zero_mean_sq_cast_fp16")]; + tensor var_4179 = const()[name = tensor("op_4179"), val = tensor([1])]; + tensor var_4180_cast_fp16 = reduce_mean(axes = var_4179, keep_dims = var_4171, x = zero_mean_sq_cast_fp16)[name = tensor("op_4180_cast_fp16")]; + tensor var_4181_to_fp16 = const()[name = tensor("op_4181_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_4182_cast_fp16 = add(x = var_4180_cast_fp16, y = var_4181_to_fp16)[name = tensor("op_4182_cast_fp16")]; + tensor denom_epsilon_0_to_fp16 = const()[name = tensor("denom_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_4182_cast_fp16)[name = tensor("denom_cast_fp16")]; + tensor out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41187456)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41188544)))]; + tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; + } -> (encoder_output_embeds); +} \ No newline at end of file diff --git a/openai_whisper-base/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-base/AudioEncoder.mlmodelc/model.mlmodel new file mode 100644 index 0000000000000000000000000000000000000000..50615c952c30a945ce18682e38a3a78eecddc358 --- /dev/null +++ b/openai_whisper-base/AudioEncoder.mlmodelc/model.mlmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d42038f84b508da5ce9b953302387ffedc097c346d36a56b765109002b6080e +size 79853 diff --git a/openai_whisper-base/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-base/AudioEncoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..8fdbf685bba3a1efe3b57459cb3dbf088e92438d --- 
/dev/null +++ b/openai_whisper-base/AudioEncoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:061ff4d74e5de3937b31288465d6c6f2697f92d121c80b23f51dd26bbdfe642b +size 41189632 diff --git a/openai_whisper-base/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-base/MelSpectrogram.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..8cce829d60c4056be93333a562d47d3bb2908b9b --- /dev/null +++ b/openai_whisper-base/MelSpectrogram.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f77e6457285248f99cd7aa3fd4cc2efbb17733e63e7023ac53abe1f95785d07 +size 243 diff --git a/openai_whisper-base/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-base/MelSpectrogram.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1d9e7a102f740c68cdfc7272dc5b8007c48416a --- /dev/null +++ b/openai_whisper-base/MelSpectrogram.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabdc5aa69f6ef4d97dc9499f5c30514e00e96b53b750b33a5a6471363c71662 +size 328 diff --git a/openai_whisper-base/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-base/MelSpectrogram.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2d1d4a4a0390fd84f87e4780bd5c76c066220991 --- /dev/null +++ b/openai_whisper-base/MelSpectrogram.mlmodelc/metadata.json @@ -0,0 +1,71 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 1, 3000]", + "name" : "melspectrogram_features", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Pad" : 1, + "Ios16.mul" : 2, + "SliceByIndex" : 1, + "Ios16.sub" : 1, + "Ios16.log" : 1, + "Ios16.conv" : 2, + "Ios16.add" : 3, + "Ios16.square" : 2, + "Ios16.matmul" : 1, + "Squeeze" : 2, + "Ios16.maximum" : 1, + "ExpandDims" : 4, + "Ios16.reduceMax" : 1, + "Identity" : 1, + "Ios16.reshape" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.version" : "7.1", + "com.github.apple.coremltools.source" : "torch==2.2.1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 480000)", + "shortDescription" : "", + "shape" : "[480000]", + "name" : "audio", + "type" : "MultiArray" + } + ], + "generatedClassName" : "MelSpectrogram", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-base/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-base/MelSpectrogram.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..a63d7fa99d6d86db1b76a1f53640cb4aa25e0210 --- /dev/null +++ b/openai_whisper-base/MelSpectrogram.mlmodelc/model.mil @@ -0,0 +1,66 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", 
"5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor audio) { + tensor var_10 = const()[name = tensor("op_10"), val = tensor([1, 1, 480000])]; + tensor input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = tensor("input_1_cast_fp16")]; + tensor input_3_pad_0 = const()[name = tensor("input_3_pad_0"), val = tensor([0, 0, 0, 0, 200, 200])]; + tensor input_3_mode_0 = const()[name = tensor("input_3_mode_0"), val = tensor("reflect")]; + tensor input_3_constant_val_0_to_fp16 = const()[name = tensor("input_3_constant_val_0_to_fp16"), val = tensor(0x0p+0)]; + tensor input_3_cast_fp16 = pad(constant_val = input_3_constant_val_0_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_22 = const()[name = tensor("op_22"), val = tensor([480400])]; + tensor input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor expand_dims_0_axes_0 = const()[name = tensor("expand_dims_0_axes_0"), val = tensor([0])]; + tensor expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = tensor("expand_dims_0_cast_fp16")]; + tensor expand_dims_3 = const()[name = tensor("expand_dims_3"), val = tensor([160])]; + tensor expand_dims_4_axes_0 = const()[name = tensor("expand_dims_4_axes_0"), val = tensor([1])]; + tensor expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = tensor("expand_dims_4_cast_fp16")]; + tensor conv_0_pad_type_0 = const()[name = tensor("conv_0_pad_type_0"), val = tensor("valid")]; + tensor conv_0_pad_0 = const()[name = tensor("conv_0_pad_0"), val = tensor([0, 0])]; + tensor conv_0_dilations_0 = const()[name = tensor("conv_0_dilations_0"), val = tensor([1])]; + tensor conv_0_groups_0 = const()[name = tensor("conv_0_groups_0"), val = tensor(1)]; + tensor expand_dims_1_to_fp16 = const()[name = tensor("expand_dims_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor("conv_0_cast_fp16")]; + tensor conv_1_pad_type_0 = const()[name = tensor("conv_1_pad_type_0"), val = tensor("valid")]; + tensor conv_1_pad_0 = const()[name = tensor("conv_1_pad_0"), val = tensor([0, 0])]; + tensor conv_1_dilations_0 = const()[name = tensor("conv_1_dilations_0"), val = tensor([1])]; + tensor conv_1_groups_0 = const()[name = tensor("conv_1_groups_0"), val = tensor(1)]; + tensor expand_dims_2_to_fp16 = const()[name = tensor("expand_dims_2_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160960)))]; + tensor conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor("conv_1_cast_fp16")]; + tensor squeeze_0_axes_0 = const()[name = tensor("squeeze_0_axes_0"), val = tensor([0])]; + tensor squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = tensor("squeeze_0_cast_fp16")]; + tensor squeeze_1_axes_0 = const()[name = tensor("squeeze_1_axes_0"), val = 
tensor([0])]; + tensor squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = tensor("squeeze_1_cast_fp16")]; + tensor square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = tensor("square_0_cast_fp16")]; + tensor square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = tensor("square_1_cast_fp16")]; + tensor add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = tensor("add_1_cast_fp16")]; + tensor magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = tensor("magnitudes_1_cast_fp16")]; + tensor magnitudes_begin_0 = const()[name = tensor("magnitudes_begin_0"), val = tensor([0, 0])]; + tensor magnitudes_end_0 = const()[name = tensor("magnitudes_end_0"), val = tensor([201, 3000])]; + tensor magnitudes_end_mask_0 = const()[name = tensor("magnitudes_end_mask_0"), val = tensor([true, false])]; + tensor magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = tensor("magnitudes_cast_fp16")]; + tensor mel_spec_1_transpose_x_0 = const()[name = tensor("mel_spec_1_transpose_x_0"), val = tensor(false)]; + tensor mel_spec_1_transpose_y_0 = const()[name = tensor("mel_spec_1_transpose_y_0"), val = tensor(false)]; + tensor mel_filters_to_fp16 = const()[name = tensor("mel_filters_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321856)))]; + tensor mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = tensor("mel_spec_1_cast_fp16")]; + tensor var_41_to_fp16 = const()[name = tensor("op_41_to_fp16"), val = tensor(0x1p-24)]; + tensor mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = tensor("mel_spec_cast_fp16")]; + tensor log_0_epsilon_0_to_fp16 = const()[name = tensor("log_0_epsilon_0_to_fp16"), val = tensor(0x0p+0)]; + tensor log_0_cast_fp16 = log(epsilon = log_0_epsilon_0_to_fp16, x = mel_spec_cast_fp16)[name = tensor("log_0_cast_fp16")]; + tensor mul_0_y_0_to_fp16 = const()[name = tensor("mul_0_y_0_to_fp16"), val = tensor(0x1.bccp-2)]; + tensor mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor("mul_0_cast_fp16")]; + tensor var_44_keep_dims_0 = const()[name = tensor("op_44_keep_dims_0"), val = tensor(false)]; + tensor var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = tensor("op_44_cast_fp16")]; + tensor var_46_to_fp16 = const()[name = tensor("op_46_to_fp16"), val = tensor(0x1p+3)]; + tensor var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = tensor("op_47_cast_fp16")]; + tensor log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = tensor("log_spec_3_cast_fp16")]; + tensor var_50_to_fp16 = const()[name = tensor("op_50_to_fp16"), val = tensor(0x1p+2)]; + tensor var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = tensor("op_51_cast_fp16")]; + tensor _inversed_log_spec_y_0_to_fp16 = const()[name = tensor("_inversed_log_spec_y_0_to_fp16"), val = tensor(0x1p-2)]; + tensor _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = tensor("_inversed_log_spec_cast_fp16")]; + tensor var_55_axes_0 = const()[name = tensor("op_55_axes_0"), val = tensor([0])]; + tensor var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = 
tensor("op_55_cast_fp16")]; + tensor var_62_axes_0 = const()[name = tensor("op_62_axes_0"), val = tensor([2])]; + tensor melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = tensor("op_62_cast_fp16")]; + } -> (melspectrogram_features); +} \ No newline at end of file diff --git a/openai_whisper-base/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-base/MelSpectrogram.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf020d5096119ad2b4a9899e3df5e01bd5853fe5 --- /dev/null +++ b/openai_whisper-base/MelSpectrogram.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d74417ef9c765e70f4ef85fe7405015a7086e9af05e3b63a5c2c7c748b2efc +size 354080 diff --git a/openai_whisper-base/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-base/TextDecoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..278b66af60c1f9c790faffdbba14204338ae2f9f --- /dev/null +++ b/openai_whisper-base/TextDecoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac1227740ecc2fd7a03df50ac6e2a7f7946acfa77069cf2c486ae0255356b95 +size 243 diff --git a/openai_whisper-base/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-base/TextDecoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ed058eb406747108023b4dda112105129dffd17 --- /dev/null +++ b/openai_whisper-base/TextDecoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1f6fe409486e2797d3f0c65d9a6d5af596771760548cd86f41939c54cdbe7c +size 633 diff --git a/openai_whisper-base/TextDecoder.mlmodelc/metadata.json b/openai_whisper-base/TextDecoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e9546ab1be12f588ceb4b521e85e6f5c406aa743 --- /dev/null +++ b/openai_whisper-base/TextDecoder.mlmodelc/metadata.json @@ -0,0 +1,165 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 51865)", + "shortDescription" : "", + "shape" : "[1, 1, 51865]", + "name" : "logits", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 1 × 1)", + "shortDescription" : "", + "shape" : "[1, 3072, 1, 1]", + "name" : "key_cache_updates", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 1 × 1)", + "shortDescription" : "", + "shape" : "[1, 3072, 1, 1]", + "name" : "value_cache_updates", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 1500]", + "name" : "alignment_heads_weights", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Split" : 2, + "Concat" : 3, + "Ios16.rsqrt" : 19, + "Ios16.mul" : 74, + "Squeeze" : 1, + "SliceByIndex" : 16, + "Ios16.sub" : 20, + "Transpose" : 1, + "Ios16.conv" : 60, + "Ios16.add" : 56, + "Ios16.linear" : 1, + "Ios16.matmul" : 24, + "Ios16.gelu" : 6, 
+ "Ios16.reduceMean" : 39, + "ExpandDims" : 6, + "Ios16.batchNorm" : 19, + "Ios16.gather" : 2, + "Ios16.reshape" : 48, + "Ios16.softmax" : 12 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.2.1", + "com.github.apple.coremltools.version" : "7.1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "input_ids", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "cache_length", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 3072, 1, 224]", + "name" : "key_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 3072, 1, 224]", + "name" : "value_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 224]", + "name" : "kv_cache_update_mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 1500]", + "name" : "encoder_output_embeds", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 224]", + "name" : "decoder_key_padding_mask", + "type" : "MultiArray" + } + ], + "generatedClassName" : "TextDecoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-base/TextDecoder.mlmodelc/model.mil b/openai_whisper-base/TextDecoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..bdf5741552d15c82459d7e398927f7972a4fe195 --- /dev/null +++ b/openai_whisper-base/TextDecoder.mlmodelc/model.mil @@ -0,0 +1,1115 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor cache_length, tensor decoder_key_padding_mask, tensor encoder_output_embeds, tensor input_ids, tensor key_cache, tensor kv_cache_update_mask, tensor value_cache) { + tensor var_28_axis_0 = const()[name = tensor("op_28_axis_0"), val = tensor(0)]; + tensor var_28_batch_dims_0 = const()[name = tensor("op_28_batch_dims_0"), val = tensor(0)]; + tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_28_cast_fp16 = gather(axis = var_28_axis_0, batch_dims = var_28_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("op_28_cast_fp16")]; + tensor var_32_axis_0 = const()[name = tensor("op_32_axis_0"), val = tensor(0)]; + tensor var_32_batch_dims_0 = const()[name = tensor("op_32_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_weight_to_fp16 = const()[name = tensor("embed_positions_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53109888)))]; + tensor var_32_cast_fp16 = gather(axis = var_32_axis_0, batch_dims = var_32_batch_dims_0, indices = cache_length, x = embed_positions_weight_to_fp16)[name = tensor("op_32_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = add(x = var_28_cast_fp16, y = var_32_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_46_axes_0 = const()[name = tensor("op_46_axes_0"), val = tensor([2])]; + tensor var_46_cast_fp16 = expand_dims(axes = var_46_axes_0, x = hidden_states_1_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor inputs_1_axes_0 = const()[name = tensor("inputs_1_axes_0"), val = tensor([3])]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_46_cast_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor tile_0 = const()[name = tensor("tile_0"), val = tensor([512, 512, 512, 512, 512, 512])]; + tensor var_51_axis_0 = const()[name = tensor("op_51_axis_0"), val = tensor(1)]; + tensor var_51_cast_fp16_0, tensor var_51_cast_fp16_1, tensor var_51_cast_fp16_2, tensor var_51_cast_fp16_3, tensor var_51_cast_fp16_4, tensor var_51_cast_fp16_5 = split(axis = var_51_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor("op_51_cast_fp16")]; + tensor tile_1 = const()[name = tensor("tile_1"), val = tensor([512, 512, 512, 512, 512, 512])]; + tensor var_60_axis_0 = const()[name = tensor("op_60_axis_0"), val = tensor(1)]; + tensor var_60_cast_fp16_0, tensor var_60_cast_fp16_1, tensor var_60_cast_fp16_2, tensor var_60_cast_fp16_3, tensor var_60_cast_fp16_4, tensor var_60_cast_fp16_5 = split(axis = var_60_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor("op_60_cast_fp16")]; + tensor var_72 = const()[name = tensor("op_72"), val = tensor(3)]; + tensor var_79 = const()[name = tensor("op_79"), val = tensor(1)]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor(true)]; + tensor var_92 = const()[name = tensor("op_92"), val = tensor([1])]; + tensor channels_mean_1_cast_fp16 = reduce_mean(axes = var_92, keep_dims = var_80, x = inputs_1_cast_fp16)[name = tensor("channels_mean_1_cast_fp16")]; + tensor zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor("zero_mean_1_cast_fp16")]; + tensor zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor("zero_mean_sq_1_cast_fp16")]; + tensor var_96 = const()[name = tensor("op_96"), val = tensor([1])]; + tensor var_97_cast_fp16 = reduce_mean(axes = var_96, keep_dims = var_80, x = zero_mean_sq_1_cast_fp16)[name = tensor("op_97_cast_fp16")]; + tensor var_98_to_fp16 = const()[name = tensor("op_98_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_99_cast_fp16 = add(x = var_97_cast_fp16, y = var_98_to_fp16)[name = tensor("op_99_cast_fp16")]; + tensor denom_1_epsilon_0_to_fp16 = const()[name = tensor("denom_1_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, 
x = var_99_cast_fp16)[name = tensor("denom_1_cast_fp16")]; + tensor out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53568704)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53569792)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53570880)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53571968)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_114 = const()[name = tensor("op_114"), val = tensor([1, 1])]; + tensor var_116 = const()[name = tensor("op_116"), val = tensor([1, 1])]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("custom")]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53573056)))]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54097408)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_116, groups = var_79, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_114, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 1])]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, 1])]; + tensor current_key_1_pad_type_0 = const()[name = tensor("current_key_1_pad_type_0"), val = tensor("custom")]; + tensor current_key_1_pad_0 = const()[name = tensor("current_key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54098496)))]; + tensor current_key_1_cast_fp16 = conv(dilations = var_122, groups = var_79, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = var_120, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("current_key_1_cast_fp16")]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129 = const()[name = tensor("op_129"), val = tensor([1, 1])]; + tensor current_value_1_pad_type_0 = const()[name = tensor("current_value_1_pad_type_0"), val = tensor("custom")]; + tensor current_value_1_pad_0 = const()[name = 
tensor("current_value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54622848)))]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55147200)))]; + tensor current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_129, groups = var_79, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = var_127, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("current_value_1_cast_fp16")]; + tensor var_133_axes_0 = const()[name = tensor("op_133_axes_0"), val = tensor([1])]; + tensor var_133_cast_fp16 = expand_dims(axes = var_133_axes_0, x = kv_cache_update_mask)[name = tensor("op_133_cast_fp16")]; + tensor var_134_axes_0 = const()[name = tensor("op_134_axes_0"), val = tensor([2])]; + tensor var_134_cast_fp16 = expand_dims(axes = var_134_axes_0, x = var_133_cast_fp16)[name = tensor("op_134_cast_fp16")]; + tensor var_136_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_73_to_fp16 = const()[name = tensor("op_73_to_fp16"), val = tensor(0x1p+0)]; + tensor var_137_cast_fp16 = sub(x = var_73_to_fp16, y = var_134_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor var_138_cast_fp16 = mul(x = var_51_cast_fp16_0, y = var_137_cast_fp16)[name = tensor("op_138_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_136_cast_fp16, y = var_138_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_140_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_140_cast_fp16")]; + tensor var_142_cast_fp16 = mul(x = var_60_cast_fp16_0, y = var_137_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_140_cast_fp16, y = var_142_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_145 = const()[name = tensor("op_145"), val = tensor([1, 8, 64, -1])]; + tensor var_146_cast_fp16 = reshape(shape = var_145, x = query_1_cast_fp16)[name = tensor("op_146_cast_fp16")]; + tensor var_147_to_fp16 = const()[name = tensor("op_147_to_fp16"), val = tensor(0x1p-3)]; + tensor var_148_cast_fp16 = mul(x = var_146_cast_fp16, y = var_147_to_fp16)[name = tensor("op_148_cast_fp16")]; + tensor var_149 = const()[name = tensor("op_149"), val = tensor([1, 8, 64, -1])]; + tensor var_150_cast_fp16 = reshape(shape = var_149, x = key_1_cast_fp16)[name = tensor("op_150_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_148_cast_fp16, y = var_150_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_154_axes_0 = const()[name = tensor("op_154_axes_0"), val = tensor([1])]; + tensor var_154_cast_fp16 = expand_dims(axes = var_154_axes_0, x = decoder_key_padding_mask)[name = tensor("op_154_cast_fp16")]; + tensor var_155_axes_0 = const()[name = tensor("op_155_axes_0"), val = tensor([2])]; + tensor var_155_cast_fp16 = expand_dims(axes = var_155_axes_0, x = var_154_cast_fp16)[name = 
tensor("op_155_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_155_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_158_cast_fp16 = softmax(axis = var_72, x = mh_w_3_cast_fp16)[name = tensor("op_158_cast_fp16")]; + tensor var_159 = const()[name = tensor("op_159"), val = tensor([1, 8, 64, -1])]; + tensor var_160_cast_fp16 = reshape(shape = var_159, x = value_1_cast_fp16)[name = tensor("op_160_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_160_cast_fp16, y = var_158_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_163 = const()[name = tensor("op_163"), val = tensor([1, 512, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_163, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_167 = const()[name = tensor("op_167"), val = tensor([1, 1])]; + tensor var_169 = const()[name = tensor("op_169"), val = tensor([1, 1])]; + tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("custom")]; + tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55148288)))]; + tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55672640)))]; + tensor obj_7_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_169, groups = var_79, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_167, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor var_179 = const()[name = tensor("op_179"), val = tensor([1])]; + tensor channels_mean_3_cast_fp16 = reduce_mean(axes = var_179, keep_dims = var_80, x = inputs_3_cast_fp16)[name = tensor("channels_mean_3_cast_fp16")]; + tensor zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor("zero_mean_3_cast_fp16")]; + tensor zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor("zero_mean_sq_3_cast_fp16")]; + tensor var_183 = const()[name = tensor("op_183"), val = tensor([1])]; + tensor var_184_cast_fp16 = reduce_mean(axes = var_183, keep_dims = var_80, x = zero_mean_sq_3_cast_fp16)[name = tensor("op_184_cast_fp16")]; + tensor var_185_to_fp16 = const()[name = tensor("op_185_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_186_cast_fp16 = add(x = var_184_cast_fp16, y = var_185_to_fp16)[name = tensor("op_186_cast_fp16")]; + tensor denom_3_epsilon_0_to_fp16 = const()[name = tensor("denom_3_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_186_cast_fp16)[name = tensor("denom_3_cast_fp16")]; + tensor out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = 
tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55673728)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55674816)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_201 = const()[name = tensor("op_201"), val = tensor([1, 1])]; + tensor var_203 = const()[name = tensor("op_203"), val = tensor([1, 1])]; + tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("custom")]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55675904)))]; + tensor layers_0_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56200256)))]; + tensor query_3_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_bias_to_fp16, dilations = var_203, groups = var_79, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_201, weight = layers_0_encoder_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_207 = const()[name = tensor("op_207"), val = tensor([1, 1])]; + tensor var_209 = const()[name = tensor("op_209"), val = tensor([1, 1])]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("custom")]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56201344)))]; + tensor key_3_cast_fp16 = conv(dilations = var_209, groups = var_79, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_207, weight = layers_0_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_3_cast_fp16")]; + tensor var_214 = const()[name = tensor("op_214"), val = tensor([1, 1])]; + tensor var_216 = const()[name = tensor("op_216"), val = tensor([1, 1])]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("custom")]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56725696)))]; + tensor layers_0_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57250048)))]; + tensor value_3_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_bias_to_fp16, dilations = var_216, groups = var_79, pad = value_3_pad_0, pad_type = 
value_3_pad_type_0, strides = var_214, weight = layers_0_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_3_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 8, 64, -1])]; + tensor var_221_cast_fp16 = reshape(shape = var_220, x = query_3_cast_fp16)[name = tensor("op_221_cast_fp16")]; + tensor var_222_to_fp16 = const()[name = tensor("op_222_to_fp16"), val = tensor(0x1p-3)]; + tensor var_223_cast_fp16 = mul(x = var_221_cast_fp16, y = var_222_to_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_224 = const()[name = tensor("op_224"), val = tensor([1, 8, 64, -1])]; + tensor var_225_cast_fp16 = reshape(shape = var_224, x = key_3_cast_fp16)[name = tensor("op_225_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_223_cast_fp16, y = var_225_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor obj_13_cast_fp16 = softmax(axis = var_72, x = mh_w_5_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_229 = const()[name = tensor("op_229"), val = tensor([1, 8, 64, -1])]; + tensor var_230_cast_fp16 = reshape(shape = var_229, x = value_3_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_230_cast_fp16, y = obj_13_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_233 = const()[name = tensor("op_233"), val = tensor([1, 512, 1, -1])]; + tensor input_3_cast_fp16 = reshape(shape = var_233, x = attn_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor([1, 1])]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor([1, 1])]; + tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("custom")]; + tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57251136)))]; + tensor layers_0_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57775488)))]; + tensor obj_11_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_bias_to_fp16, dilations = var_239, groups = var_79, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_237, weight = layers_0_encoder_attn_o_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_245 = const()[name = tensor("op_245"), val = tensor([1])]; + tensor channels_mean_5_cast_fp16 = reduce_mean(axes = var_245, keep_dims = var_80, x = inputs_5_cast_fp16)[name = tensor("channels_mean_5_cast_fp16")]; + tensor zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = 
channels_mean_5_cast_fp16)[name = tensor("zero_mean_5_cast_fp16")]; + tensor zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor("zero_mean_sq_5_cast_fp16")]; + tensor var_249 = const()[name = tensor("op_249"), val = tensor([1])]; + tensor var_250_cast_fp16 = reduce_mean(axes = var_249, keep_dims = var_80, x = zero_mean_sq_5_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_251_to_fp16 = const()[name = tensor("op_251_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_252_cast_fp16 = add(x = var_250_cast_fp16, y = var_251_to_fp16)[name = tensor("op_252_cast_fp16")]; + tensor denom_5_epsilon_0_to_fp16 = const()[name = tensor("denom_5_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_252_cast_fp16)[name = tensor("denom_5_cast_fp16")]; + tensor out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor input_5_gamma_0_to_fp16 = const()[name = tensor("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57776576)))]; + tensor input_5_beta_0_to_fp16 = const()[name = tensor("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57777664)))]; + tensor input_5_epsilon_0_to_fp16 = const()[name = tensor("input_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_263 = const()[name = tensor("op_263"), val = tensor([1, 1])]; + tensor var_265 = const()[name = tensor("op_265"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57778752)))]; + tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59875968)))]; + tensor input_7_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_265, groups = var_79, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_263, weight = layers_0_fc1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor input_9_mode_0 = const()[name = tensor("input_9_mode_0"), val = tensor("EXACT")]; + tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_271 = const()[name = tensor("op_271"), val = tensor([1, 1])]; + tensor var_273 = const()[name = tensor("op_273"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_type_0 = const()[name = tensor("hidden_states_3_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_3_pad_0 = const()[name = tensor("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59880128)))]; + tensor layers_0_fc2_bias_to_fp16 
= const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61977344)))]; + tensor hidden_states_3_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_273, groups = var_79, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = var_271, weight = layers_0_fc2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_286 = const()[name = tensor("op_286"), val = tensor(3)]; + tensor var_293 = const()[name = tensor("op_293"), val = tensor(1)]; + tensor var_294 = const()[name = tensor("op_294"), val = tensor(true)]; + tensor var_306 = const()[name = tensor("op_306"), val = tensor([1])]; + tensor channels_mean_7_cast_fp16 = reduce_mean(axes = var_306, keep_dims = var_294, x = inputs_7_cast_fp16)[name = tensor("channels_mean_7_cast_fp16")]; + tensor zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor("zero_mean_7_cast_fp16")]; + tensor zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor("zero_mean_sq_7_cast_fp16")]; + tensor var_310 = const()[name = tensor("op_310"), val = tensor([1])]; + tensor var_311_cast_fp16 = reduce_mean(axes = var_310, keep_dims = var_294, x = zero_mean_sq_7_cast_fp16)[name = tensor("op_311_cast_fp16")]; + tensor var_312_to_fp16 = const()[name = tensor("op_312_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_313_cast_fp16 = add(x = var_311_cast_fp16, y = var_312_to_fp16)[name = tensor("op_313_cast_fp16")]; + tensor denom_7_epsilon_0_to_fp16 = const()[name = tensor("denom_7_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_313_cast_fp16)[name = tensor("denom_7_cast_fp16")]; + tensor out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor obj_15_gamma_0_to_fp16 = const()[name = tensor("obj_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61978432)))]; + tensor obj_15_beta_0_to_fp16 = const()[name = tensor("obj_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61979520)))]; + tensor obj_15_epsilon_0_to_fp16 = const()[name = tensor("obj_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor var_328 = const()[name = tensor("op_328"), val = tensor([1, 1])]; + tensor var_330 = const()[name = tensor("op_330"), val = tensor([1, 1])]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("custom")]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61980608)))]; + tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(62504960)))]; + tensor query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_330, groups = var_293, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_328, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_334 = const()[name = tensor("op_334"), val = tensor([1, 1])]; + tensor var_336 = const()[name = tensor("op_336"), val = tensor([1, 1])]; + tensor current_key_3_pad_type_0 = const()[name = tensor("current_key_3_pad_type_0"), val = tensor("custom")]; + tensor current_key_3_pad_0 = const()[name = tensor("current_key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62506048)))]; + tensor current_key_3_cast_fp16 = conv(dilations = var_336, groups = var_293, pad = current_key_3_pad_0, pad_type = current_key_3_pad_type_0, strides = var_334, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("current_key_3_cast_fp16")]; + tensor var_341 = const()[name = tensor("op_341"), val = tensor([1, 1])]; + tensor var_343 = const()[name = tensor("op_343"), val = tensor([1, 1])]; + tensor current_value_3_pad_type_0 = const()[name = tensor("current_value_3_pad_type_0"), val = tensor("custom")]; + tensor current_value_3_pad_0 = const()[name = tensor("current_value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63030400)))]; + tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63554752)))]; + tensor current_value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_343, groups = var_293, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = var_341, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("current_value_3_cast_fp16")]; + tensor var_350_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_350_cast_fp16")]; + tensor var_352_cast_fp16 = mul(x = var_51_cast_fp16_1, y = var_137_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_350_cast_fp16, y = var_352_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_354_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_354_cast_fp16")]; + tensor var_356_cast_fp16 = mul(x = var_60_cast_fp16_1, y = var_137_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_354_cast_fp16, y = var_356_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_359 = const()[name = tensor("op_359"), val = tensor([1, 8, 64, -1])]; + tensor var_360_cast_fp16 = reshape(shape = var_359, x = query_5_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_361_to_fp16 = const()[name = tensor("op_361_to_fp16"), val = tensor(0x1p-3)]; + tensor var_362_cast_fp16 = mul(x = var_360_cast_fp16, y = var_361_to_fp16)[name = tensor("op_362_cast_fp16")]; + tensor var_363 = const()[name = tensor("op_363"), val = 
tensor([1, 8, 64, -1])]; + tensor var_364_cast_fp16 = reshape(shape = var_363, x = key_5_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_362_cast_fp16, y = var_364_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_155_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_372_cast_fp16 = softmax(axis = var_286, x = mh_w_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_373 = const()[name = tensor("op_373"), val = tensor([1, 8, 64, -1])]; + tensor var_374_cast_fp16 = reshape(shape = var_373, x = value_5_cast_fp16)[name = tensor("op_374_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_374_cast_fp16, y = var_372_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_377 = const()[name = tensor("op_377"), val = tensor([1, 512, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_377, x = attn_5_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_381 = const()[name = tensor("op_381"), val = tensor([1, 1])]; + tensor var_383 = const()[name = tensor("op_383"), val = tensor([1, 1])]; + tensor obj_21_pad_type_0 = const()[name = tensor("obj_21_pad_type_0"), val = tensor("custom")]; + tensor obj_21_pad_0 = const()[name = tensor("obj_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63555840)))]; + tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64080192)))]; + tensor obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_383, groups = var_293, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = var_381, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_393 = const()[name = tensor("op_393"), val = tensor([1])]; + tensor channels_mean_9_cast_fp16 = reduce_mean(axes = var_393, keep_dims = var_294, x = inputs_9_cast_fp16)[name = tensor("channels_mean_9_cast_fp16")]; + tensor zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor("zero_mean_9_cast_fp16")]; + tensor zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor("zero_mean_sq_9_cast_fp16")]; + tensor var_397 = const()[name = tensor("op_397"), val = tensor([1])]; + tensor var_398_cast_fp16 = reduce_mean(axes = var_397, keep_dims = var_294, x = zero_mean_sq_9_cast_fp16)[name = tensor("op_398_cast_fp16")]; + tensor var_399_to_fp16 = const()[name = tensor("op_399_to_fp16"), val = tensor(0x1.5p-17)]; + tensor 
var_400_cast_fp16 = add(x = var_398_cast_fp16, y = var_399_to_fp16)[name = tensor("op_400_cast_fp16")]; + tensor denom_9_epsilon_0_to_fp16 = const()[name = tensor("denom_9_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_400_cast_fp16)[name = tensor("denom_9_cast_fp16")]; + tensor out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_23_gamma_0_to_fp16 = const()[name = tensor("obj_23_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64081280)))]; + tensor obj_23_beta_0_to_fp16 = const()[name = tensor("obj_23_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64082368)))]; + tensor obj_23_epsilon_0_to_fp16 = const()[name = tensor("obj_23_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor var_415 = const()[name = tensor("op_415"), val = tensor([1, 1])]; + tensor var_417 = const()[name = tensor("op_417"), val = tensor([1, 1])]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("custom")]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64083456)))]; + tensor layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64607808)))]; + tensor query_7_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_417, groups = var_293, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_415, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_421 = const()[name = tensor("op_421"), val = tensor([1, 1])]; + tensor var_423 = const()[name = tensor("op_423"), val = tensor([1, 1])]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("custom")]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64608896)))]; + tensor key_7_cast_fp16 = conv(dilations = var_423, groups = var_293, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_421, weight = layers_1_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_7_cast_fp16")]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430 = const()[name = tensor("op_430"), val = tensor([1, 1])]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("custom")]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_v_proj_weight_to_fp16 = const()[name = 
tensor("layers_1_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65133248)))]; + tensor layers_1_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65657600)))]; + tensor value_7_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_bias_to_fp16, dilations = var_430, groups = var_293, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_428, weight = layers_1_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_7_cast_fp16")]; + tensor var_434 = const()[name = tensor("op_434"), val = tensor([1, 8, 64, -1])]; + tensor var_435_cast_fp16 = reshape(shape = var_434, x = query_7_cast_fp16)[name = tensor("op_435_cast_fp16")]; + tensor var_436_to_fp16 = const()[name = tensor("op_436_to_fp16"), val = tensor(0x1p-3)]; + tensor var_437_cast_fp16 = mul(x = var_435_cast_fp16, y = var_436_to_fp16)[name = tensor("op_437_cast_fp16")]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 8, 64, -1])]; + tensor var_439_cast_fp16 = reshape(shape = var_438, x = key_7_cast_fp16)[name = tensor("op_439_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_437_cast_fp16, y = var_439_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor obj_27_cast_fp16 = softmax(axis = var_286, x = mh_w_11_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor var_443 = const()[name = tensor("op_443"), val = tensor([1, 8, 64, -1])]; + tensor var_444_cast_fp16 = reshape(shape = var_443, x = value_7_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_444_cast_fp16, y = obj_27_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_447 = const()[name = tensor("op_447"), val = tensor([1, 512, 1, -1])]; + tensor input_13_cast_fp16 = reshape(shape = var_447, x = attn_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_451 = const()[name = tensor("op_451"), val = tensor([1, 1])]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor([1, 1])]; + tensor obj_25_pad_type_0 = const()[name = tensor("obj_25_pad_type_0"), val = tensor("custom")]; + tensor obj_25_pad_0 = const()[name = tensor("obj_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65658688)))]; + tensor layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66183040)))]; + tensor obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_453, groups = var_293, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = 
var_451, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor var_459 = const()[name = tensor("op_459"), val = tensor([1])]; + tensor channels_mean_11_cast_fp16 = reduce_mean(axes = var_459, keep_dims = var_294, x = inputs_11_cast_fp16)[name = tensor("channels_mean_11_cast_fp16")]; + tensor zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor("zero_mean_11_cast_fp16")]; + tensor zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor("zero_mean_sq_11_cast_fp16")]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor([1])]; + tensor var_464_cast_fp16 = reduce_mean(axes = var_463, keep_dims = var_294, x = zero_mean_sq_11_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_465_to_fp16 = const()[name = tensor("op_465_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_466_cast_fp16 = add(x = var_464_cast_fp16, y = var_465_to_fp16)[name = tensor("op_466_cast_fp16")]; + tensor denom_11_epsilon_0_to_fp16 = const()[name = tensor("denom_11_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_466_cast_fp16)[name = tensor("denom_11_cast_fp16")]; + tensor out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_15_gamma_0_to_fp16 = const()[name = tensor("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66184128)))]; + tensor input_15_beta_0_to_fp16 = const()[name = tensor("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66185216)))]; + tensor input_15_epsilon_0_to_fp16 = const()[name = tensor("input_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_477 = const()[name = tensor("op_477"), val = tensor([1, 1])]; + tensor var_479 = const()[name = tensor("op_479"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66186304)))]; + tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68283520)))]; + tensor input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_479, groups = var_293, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_477, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor input_19_mode_0 = const()[name = tensor("input_19_mode_0"), val = tensor("EXACT")]; + tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_485 = 
const()[name = tensor("op_485"), val = tensor([1, 1])]; + tensor var_487 = const()[name = tensor("op_487"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68287680)))]; + tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70384896)))]; + tensor hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_487, groups = var_293, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_485, weight = layers_1_fc2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor(3)]; + tensor var_507 = const()[name = tensor("op_507"), val = tensor(1)]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor(true)]; + tensor var_520 = const()[name = tensor("op_520"), val = tensor([1])]; + tensor channels_mean_13_cast_fp16 = reduce_mean(axes = var_520, keep_dims = var_508, x = inputs_13_cast_fp16)[name = tensor("channels_mean_13_cast_fp16")]; + tensor zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor("zero_mean_13_cast_fp16")]; + tensor zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor("zero_mean_sq_13_cast_fp16")]; + tensor var_524 = const()[name = tensor("op_524"), val = tensor([1])]; + tensor var_525_cast_fp16 = reduce_mean(axes = var_524, keep_dims = var_508, x = zero_mean_sq_13_cast_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_526_to_fp16 = const()[name = tensor("op_526_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_527_cast_fp16 = add(x = var_525_cast_fp16, y = var_526_to_fp16)[name = tensor("op_527_cast_fp16")]; + tensor denom_13_epsilon_0_to_fp16 = const()[name = tensor("denom_13_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_527_cast_fp16)[name = tensor("denom_13_cast_fp16")]; + tensor out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70385984)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70387072)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_542 = const()[name = tensor("op_542"), val = tensor([1, 1])]; + tensor var_544 = const()[name = 
tensor("op_544"), val = tensor([1, 1])]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("custom")]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70388160)))]; + tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70912512)))]; + tensor query_9_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_544, groups = var_507, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_542, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_548 = const()[name = tensor("op_548"), val = tensor([1, 1])]; + tensor var_550 = const()[name = tensor("op_550"), val = tensor([1, 1])]; + tensor current_key_5_pad_type_0 = const()[name = tensor("current_key_5_pad_type_0"), val = tensor("custom")]; + tensor current_key_5_pad_0 = const()[name = tensor("current_key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70913600)))]; + tensor current_key_5_cast_fp16 = conv(dilations = var_550, groups = var_507, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = var_548, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("current_key_5_cast_fp16")]; + tensor var_555 = const()[name = tensor("op_555"), val = tensor([1, 1])]; + tensor var_557 = const()[name = tensor("op_557"), val = tensor([1, 1])]; + tensor current_value_5_pad_type_0 = const()[name = tensor("current_value_5_pad_type_0"), val = tensor("custom")]; + tensor current_value_5_pad_0 = const()[name = tensor("current_value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71437952)))]; + tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71962304)))]; + tensor current_value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_557, groups = var_507, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = var_555, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("current_value_5_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_564_cast_fp16")]; + tensor var_566_cast_fp16 = mul(x = var_51_cast_fp16_2, y = var_137_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_564_cast_fp16, y = var_566_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_568_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_568_cast_fp16")]; + tensor var_570_cast_fp16 = mul(x = var_60_cast_fp16_2, y = var_137_cast_fp16)[name = 
tensor("op_570_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_568_cast_fp16, y = var_570_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_573 = const()[name = tensor("op_573"), val = tensor([1, 8, 64, -1])]; + tensor var_574_cast_fp16 = reshape(shape = var_573, x = query_9_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_575_to_fp16 = const()[name = tensor("op_575_to_fp16"), val = tensor(0x1p-3)]; + tensor var_576_cast_fp16 = mul(x = var_574_cast_fp16, y = var_575_to_fp16)[name = tensor("op_576_cast_fp16")]; + tensor var_577 = const()[name = tensor("op_577"), val = tensor([1, 8, 64, -1])]; + tensor var_578_cast_fp16 = reshape(shape = var_577, x = key_9_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_576_cast_fp16, y = var_578_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_155_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_586_cast_fp16 = softmax(axis = var_500, x = mh_w_15_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_587 = const()[name = tensor("op_587"), val = tensor([1, 8, 64, -1])]; + tensor var_588_cast_fp16 = reshape(shape = var_587, x = value_9_cast_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_588_cast_fp16, y = var_586_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_591 = const()[name = tensor("op_591"), val = tensor([1, 512, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_591, x = attn_9_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_595 = const()[name = tensor("op_595"), val = tensor([1, 1])]; + tensor var_597 = const()[name = tensor("op_597"), val = tensor([1, 1])]; + tensor obj_35_pad_type_0 = const()[name = tensor("obj_35_pad_type_0"), val = tensor("custom")]; + tensor obj_35_pad_0 = const()[name = tensor("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71963392)))]; + tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72487744)))]; + tensor obj_35_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_597, groups = var_507, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = var_595, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor var_607 = const()[name = tensor("op_607"), val = tensor([1])]; + tensor channels_mean_15_cast_fp16 = reduce_mean(axes = var_607, keep_dims = var_508, x = inputs_15_cast_fp16)[name = 
tensor("channels_mean_15_cast_fp16")]; + tensor zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor("zero_mean_15_cast_fp16")]; + tensor zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor("zero_mean_sq_15_cast_fp16")]; + tensor var_611 = const()[name = tensor("op_611"), val = tensor([1])]; + tensor var_612_cast_fp16 = reduce_mean(axes = var_611, keep_dims = var_508, x = zero_mean_sq_15_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_to_fp16 = const()[name = tensor("op_613_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_614_cast_fp16 = add(x = var_612_cast_fp16, y = var_613_to_fp16)[name = tensor("op_614_cast_fp16")]; + tensor denom_15_epsilon_0_to_fp16 = const()[name = tensor("denom_15_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_614_cast_fp16)[name = tensor("denom_15_cast_fp16")]; + tensor out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72488832)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72489920)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_629 = const()[name = tensor("op_629"), val = tensor([1, 1])]; + tensor var_631 = const()[name = tensor("op_631"), val = tensor([1, 1])]; + tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("custom")]; + tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72491008)))]; + tensor layers_2_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73015360)))]; + tensor query_11_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_bias_to_fp16, dilations = var_631, groups = var_507, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_629, weight = layers_2_encoder_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_635 = const()[name = tensor("op_635"), val = tensor([1, 1])]; + tensor var_637 = const()[name = tensor("op_637"), val = tensor([1, 1])]; + tensor key_11_pad_type_0 = const()[name = tensor("key_11_pad_type_0"), val = tensor("custom")]; + tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73016448)))]; + tensor key_11_cast_fp16 = 
conv(dilations = var_637, groups = var_507, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_635, weight = layers_2_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_11_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("custom")]; + tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73540800)))]; + tensor layers_2_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74065152)))]; + tensor value_11_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_bias_to_fp16, dilations = var_644, groups = var_507, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_642, weight = layers_2_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_11_cast_fp16")]; + tensor var_648 = const()[name = tensor("op_648"), val = tensor([1, 8, 64, -1])]; + tensor var_649_cast_fp16 = reshape(shape = var_648, x = query_11_cast_fp16)[name = tensor("op_649_cast_fp16")]; + tensor var_650_to_fp16 = const()[name = tensor("op_650_to_fp16"), val = tensor(0x1p-3)]; + tensor var_651_cast_fp16 = mul(x = var_649_cast_fp16, y = var_650_to_fp16)[name = tensor("op_651_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 8, 64, -1])]; + tensor var_653_cast_fp16 = reshape(shape = var_652, x = key_11_cast_fp16)[name = tensor("op_653_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_651_cast_fp16, y = var_653_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor obj_41_cast_fp16 = softmax(axis = var_500, x = mh_w_17_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_657 = const()[name = tensor("op_657"), val = tensor([1, 8, 64, -1])]; + tensor var_658_cast_fp16 = reshape(shape = var_657, x = value_11_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_658_cast_fp16, y = obj_41_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_661 = const()[name = tensor("op_661"), val = tensor([1, 512, 1, -1])]; + tensor input_23_cast_fp16 = reshape(shape = var_661, x = attn_11_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_665 = const()[name = tensor("op_665"), val = tensor([1, 1])]; + tensor var_667 = const()[name = tensor("op_667"), val = tensor([1, 1])]; + tensor obj_39_pad_type_0 = const()[name = tensor("obj_39_pad_type_0"), val = tensor("custom")]; + tensor obj_39_pad_0 = const()[name = tensor("obj_39_pad_0"), 
val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74066240)))]; + tensor layers_2_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74590592)))]; + tensor obj_39_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_bias_to_fp16, dilations = var_667, groups = var_507, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = var_665, weight = layers_2_encoder_attn_o_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_673 = const()[name = tensor("op_673"), val = tensor([1])]; + tensor channels_mean_17_cast_fp16 = reduce_mean(axes = var_673, keep_dims = var_508, x = inputs_17_cast_fp16)[name = tensor("channels_mean_17_cast_fp16")]; + tensor zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor("zero_mean_17_cast_fp16")]; + tensor zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor("zero_mean_sq_17_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1])]; + tensor var_678_cast_fp16 = reduce_mean(axes = var_677, keep_dims = var_508, x = zero_mean_sq_17_cast_fp16)[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_680_cast_fp16 = add(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor denom_17_epsilon_0_to_fp16 = const()[name = tensor("denom_17_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_680_cast_fp16)[name = tensor("denom_17_cast_fp16")]; + tensor out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor input_25_gamma_0_to_fp16 = const()[name = tensor("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74591680)))]; + tensor input_25_beta_0_to_fp16 = const()[name = tensor("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74592768)))]; + tensor input_25_epsilon_0_to_fp16 = const()[name = tensor("input_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_691 = const()[name = tensor("op_691"), val = tensor([1, 1])]; + tensor var_693 = const()[name = tensor("op_693"), val = tensor([1, 1])]; + tensor input_27_pad_type_0 = const()[name = tensor("input_27_pad_type_0"), val = tensor("custom")]; + tensor input_27_pad_0 = const()[name = tensor("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74593856)))]; + tensor layers_2_fc1_bias_to_fp16 = 
const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76691072)))]; + tensor input_27_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_693, groups = var_507, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = var_691, weight = layers_2_fc1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor input_29_mode_0 = const()[name = tensor("input_29_mode_0"), val = tensor("EXACT")]; + tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor var_699 = const()[name = tensor("op_699"), val = tensor([1, 1])]; + tensor var_701 = const()[name = tensor("op_701"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76695232)))]; + tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78792448)))]; + tensor hidden_states_7_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_701, groups = var_507, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_699, weight = layers_2_fc2_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_714 = const()[name = tensor("op_714"), val = tensor(3)]; + tensor var_721 = const()[name = tensor("op_721"), val = tensor(1)]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor(true)]; + tensor var_734 = const()[name = tensor("op_734"), val = tensor([1])]; + tensor channels_mean_19_cast_fp16 = reduce_mean(axes = var_734, keep_dims = var_722, x = inputs_19_cast_fp16)[name = tensor("channels_mean_19_cast_fp16")]; + tensor zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor("zero_mean_19_cast_fp16")]; + tensor zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor("zero_mean_sq_19_cast_fp16")]; + tensor var_738 = const()[name = tensor("op_738"), val = tensor([1])]; + tensor var_739_cast_fp16 = reduce_mean(axes = var_738, keep_dims = var_722, x = zero_mean_sq_19_cast_fp16)[name = tensor("op_739_cast_fp16")]; + tensor var_740_to_fp16 = const()[name = tensor("op_740_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_741_cast_fp16 = add(x = var_739_cast_fp16, y = var_740_to_fp16)[name = tensor("op_741_cast_fp16")]; + tensor denom_19_epsilon_0_to_fp16 = const()[name = tensor("denom_19_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_741_cast_fp16)[name = tensor("denom_19_cast_fp16")]; + tensor out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor obj_43_gamma_0_to_fp16 = const()[name = tensor("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78793536)))]; + 
tensor obj_43_beta_0_to_fp16 = const()[name = tensor("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78794624)))]; + tensor obj_43_epsilon_0_to_fp16 = const()[name = tensor("obj_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor var_756 = const()[name = tensor("op_756"), val = tensor([1, 1])]; + tensor var_758 = const()[name = tensor("op_758"), val = tensor([1, 1])]; + tensor query_13_pad_type_0 = const()[name = tensor("query_13_pad_type_0"), val = tensor("custom")]; + tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78795712)))]; + tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79320064)))]; + tensor query_13_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_758, groups = var_721, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_756, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_762 = const()[name = tensor("op_762"), val = tensor([1, 1])]; + tensor var_764 = const()[name = tensor("op_764"), val = tensor([1, 1])]; + tensor current_key_7_pad_type_0 = const()[name = tensor("current_key_7_pad_type_0"), val = tensor("custom")]; + tensor current_key_7_pad_0 = const()[name = tensor("current_key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79321152)))]; + tensor current_key_7_cast_fp16 = conv(dilations = var_764, groups = var_721, pad = current_key_7_pad_0, pad_type = current_key_7_pad_type_0, strides = var_762, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("current_key_7_cast_fp16")]; + tensor var_769 = const()[name = tensor("op_769"), val = tensor([1, 1])]; + tensor var_771 = const()[name = tensor("op_771"), val = tensor([1, 1])]; + tensor current_value_7_pad_type_0 = const()[name = tensor("current_value_7_pad_type_0"), val = tensor("custom")]; + tensor current_value_7_pad_0 = const()[name = tensor("current_value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79845504)))]; + tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80369856)))]; + tensor current_value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_771, groups = var_721, pad = current_value_7_pad_0, pad_type = current_value_7_pad_type_0, strides = var_769, weight = 
layers_3_self_attn_v_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("current_value_7_cast_fp16")]; + tensor var_778_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_780_cast_fp16 = mul(x = var_51_cast_fp16_3, y = var_137_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_778_cast_fp16, y = var_780_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_782_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = var_60_cast_fp16_3, y = var_137_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_782_cast_fp16, y = var_784_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_787 = const()[name = tensor("op_787"), val = tensor([1, 8, 64, -1])]; + tensor var_788_cast_fp16 = reshape(shape = var_787, x = query_13_cast_fp16)[name = tensor("op_788_cast_fp16")]; + tensor var_789_to_fp16 = const()[name = tensor("op_789_to_fp16"), val = tensor(0x1p-3)]; + tensor var_790_cast_fp16 = mul(x = var_788_cast_fp16, y = var_789_to_fp16)[name = tensor("op_790_cast_fp16")]; + tensor var_791 = const()[name = tensor("op_791"), val = tensor([1, 8, 64, -1])]; + tensor var_792_cast_fp16 = reshape(shape = var_791, x = key_13_cast_fp16)[name = tensor("op_792_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_790_cast_fp16, y = var_792_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_155_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_800_cast_fp16 = softmax(axis = var_714, x = mh_w_21_cast_fp16)[name = tensor("op_800_cast_fp16")]; + tensor var_801 = const()[name = tensor("op_801"), val = tensor([1, 8, 64, -1])]; + tensor var_802_cast_fp16 = reshape(shape = var_801, x = value_13_cast_fp16)[name = tensor("op_802_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_802_cast_fp16, y = var_800_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_805 = const()[name = tensor("op_805"), val = tensor([1, 512, 1, -1])]; + tensor input_31_cast_fp16 = reshape(shape = var_805, x = attn_13_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_809 = const()[name = tensor("op_809"), val = tensor([1, 1])]; + tensor var_811 = const()[name = tensor("op_811"), val = tensor([1, 1])]; + tensor obj_49_pad_type_0 = const()[name = tensor("obj_49_pad_type_0"), val = tensor("custom")]; + tensor obj_49_pad_0 = const()[name = tensor("obj_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80370944)))]; + tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(80895296)))]; + tensor obj_49_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_811, groups = var_721, pad = obj_49_pad_0, pad_type = obj_49_pad_type_0, strides = var_809, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_821 = const()[name = tensor("op_821"), val = tensor([1])]; + tensor channels_mean_21_cast_fp16 = reduce_mean(axes = var_821, keep_dims = var_722, x = inputs_21_cast_fp16)[name = tensor("channels_mean_21_cast_fp16")]; + tensor zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = tensor("zero_mean_21_cast_fp16")]; + tensor zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor("zero_mean_sq_21_cast_fp16")]; + tensor var_825 = const()[name = tensor("op_825"), val = tensor([1])]; + tensor var_826_cast_fp16 = reduce_mean(axes = var_825, keep_dims = var_722, x = zero_mean_sq_21_cast_fp16)[name = tensor("op_826_cast_fp16")]; + tensor var_827_to_fp16 = const()[name = tensor("op_827_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_828_cast_fp16 = add(x = var_826_cast_fp16, y = var_827_to_fp16)[name = tensor("op_828_cast_fp16")]; + tensor denom_21_epsilon_0_to_fp16 = const()[name = tensor("denom_21_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_828_cast_fp16)[name = tensor("denom_21_cast_fp16")]; + tensor out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_51_gamma_0_to_fp16 = const()[name = tensor("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80896384)))]; + tensor obj_51_beta_0_to_fp16 = const()[name = tensor("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80897472)))]; + tensor obj_51_epsilon_0_to_fp16 = const()[name = tensor("obj_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor var_843 = const()[name = tensor("op_843"), val = tensor([1, 1])]; + tensor var_845 = const()[name = tensor("op_845"), val = tensor([1, 1])]; + tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("custom")]; + tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80898560)))]; + tensor layers_3_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81422912)))]; + tensor query_15_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_bias_to_fp16, dilations = var_845, groups = var_721, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = var_843, weight = 
layers_3_encoder_attn_q_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 1])]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, 1])]; + tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("custom")]; + tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81424000)))]; + tensor key_15_cast_fp16 = conv(dilations = var_851, groups = var_721, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = var_849, weight = layers_3_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_15_cast_fp16")]; + tensor var_856 = const()[name = tensor("op_856"), val = tensor([1, 1])]; + tensor var_858 = const()[name = tensor("op_858"), val = tensor([1, 1])]; + tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("custom")]; + tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81948352)))]; + tensor layers_3_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82472704)))]; + tensor value_15_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_bias_to_fp16, dilations = var_858, groups = var_721, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = var_856, weight = layers_3_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_15_cast_fp16")]; + tensor var_862 = const()[name = tensor("op_862"), val = tensor([1, 8, 64, -1])]; + tensor var_863_cast_fp16 = reshape(shape = var_862, x = query_15_cast_fp16)[name = tensor("op_863_cast_fp16")]; + tensor var_864_to_fp16 = const()[name = tensor("op_864_to_fp16"), val = tensor(0x1p-3)]; + tensor var_865_cast_fp16 = mul(x = var_863_cast_fp16, y = var_864_to_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_866 = const()[name = tensor("op_866"), val = tensor([1, 8, 64, -1])]; + tensor var_867_cast_fp16 = reshape(shape = var_866, x = key_15_cast_fp16)[name = tensor("op_867_cast_fp16")]; + tensor mh_w_23_transpose_x_0 = const()[name = tensor("mh_w_23_transpose_x_0"), val = tensor(true)]; + tensor mh_w_23_transpose_y_0 = const()[name = tensor("mh_w_23_transpose_y_0"), val = tensor(false)]; + tensor mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_865_cast_fp16, y = var_867_cast_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor obj_55_cast_fp16 = softmax(axis = var_714, x = mh_w_23_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor var_871 = const()[name = tensor("op_871"), val = tensor([1, 8, 64, -1])]; + tensor var_872_cast_fp16 = reshape(shape = var_871, x = value_15_cast_fp16)[name = tensor("op_872_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + 
tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_872_cast_fp16, y = obj_55_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_875 = const()[name = tensor("op_875"), val = tensor([1, 512, 1, -1])]; + tensor input_33_cast_fp16 = reshape(shape = var_875, x = attn_15_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_879 = const()[name = tensor("op_879"), val = tensor([1, 1])]; + tensor var_881 = const()[name = tensor("op_881"), val = tensor([1, 1])]; + tensor obj_53_pad_type_0 = const()[name = tensor("obj_53_pad_type_0"), val = tensor("custom")]; + tensor obj_53_pad_0 = const()[name = tensor("obj_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82473792)))]; + tensor layers_3_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82998144)))]; + tensor obj_53_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_bias_to_fp16, dilations = var_881, groups = var_721, pad = obj_53_pad_0, pad_type = obj_53_pad_type_0, strides = var_879, weight = layers_3_encoder_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor var_890 = const()[name = tensor("op_890"), val = tensor([1])]; + tensor channels_mean_23_cast_fp16 = reduce_mean(axes = var_890, keep_dims = var_722, x = inputs_23_cast_fp16)[name = tensor("channels_mean_23_cast_fp16")]; + tensor zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor("zero_mean_23_cast_fp16")]; + tensor zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = tensor("zero_mean_sq_23_cast_fp16")]; + tensor var_894 = const()[name = tensor("op_894"), val = tensor([1])]; + tensor var_895_cast_fp16 = reduce_mean(axes = var_894, keep_dims = var_722, x = zero_mean_sq_23_cast_fp16)[name = tensor("op_895_cast_fp16")]; + tensor var_896_to_fp16 = const()[name = tensor("op_896_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_897_cast_fp16 = add(x = var_895_cast_fp16, y = var_896_to_fp16)[name = tensor("op_897_cast_fp16")]; + tensor denom_23_epsilon_0_to_fp16 = const()[name = tensor("denom_23_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = var_897_cast_fp16)[name = tensor("denom_23_cast_fp16")]; + tensor out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82999232)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83000320)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = 
obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910 = const()[name = tensor("op_910"), val = tensor([1, 1])]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("custom")]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83001408)))]; + tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85098624)))]; + tensor input_37_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_910, groups = var_721, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_908, weight = layers_3_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85102784)))]; + tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87200000)))]; + tensor hidden_states_9_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_918, groups = var_721, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_916, weight = layers_3_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_932 = const()[name = tensor("op_932"), val = tensor(3)]; + tensor var_939 = const()[name = tensor("op_939"), val = tensor(1)]; + tensor var_940 = const()[name = tensor("op_940"), val = tensor(true)]; + tensor var_952 = const()[name = tensor("op_952"), val = tensor([1])]; + tensor channels_mean_25_cast_fp16 = reduce_mean(axes = var_952, keep_dims = var_940, x = inputs_25_cast_fp16)[name = tensor("channels_mean_25_cast_fp16")]; + tensor zero_mean_25_cast_fp16 = sub(x = inputs_25_cast_fp16, y = channels_mean_25_cast_fp16)[name = tensor("zero_mean_25_cast_fp16")]; + tensor zero_mean_sq_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = zero_mean_25_cast_fp16)[name = tensor("zero_mean_sq_25_cast_fp16")]; + tensor var_956 = const()[name = tensor("op_956"), val = tensor([1])]; + tensor var_957_cast_fp16 = reduce_mean(axes = var_956, keep_dims = var_940, x = zero_mean_sq_25_cast_fp16)[name = tensor("op_957_cast_fp16")]; + tensor var_958_to_fp16 = const()[name = tensor("op_958_to_fp16"), 
val = tensor(0x1.5p-17)]; + tensor var_959_cast_fp16 = add(x = var_957_cast_fp16, y = var_958_to_fp16)[name = tensor("op_959_cast_fp16")]; + tensor denom_25_epsilon_0_to_fp16 = const()[name = tensor("denom_25_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0_to_fp16, x = var_959_cast_fp16)[name = tensor("denom_25_cast_fp16")]; + tensor out_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = denom_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87201088)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87202176)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor([1, 1])]; + tensor var_976 = const()[name = tensor("op_976"), val = tensor([1, 1])]; + tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("custom")]; + tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87203264)))]; + tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87727616)))]; + tensor query_17_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = var_976, groups = var_939, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = var_974, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_980 = const()[name = tensor("op_980"), val = tensor([1, 1])]; + tensor var_982 = const()[name = tensor("op_982"), val = tensor([1, 1])]; + tensor current_key_9_pad_type_0 = const()[name = tensor("current_key_9_pad_type_0"), val = tensor("custom")]; + tensor current_key_9_pad_0 = const()[name = tensor("current_key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87728704)))]; + tensor current_key_9_cast_fp16 = conv(dilations = var_982, groups = var_939, pad = current_key_9_pad_0, pad_type = current_key_9_pad_type_0, strides = var_980, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("current_key_9_cast_fp16")]; + tensor var_987 = const()[name = tensor("op_987"), val = tensor([1, 1])]; + tensor var_989 = const()[name = tensor("op_989"), val = tensor([1, 1])]; + tensor current_value_9_pad_type_0 = const()[name = tensor("current_value_9_pad_type_0"), val = tensor("custom")]; + tensor current_value_9_pad_0 = const()[name = tensor("current_value_9_pad_0"), val 
= tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88253056)))]; + tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88777408)))]; + tensor current_value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = var_989, groups = var_939, pad = current_value_9_pad_0, pad_type = current_value_9_pad_type_0, strides = var_987, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("current_value_9_cast_fp16")]; + tensor var_996_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_998_cast_fp16 = mul(x = var_51_cast_fp16_4, y = var_137_cast_fp16)[name = tensor("op_998_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_996_cast_fp16, y = var_998_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1000_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1002_cast_fp16 = mul(x = var_60_cast_fp16_4, y = var_137_cast_fp16)[name = tensor("op_1002_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1000_cast_fp16, y = var_1002_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1005 = const()[name = tensor("op_1005"), val = tensor([1, 8, 64, -1])]; + tensor var_1006_cast_fp16 = reshape(shape = var_1005, x = query_17_cast_fp16)[name = tensor("op_1006_cast_fp16")]; + tensor var_1007_to_fp16 = const()[name = tensor("op_1007_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1008_cast_fp16 = mul(x = var_1006_cast_fp16, y = var_1007_to_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1009 = const()[name = tensor("op_1009"), val = tensor([1, 8, 64, -1])]; + tensor var_1010_cast_fp16 = reshape(shape = var_1009, x = key_17_cast_fp16)[name = tensor("op_1010_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1008_cast_fp16, y = var_1010_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_155_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor var_1018_cast_fp16 = softmax(axis = var_932, x = mh_w_27_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1019 = const()[name = tensor("op_1019"), val = tensor([1, 8, 64, -1])]; + tensor var_1020_cast_fp16 = reshape(shape = var_1019, x = value_17_cast_fp16)[name = tensor("op_1020_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1020_cast_fp16, y = var_1018_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1023 = const()[name = tensor("op_1023"), val = tensor([1, 512, 1, -1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1023, x = attn_17_cast_fp16)[name = tensor("input_41_cast_fp16")]; 
+ tensor var_1027 = const()[name = tensor("op_1027"), val = tensor([1, 1])]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor obj_63_pad_type_0 = const()[name = tensor("obj_63_pad_type_0"), val = tensor("custom")]; + tensor obj_63_pad_0 = const()[name = tensor("obj_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88778496)))]; + tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89302848)))]; + tensor obj_63_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = var_1029, groups = var_939, pad = obj_63_pad_0, pad_type = obj_63_pad_type_0, strides = var_1027, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1])]; + tensor channels_mean_27_cast_fp16 = reduce_mean(axes = var_1039, keep_dims = var_940, x = inputs_27_cast_fp16)[name = tensor("channels_mean_27_cast_fp16")]; + tensor zero_mean_27_cast_fp16 = sub(x = inputs_27_cast_fp16, y = channels_mean_27_cast_fp16)[name = tensor("zero_mean_27_cast_fp16")]; + tensor zero_mean_sq_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = zero_mean_27_cast_fp16)[name = tensor("zero_mean_sq_27_cast_fp16")]; + tensor var_1043 = const()[name = tensor("op_1043"), val = tensor([1])]; + tensor var_1044_cast_fp16 = reduce_mean(axes = var_1043, keep_dims = var_940, x = zero_mean_sq_27_cast_fp16)[name = tensor("op_1044_cast_fp16")]; + tensor var_1045_to_fp16 = const()[name = tensor("op_1045_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1046_cast_fp16 = add(x = var_1044_cast_fp16, y = var_1045_to_fp16)[name = tensor("op_1046_cast_fp16")]; + tensor denom_27_epsilon_0_to_fp16 = const()[name = tensor("denom_27_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0_to_fp16, x = var_1046_cast_fp16)[name = tensor("denom_27_cast_fp16")]; + tensor out_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = denom_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89303936)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89305024)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor var_1061 = const()[name = tensor("op_1061"), val = tensor([1, 1])]; + tensor var_1063 = const()[name = tensor("op_1063"), val = tensor([1, 1])]; + tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = tensor("custom")]; + tensor query_19_pad_0 = const()[name 
= tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89306112)))]; + tensor layers_4_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89830464)))]; + tensor query_19_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_bias_to_fp16, dilations = var_1063, groups = var_939, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = var_1061, weight = layers_4_encoder_attn_q_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1067 = const()[name = tensor("op_1067"), val = tensor([1, 1])]; + tensor var_1069 = const()[name = tensor("op_1069"), val = tensor([1, 1])]; + tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("custom")]; + tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89831552)))]; + tensor key_19_cast_fp16 = conv(dilations = var_1069, groups = var_939, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = var_1067, weight = layers_4_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_19_cast_fp16")]; + tensor var_1074 = const()[name = tensor("op_1074"), val = tensor([1, 1])]; + tensor var_1076 = const()[name = tensor("op_1076"), val = tensor([1, 1])]; + tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("custom")]; + tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90355904)))]; + tensor layers_4_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90880256)))]; + tensor value_19_cast_fp16 = conv(bias = layers_4_encoder_attn_v_proj_bias_to_fp16, dilations = var_1076, groups = var_939, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = var_1074, weight = layers_4_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_19_cast_fp16")]; + tensor var_1080 = const()[name = tensor("op_1080"), val = tensor([1, 8, 64, -1])]; + tensor var_1081_cast_fp16 = reshape(shape = var_1080, x = query_19_cast_fp16)[name = tensor("op_1081_cast_fp16")]; + tensor var_1082_to_fp16 = const()[name = tensor("op_1082_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1083_cast_fp16 = mul(x = var_1081_cast_fp16, y = var_1082_to_fp16)[name = tensor("op_1083_cast_fp16")]; + tensor var_1084 = const()[name = tensor("op_1084"), val = tensor([1, 8, 64, -1])]; + tensor var_1085_cast_fp16 = reshape(shape = var_1084, x = key_19_cast_fp16)[name = tensor("op_1085_cast_fp16")]; + tensor mh_w_29_transpose_x_0 = const()[name = tensor("mh_w_29_transpose_x_0"), val = tensor(true)]; + tensor mh_w_29_transpose_y_0 = const()[name = 
tensor("mh_w_29_transpose_y_0"), val = tensor(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1083_cast_fp16, y = var_1085_cast_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor obj_69_cast_fp16 = softmax(axis = var_932, x = mh_w_29_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor var_1089 = const()[name = tensor("op_1089"), val = tensor([1, 8, 64, -1])]; + tensor var_1090_cast_fp16 = reshape(shape = var_1089, x = value_19_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1090_cast_fp16, y = obj_69_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1093 = const()[name = tensor("op_1093"), val = tensor([1, 512, 1, -1])]; + tensor input_43_cast_fp16 = reshape(shape = var_1093, x = attn_19_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1097 = const()[name = tensor("op_1097"), val = tensor([1, 1])]; + tensor var_1099 = const()[name = tensor("op_1099"), val = tensor([1, 1])]; + tensor obj_67_pad_type_0 = const()[name = tensor("obj_67_pad_type_0"), val = tensor("custom")]; + tensor obj_67_pad_0 = const()[name = tensor("obj_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90881344)))]; + tensor layers_4_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91405696)))]; + tensor obj_67_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_bias_to_fp16, dilations = var_1099, groups = var_939, pad = obj_67_pad_0, pad_type = obj_67_pad_type_0, strides = var_1097, weight = layers_4_encoder_attn_o_proj_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_1108 = const()[name = tensor("op_1108"), val = tensor([1])]; + tensor channels_mean_29_cast_fp16 = reduce_mean(axes = var_1108, keep_dims = var_940, x = inputs_29_cast_fp16)[name = tensor("channels_mean_29_cast_fp16")]; + tensor zero_mean_29_cast_fp16 = sub(x = inputs_29_cast_fp16, y = channels_mean_29_cast_fp16)[name = tensor("zero_mean_29_cast_fp16")]; + tensor zero_mean_sq_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = zero_mean_29_cast_fp16)[name = tensor("zero_mean_sq_29_cast_fp16")]; + tensor var_1112 = const()[name = tensor("op_1112"), val = tensor([1])]; + tensor var_1113_cast_fp16 = reduce_mean(axes = var_1112, keep_dims = var_940, x = zero_mean_sq_29_cast_fp16)[name = tensor("op_1113_cast_fp16")]; + tensor var_1114_to_fp16 = const()[name = tensor("op_1114_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1115_cast_fp16 = add(x = var_1113_cast_fp16, y = var_1114_to_fp16)[name = tensor("op_1115_cast_fp16")]; + tensor denom_29_epsilon_0_to_fp16 = const()[name = tensor("denom_29_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0_to_fp16, x = var_1115_cast_fp16)[name = 
tensor("denom_29_cast_fp16")]; + tensor out_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = denom_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor input_45_gamma_0_to_fp16 = const()[name = tensor("input_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91406784)))]; + tensor input_45_beta_0_to_fp16 = const()[name = tensor("input_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91407872)))]; + tensor input_45_epsilon_0_to_fp16 = const()[name = tensor("input_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor var_1126 = const()[name = tensor("op_1126"), val = tensor([1, 1])]; + tensor var_1128 = const()[name = tensor("op_1128"), val = tensor([1, 1])]; + tensor input_47_pad_type_0 = const()[name = tensor("input_47_pad_type_0"), val = tensor("custom")]; + tensor input_47_pad_0 = const()[name = tensor("input_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91408960)))]; + tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93506176)))]; + tensor input_47_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = var_1128, groups = var_939, pad = input_47_pad_0, pad_type = input_47_pad_type_0, strides = var_1126, weight = layers_4_fc1_weight_to_fp16, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor input_49_mode_0 = const()[name = tensor("input_49_mode_0"), val = tensor("EXACT")]; + tensor input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1134 = const()[name = tensor("op_1134"), val = tensor([1, 1])]; + tensor var_1136 = const()[name = tensor("op_1136"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93510336)))]; + tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95607552)))]; + tensor hidden_states_11_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = var_1136, groups = var_939, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = var_1134, weight = layers_4_fc2_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor var_1150 = const()[name = tensor("op_1150"), val = tensor(3)]; + tensor var_1157 = const()[name = tensor("op_1157"), val = tensor(1)]; + tensor var_1158 = const()[name = 
tensor("op_1158"), val = tensor(true)]; + tensor var_1170 = const()[name = tensor("op_1170"), val = tensor([1])]; + tensor channels_mean_31_cast_fp16 = reduce_mean(axes = var_1170, keep_dims = var_1158, x = inputs_31_cast_fp16)[name = tensor("channels_mean_31_cast_fp16")]; + tensor zero_mean_31_cast_fp16 = sub(x = inputs_31_cast_fp16, y = channels_mean_31_cast_fp16)[name = tensor("zero_mean_31_cast_fp16")]; + tensor zero_mean_sq_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = zero_mean_31_cast_fp16)[name = tensor("zero_mean_sq_31_cast_fp16")]; + tensor var_1174 = const()[name = tensor("op_1174"), val = tensor([1])]; + tensor var_1175_cast_fp16 = reduce_mean(axes = var_1174, keep_dims = var_1158, x = zero_mean_sq_31_cast_fp16)[name = tensor("op_1175_cast_fp16")]; + tensor var_1176_to_fp16 = const()[name = tensor("op_1176_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1177_cast_fp16 = add(x = var_1175_cast_fp16, y = var_1176_to_fp16)[name = tensor("op_1177_cast_fp16")]; + tensor denom_31_epsilon_0_to_fp16 = const()[name = tensor("denom_31_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0_to_fp16, x = var_1177_cast_fp16)[name = tensor("denom_31_cast_fp16")]; + tensor out_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = denom_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor obj_71_gamma_0_to_fp16 = const()[name = tensor("obj_71_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95608640)))]; + tensor obj_71_beta_0_to_fp16 = const()[name = tensor("obj_71_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95609728)))]; + tensor obj_71_epsilon_0_to_fp16 = const()[name = tensor("obj_71_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_71_cast_fp16 = batch_norm(beta = obj_71_beta_0_to_fp16, epsilon = obj_71_epsilon_0_to_fp16, gamma = obj_71_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor var_1192 = const()[name = tensor("op_1192"), val = tensor([1, 1])]; + tensor var_1194 = const()[name = tensor("op_1194"), val = tensor([1, 1])]; + tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("custom")]; + tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95610816)))]; + tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96135168)))]; + tensor query_21_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = var_1194, groups = var_1157, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = var_1192, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_1198 = const()[name = tensor("op_1198"), val = tensor([1, 1])]; + tensor var_1200 = const()[name = tensor("op_1200"), val = tensor([1, 1])]; + tensor current_key_pad_type_0 = const()[name = tensor("current_key_pad_type_0"), val = tensor("custom")]; + tensor current_key_pad_0 = const()[name = tensor("current_key_pad_0"), val = tensor([0, 0, 0, 
0])]; + tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96136256)))]; + tensor current_key_cast_fp16 = conv(dilations = var_1200, groups = var_1157, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_1198, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("current_key_cast_fp16")]; + tensor var_1205 = const()[name = tensor("op_1205"), val = tensor([1, 1])]; + tensor var_1207 = const()[name = tensor("op_1207"), val = tensor([1, 1])]; + tensor current_value_pad_type_0 = const()[name = tensor("current_value_pad_type_0"), val = tensor("custom")]; + tensor current_value_pad_0 = const()[name = tensor("current_value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96660608)))]; + tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97184960)))]; + tensor current_value_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = var_1207, groups = var_1157, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_1205, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("current_value_cast_fp16")]; + tensor var_1214_cast_fp16 = mul(x = current_key_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_cast_fp16 = mul(x = var_51_cast_fp16_5, y = var_137_cast_fp16)[name = tensor("op_1216_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1214_cast_fp16, y = var_1216_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_1218_cast_fp16 = mul(x = current_value_cast_fp16, y = var_134_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_cast_fp16 = mul(x = var_60_cast_fp16_5, y = var_137_cast_fp16)[name = tensor("op_1220_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_1218_cast_fp16, y = var_1220_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_1223 = const()[name = tensor("op_1223"), val = tensor([1, 8, 64, -1])]; + tensor var_1224_cast_fp16 = reshape(shape = var_1223, x = query_21_cast_fp16)[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1227 = const()[name = tensor("op_1227"), val = tensor([1, 8, 64, -1])]; + tensor var_1228_cast_fp16 = reshape(shape = var_1227, x = key_21_cast_fp16)[name = tensor("op_1228_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_1226_cast_fp16, y = var_1228_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor mh_w_33_cast_fp16 = add(x = mh_w_31_cast_fp16, y = var_155_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor var_1236_cast_fp16 = softmax(axis = var_1150, x = 
mh_w_33_cast_fp16)[name = tensor("op_1236_cast_fp16")]; + tensor var_1237 = const()[name = tensor("op_1237"), val = tensor([1, 8, 64, -1])]; + tensor var_1238_cast_fp16 = reshape(shape = var_1237, x = value_21_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1238_cast_fp16, y = var_1236_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_1241 = const()[name = tensor("op_1241"), val = tensor([1, 512, 1, -1])]; + tensor input_51_cast_fp16 = reshape(shape = var_1241, x = attn_21_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_1245 = const()[name = tensor("op_1245"), val = tensor([1, 1])]; + tensor var_1247 = const()[name = tensor("op_1247"), val = tensor([1, 1])]; + tensor obj_77_pad_type_0 = const()[name = tensor("obj_77_pad_type_0"), val = tensor("custom")]; + tensor obj_77_pad_0 = const()[name = tensor("obj_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97186048)))]; + tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97710400)))]; + tensor obj_77_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = var_1247, groups = var_1157, pad = obj_77_pad_0, pad_type = obj_77_pad_type_0, strides = var_1245, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_77_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_1257 = const()[name = tensor("op_1257"), val = tensor([1])]; + tensor channels_mean_33_cast_fp16 = reduce_mean(axes = var_1257, keep_dims = var_1158, x = inputs_33_cast_fp16)[name = tensor("channels_mean_33_cast_fp16")]; + tensor zero_mean_33_cast_fp16 = sub(x = inputs_33_cast_fp16, y = channels_mean_33_cast_fp16)[name = tensor("zero_mean_33_cast_fp16")]; + tensor zero_mean_sq_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = zero_mean_33_cast_fp16)[name = tensor("zero_mean_sq_33_cast_fp16")]; + tensor var_1261 = const()[name = tensor("op_1261"), val = tensor([1])]; + tensor var_1262_cast_fp16 = reduce_mean(axes = var_1261, keep_dims = var_1158, x = zero_mean_sq_33_cast_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1263_to_fp16 = const()[name = tensor("op_1263_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1264_cast_fp16 = add(x = var_1262_cast_fp16, y = var_1263_to_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor denom_33_epsilon_0_to_fp16 = const()[name = tensor("denom_33_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0_to_fp16, x = var_1264_cast_fp16)[name = tensor("denom_33_cast_fp16")]; + tensor out_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = denom_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_79_gamma_0_to_fp16 = const()[name = tensor("obj_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(97711488)))]; + tensor obj_79_beta_0_to_fp16 = const()[name = tensor("obj_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97712576)))]; + tensor obj_79_epsilon_0_to_fp16 = const()[name = tensor("obj_79_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor var_1279 = const()[name = tensor("op_1279"), val = tensor([1, 1])]; + tensor var_1281 = const()[name = tensor("op_1281"), val = tensor([1, 1])]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("custom")]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97713664)))]; + tensor layers_5_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98238016)))]; + tensor query_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_bias_to_fp16, dilations = var_1281, groups = var_1157, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_1279, weight = layers_5_encoder_attn_q_proj_weight_to_fp16, x = obj_79_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_1285 = const()[name = tensor("op_1285"), val = tensor([1, 1])]; + tensor var_1287 = const()[name = tensor("op_1287"), val = tensor([1, 1])]; + tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("custom")]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98239104)))]; + tensor key_cast_fp16 = conv(dilations = var_1287, groups = var_1157, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_1285, weight = layers_5_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_cast_fp16")]; + tensor var_1292 = const()[name = tensor("op_1292"), val = tensor([1, 1])]; + tensor var_1294 = const()[name = tensor("op_1294"), val = tensor([1, 1])]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("custom")]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98763456)))]; + tensor layers_5_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99287808)))]; + tensor value_cast_fp16 = conv(bias = layers_5_encoder_attn_v_proj_bias_to_fp16, dilations = var_1294, groups = var_1157, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_1292, weight = layers_5_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = 
tensor("value_cast_fp16")]; + tensor var_1298 = const()[name = tensor("op_1298"), val = tensor([1, 8, 64, -1])]; + tensor var_1299_cast_fp16 = reshape(shape = var_1298, x = query_cast_fp16)[name = tensor("op_1299_cast_fp16")]; + tensor var_1300_to_fp16 = const()[name = tensor("op_1300_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1301_cast_fp16 = mul(x = var_1299_cast_fp16, y = var_1300_to_fp16)[name = tensor("op_1301_cast_fp16")]; + tensor var_1302 = const()[name = tensor("op_1302"), val = tensor([1, 8, 64, -1])]; + tensor var_1303_cast_fp16 = reshape(shape = var_1302, x = key_cast_fp16)[name = tensor("op_1303_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_1301_cast_fp16, y = var_1303_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor obj_83_cast_fp16 = softmax(axis = var_1150, x = mh_w_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor var_1307 = const()[name = tensor("op_1307"), val = tensor([1, 8, 64, -1])]; + tensor var_1308_cast_fp16 = reshape(shape = var_1307, x = value_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_1308_cast_fp16, y = obj_83_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_1311 = const()[name = tensor("op_1311"), val = tensor([1, 512, 1, -1])]; + tensor input_53_cast_fp16 = reshape(shape = var_1311, x = attn_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor var_1315 = const()[name = tensor("op_1315"), val = tensor([1, 1])]; + tensor var_1317 = const()[name = tensor("op_1317"), val = tensor([1, 1])]; + tensor obj_81_pad_type_0 = const()[name = tensor("obj_81_pad_type_0"), val = tensor("custom")]; + tensor obj_81_pad_0 = const()[name = tensor("obj_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99288896)))]; + tensor layers_5_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99813248)))]; + tensor obj_81_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_bias_to_fp16, dilations = var_1317, groups = var_1157, pad = obj_81_pad_0, pad_type = obj_81_pad_type_0, strides = var_1315, weight = layers_5_encoder_attn_o_proj_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_81_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor var_1326 = const()[name = tensor("op_1326"), val = tensor([1])]; + tensor channels_mean_35_cast_fp16 = reduce_mean(axes = var_1326, keep_dims = var_1158, x = inputs_35_cast_fp16)[name = tensor("channels_mean_35_cast_fp16")]; + tensor zero_mean_35_cast_fp16 = sub(x = inputs_35_cast_fp16, y = channels_mean_35_cast_fp16)[name = tensor("zero_mean_35_cast_fp16")]; + tensor zero_mean_sq_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y 
= zero_mean_35_cast_fp16)[name = tensor("zero_mean_sq_35_cast_fp16")]; + tensor var_1330 = const()[name = tensor("op_1330"), val = tensor([1])]; + tensor var_1331_cast_fp16 = reduce_mean(axes = var_1330, keep_dims = var_1158, x = zero_mean_sq_35_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_to_fp16 = const()[name = tensor("op_1332_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1333_cast_fp16 = add(x = var_1331_cast_fp16, y = var_1332_to_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor denom_35_epsilon_0_to_fp16 = const()[name = tensor("denom_35_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0_to_fp16, x = var_1333_cast_fp16)[name = tensor("denom_35_cast_fp16")]; + tensor out_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = denom_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_55_gamma_0_to_fp16 = const()[name = tensor("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99814336)))]; + tensor input_55_beta_0_to_fp16 = const()[name = tensor("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99815424)))]; + tensor input_55_epsilon_0_to_fp16 = const()[name = tensor("input_55_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1344 = const()[name = tensor("op_1344"), val = tensor([1, 1])]; + tensor var_1346 = const()[name = tensor("op_1346"), val = tensor([1, 1])]; + tensor input_57_pad_type_0 = const()[name = tensor("input_57_pad_type_0"), val = tensor("custom")]; + tensor input_57_pad_0 = const()[name = tensor("input_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99816512)))]; + tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101913728)))]; + tensor input_57_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = var_1346, groups = var_1157, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = var_1344, weight = layers_5_fc1_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_57_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1352 = const()[name = tensor("op_1352"), val = tensor([1, 1])]; + tensor var_1354 = const()[name = tensor("op_1354"), val = tensor([1, 1])]; + tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101917888)))]; + tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path 
= tensor("@model_path/weights/weight.bin"), offset = tensor(104015104)))]; + tensor hidden_states_13_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = var_1354, groups = var_1157, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = var_1352, weight = layers_5_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor var_1365 = const()[name = tensor("op_1365"), val = tensor(true)]; + tensor var_1369 = const()[name = tensor("op_1369"), val = tensor([1])]; + tensor channels_mean_cast_fp16 = reduce_mean(axes = var_1369, keep_dims = var_1365, x = inputs_cast_fp16)[name = tensor("channels_mean_cast_fp16")]; + tensor zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor("zero_mean_cast_fp16")]; + tensor zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor("zero_mean_sq_cast_fp16")]; + tensor var_1373 = const()[name = tensor("op_1373"), val = tensor([1])]; + tensor var_1374_cast_fp16 = reduce_mean(axes = var_1373, keep_dims = var_1365, x = zero_mean_sq_cast_fp16)[name = tensor("op_1374_cast_fp16")]; + tensor var_1375_to_fp16 = const()[name = tensor("op_1375_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1376_cast_fp16 = add(x = var_1374_cast_fp16, y = var_1375_to_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor denom_epsilon_0_to_fp16 = const()[name = tensor("denom_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_1376_cast_fp16)[name = tensor("denom_cast_fp16")]; + tensor out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor hidden_states_gamma_0_to_fp16 = const()[name = tensor("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104016192)))]; + tensor hidden_states_beta_0_to_fp16 = const()[name = tensor("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104017280)))]; + tensor hidden_states_epsilon_0_to_fp16 = const()[name = tensor("hidden_states_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor var_1386_axes_0 = const()[name = tensor("op_1386_axes_0"), val = tensor([2])]; + tensor var_1386_cast_fp16 = squeeze(axes = var_1386_axes_0, x = hidden_states_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1389_perm_0 = const()[name = tensor("op_1389_perm_0"), val = tensor([0, 2, 1])]; + tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104018368)))]; + tensor transpose_0 = transpose(perm = var_1389_perm_0, x = var_1386_cast_fp16)[name = tensor("transpose_0")]; + tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor("linear_0_cast_fp16")]; + tensor var_1393 = const()[name = tensor("op_1393"), val = tensor(1)]; + tensor obj_87_interleave_0 = const()[name = 
tensor("obj_87_interleave_0"), val = tensor(false)]; + tensor key_cache_updates = concat(axis = var_1393, interleave = obj_87_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_cast_fp16))[name = tensor("obj_87_cast_fp16")]; + tensor var_1396 = const()[name = tensor("op_1396"), val = tensor(1)]; + tensor obj_89_interleave_0 = const()[name = tensor("obj_89_interleave_0"), val = tensor(false)]; + tensor value_cache_updates = concat(axis = var_1396, interleave = obj_89_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_cast_fp16))[name = tensor("obj_89_cast_fp16")]; + tensor var_1407_begin_0 = const()[name = tensor("op_1407_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1407_end_0 = const()[name = tensor("op_1407_end_0"), val = tensor([1, 2, 1, 1500])]; + tensor var_1407_end_mask_0 = const()[name = tensor("op_1407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1407_cast_fp16 = slice_by_index(begin = var_1407_begin_0, end = var_1407_end_0, end_mask = var_1407_end_mask_0, x = obj_55_cast_fp16)[name = tensor("op_1407_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1410_squeeze_mask_0 = const()[name = tensor("op_1410_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, squeeze_mask = var_1410_squeeze_mask_0, x = var_1407_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1425_begin_0 = const()[name = tensor("op_1425_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1425_end_0 = const()[name = tensor("op_1425_end_0"), val = tensor([1, 3, 1, 1500])]; + tensor var_1425_end_mask_0 = const()[name = tensor("op_1425_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1425_cast_fp16 = slice_by_index(begin = var_1425_begin_0, end = var_1425_end_0, end_mask = var_1425_end_mask_0, x = obj_69_cast_fp16)[name = tensor("op_1425_cast_fp16")]; + tensor var_1428_begin_0 = const()[name = tensor("op_1428_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1428_end_0 = const()[name = tensor("op_1428_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1428_end_mask_0 = const()[name = tensor("op_1428_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1428_squeeze_mask_0 = const()[name = tensor("op_1428_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1428_cast_fp16 = slice_by_index(begin = var_1428_begin_0, end = var_1428_end_0, end_mask = var_1428_end_mask_0, squeeze_mask = var_1428_squeeze_mask_0, x = var_1425_cast_fp16)[name = tensor("op_1428_cast_fp16")]; + tensor var_1443_begin_0 = const()[name = tensor("op_1443_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1443_end_0 = const()[name = tensor("op_1443_end_0"), val = tensor([1, 4, 1, 1500])]; + tensor var_1443_end_mask_0 = const()[name = tensor("op_1443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1443_cast_fp16 = slice_by_index(begin = var_1443_begin_0, end = var_1443_end_0, end_mask 
= var_1443_end_mask_0, x = obj_69_cast_fp16)[name = tensor("op_1443_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1446_squeeze_mask_0 = const()[name = tensor("op_1446_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, squeeze_mask = var_1446_squeeze_mask_0, x = var_1443_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1461_begin_0 = const()[name = tensor("op_1461_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_1461_end_0 = const()[name = tensor("op_1461_end_0"), val = tensor([1, 8, 1, 1500])]; + tensor var_1461_end_mask_0 = const()[name = tensor("op_1461_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1461_cast_fp16 = slice_by_index(begin = var_1461_begin_0, end = var_1461_end_0, end_mask = var_1461_end_mask_0, x = obj_69_cast_fp16)[name = tensor("op_1461_cast_fp16")]; + tensor var_1464_begin_0 = const()[name = tensor("op_1464_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1464_end_0 = const()[name = tensor("op_1464_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1464_end_mask_0 = const()[name = tensor("op_1464_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1464_squeeze_mask_0 = const()[name = tensor("op_1464_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1464_cast_fp16 = slice_by_index(begin = var_1464_begin_0, end = var_1464_end_0, end_mask = var_1464_end_mask_0, squeeze_mask = var_1464_squeeze_mask_0, x = var_1461_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + tensor var_1479_begin_0 = const()[name = tensor("op_1479_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_1479_end_0 = const()[name = tensor("op_1479_end_0"), val = tensor([1, 2, 1, 1500])]; + tensor var_1479_end_mask_0 = const()[name = tensor("op_1479_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1479_cast_fp16 = slice_by_index(begin = var_1479_begin_0, end = var_1479_end_0, end_mask = var_1479_end_mask_0, x = obj_83_cast_fp16)[name = tensor("op_1479_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1482_squeeze_mask_0 = const()[name = tensor("op_1482_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, squeeze_mask = var_1482_squeeze_mask_0, x = var_1479_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1497_begin_0 = const()[name = tensor("op_1497_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_1497_end_0 = const()[name = tensor("op_1497_end_0"), val = tensor([1, 3, 1, 1500])]; + tensor var_1497_end_mask_0 = const()[name = tensor("op_1497_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1497_cast_fp16 = slice_by_index(begin = var_1497_begin_0, end = var_1497_end_0, end_mask = var_1497_end_mask_0, x = obj_83_cast_fp16)[name 
= tensor("op_1497_cast_fp16")]; + tensor var_1500_begin_0 = const()[name = tensor("op_1500_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1500_end_0 = const()[name = tensor("op_1500_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1500_end_mask_0 = const()[name = tensor("op_1500_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1500_squeeze_mask_0 = const()[name = tensor("op_1500_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1500_cast_fp16 = slice_by_index(begin = var_1500_begin_0, end = var_1500_end_0, end_mask = var_1500_end_mask_0, squeeze_mask = var_1500_squeeze_mask_0, x = var_1497_cast_fp16)[name = tensor("op_1500_cast_fp16")]; + tensor var_1515_begin_0 = const()[name = tensor("op_1515_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1515_end_0 = const()[name = tensor("op_1515_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_1515_end_mask_0 = const()[name = tensor("op_1515_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1515_cast_fp16 = slice_by_index(begin = var_1515_begin_0, end = var_1515_end_0, end_mask = var_1515_end_mask_0, x = obj_83_cast_fp16)[name = tensor("op_1515_cast_fp16")]; + tensor var_1518_begin_0 = const()[name = tensor("op_1518_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1518_end_0 = const()[name = tensor("op_1518_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1518_end_mask_0 = const()[name = tensor("op_1518_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1518_squeeze_mask_0 = const()[name = tensor("op_1518_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1518_cast_fp16 = slice_by_index(begin = var_1518_begin_0, end = var_1518_end_0, end_mask = var_1518_end_mask_0, squeeze_mask = var_1518_squeeze_mask_0, x = var_1515_cast_fp16)[name = tensor("op_1518_cast_fp16")]; + tensor var_1533_begin_0 = const()[name = tensor("op_1533_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1533_end_0 = const()[name = tensor("op_1533_end_0"), val = tensor([1, 7, 1, 1500])]; + tensor var_1533_end_mask_0 = const()[name = tensor("op_1533_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = var_1533_end_0, end_mask = var_1533_end_mask_0, x = obj_83_cast_fp16)[name = tensor("op_1533_cast_fp16")]; + tensor var_1536_begin_0 = const()[name = tensor("op_1536_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1536_end_0 = const()[name = tensor("op_1536_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1536_end_mask_0 = const()[name = tensor("op_1536_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1536_squeeze_mask_0 = const()[name = tensor("op_1536_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1536_cast_fp16 = slice_by_index(begin = var_1536_begin_0, end = var_1536_end_0, end_mask = var_1536_end_mask_0, squeeze_mask = var_1536_squeeze_mask_0, x = var_1533_cast_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1543 = const()[name = tensor("op_1543"), val = tensor(1)]; + tensor var_1544_interleave_0 = const()[name = tensor("op_1544_interleave_0"), val = tensor(false)]; + tensor var_1544_cast_fp16 = concat(axis = var_1543, interleave = var_1544_interleave_0, values = (var_1410_cast_fp16, var_1428_cast_fp16, var_1446_cast_fp16, var_1464_cast_fp16, var_1482_cast_fp16, var_1500_cast_fp16, var_1518_cast_fp16, var_1536_cast_fp16))[name = tensor("op_1544_cast_fp16")]; + tensor var_1546 = const()[name = 
tensor("op_1546"), val = tensor([1])]; + tensor var_1547 = const()[name = tensor("op_1547"), val = tensor(false)]; + tensor alignment_heads_weights = reduce_mean(axes = var_1546, keep_dims = var_1547, x = var_1544_cast_fp16)[name = tensor("obj_cast_fp16")]; + } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); +} \ No newline at end of file diff --git a/openai_whisper-base/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-base/TextDecoder.mlmodelc/model.mlmodel new file mode 100644 index 0000000000000000000000000000000000000000..dd03f976fadf935ea2810a9b15d602faf1e8624c --- /dev/null +++ b/openai_whisper-base/TextDecoder.mlmodelc/model.mlmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae260ff7b95d0c957c3c1f4df4dbeaa0ae6c76bacc55eb86caca8f6820d346f0 +size 164481 diff --git a/openai_whisper-base/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-base/TextDecoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..d7a5837254783b1a1342f54b28bcdfcfd6706da0 --- /dev/null +++ b/openai_whisper-base/TextDecoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72325d42a4a4ccc8a6fa974ede6cdf2e0770685a5c4f9da94f41495b94d8d174 +size 104122162 diff --git a/openai_whisper-base/config.json b/openai_whisper-base/config.json new file mode 100644 index 0000000000000000000000000000000000000000..61e1ae6f9129c8c6fda294e069baf047f7366e1b --- /dev/null +++ b/openai_whisper-base/config.json @@ -0,0 +1 @@ +{"_name_or_path": "openai/whisper-base", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 512, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 6, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865} \ No newline at end of file diff --git a/openai_whisper-base/generation_config.json b/openai_whisper-base/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce587327c4acabdbbb31865d28a3d79696608181 --- /dev/null +++ b/openai_whisper-base/generation_config.json @@ -0,0 +1 @@ +{"alignment_heads": [[3, 1], [4, 2], [4, 3], [4, 7], [5, 1], [5, 2], [5, 4], [5, 6]], "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "decoder_start_token_id": 50258, "eos_token_id": 50257, 
"forced_decoder_ids": [[1, null], [2, 50359]], "is_multilingual": true, "lang_to_id": {"<|af|>": 50327, "<|am|>": 50334, "<|ar|>": 50272, "<|as|>": 50350, "<|az|>": 50304, "<|ba|>": 50355, "<|be|>": 50330, "<|bg|>": 50292, "<|bn|>": 50302, "<|bo|>": 50347, "<|br|>": 50309, "<|bs|>": 50315, "<|ca|>": 50270, "<|cs|>": 50283, "<|cy|>": 50297, "<|da|>": 50285, "<|de|>": 50261, "<|el|>": 50281, "<|en|>": 50259, "<|es|>": 50262, "<|et|>": 50307, "<|eu|>": 50310, "<|fa|>": 50300, "<|fi|>": 50277, "<|fo|>": 50338, "<|fr|>": 50265, "<|gl|>": 50319, "<|gu|>": 50333, "<|haw|>": 50352, "<|ha|>": 50354, "<|he|>": 50279, "<|hi|>": 50276, "<|hr|>": 50291, "<|ht|>": 50339, "<|hu|>": 50286, "<|hy|>": 50312, "<|id|>": 50275, "<|is|>": 50311, "<|it|>": 50274, "<|ja|>": 50266, "<|jw|>": 50356, "<|ka|>": 50329, "<|kk|>": 50316, "<|km|>": 50323, "<|kn|>": 50306, "<|ko|>": 50264, "<|la|>": 50294, "<|lb|>": 50345, "<|ln|>": 50353, "<|lo|>": 50336, "<|lt|>": 50293, "<|lv|>": 50301, "<|mg|>": 50349, "<|mi|>": 50295, "<|mk|>": 50308, "<|ml|>": 50296, "<|mn|>": 50314, "<|mr|>": 50320, "<|ms|>": 50282, "<|mt|>": 50343, "<|my|>": 50346, "<|ne|>": 50313, "<|nl|>": 50271, "<|nn|>": 50342, "<|no|>": 50288, "<|oc|>": 50328, "<|pa|>": 50321, "<|pl|>": 50269, "<|ps|>": 50340, "<|pt|>": 50267, "<|ro|>": 50284, "<|ru|>": 50263, "<|sa|>": 50344, "<|sd|>": 50332, "<|si|>": 50322, "<|sk|>": 50298, "<|sl|>": 50305, "<|sn|>": 50324, "<|so|>": 50326, "<|sq|>": 50317, "<|sr|>": 50303, "<|su|>": 50357, "<|sv|>": 50273, "<|sw|>": 50318, "<|ta|>": 50287, "<|te|>": 50299, "<|tg|>": 50331, "<|th|>": 50289, "<|tk|>": 50341, "<|tl|>": 50348, "<|tr|>": 50268, "<|tt|>": 50351, "<|uk|>": 50280, "<|ur|>": 50290, "<|uz|>": 50337, "<|vi|>": 50278, "<|yi|>": 50335, "<|yo|>": 50325, "<|zh|>": 50260}, "max_initial_timestamp_index": 50, "max_length": 448, "no_timestamps_token_id": 50363, "pad_token_id": 50257, "prev_sot_token_id": 50361, "return_timestamps": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "task_to_id": {"transcribe": 50359, "translate": 50358}, "transformers_version": "4.31.0.dev0"} \ No newline at end of file diff --git a/openai_whisper-medium/.DS_Store b/openai_whisper-medium/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3369cfe73ca7c42a93b5a1126bc06caba1cb3266 Binary files /dev/null and b/openai_whisper-medium/.DS_Store differ diff --git a/openai_whisper-medium/AudioEncoder.mlcomputeplan.json b/openai_whisper-medium/AudioEncoder.mlcomputeplan.json new file mode 100644 index 0000000000000000000000000000000000000000..6178f90738b5c90514c907c5d02ea5a60b8d4b87 --- /dev/null +++ b/openai_whisper-medium/AudioEncoder.mlcomputeplan.json @@ -0,0 +1,4090 @@ +{ + "7_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0768 + }, + "9_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0656 + }, + "17_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + 
"CPU", + "ANE" + ], + "cost": 0.4903 + }, + "19_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "21_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "25_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "31_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "39_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "46_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "54_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "56_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "58_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "60_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "63_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "64_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "66_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "69_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "71_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "79_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "80_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "83_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "87_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "95_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "97_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "105_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "106_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "110_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "114_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "122_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "129_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + 
}, + "137_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "139_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "141_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "143_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "146_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "147_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "149_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "152_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "154_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "162_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "163_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "166_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "170_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "178_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "180_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "188_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "189_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "193_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "197_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "205_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "212_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "220_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "222_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "224_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "226_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "229_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "230_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "232_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": 
[ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "235_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "237_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "245_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "246_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "249_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "253_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "261_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "263_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "271_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "272_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "276_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "280_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "288_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "295_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "303_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "305_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "307_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "309_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "312_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "313_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "315_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "318_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "320_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "328_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "329_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "332_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "336_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + 
"cost": 0.0328 + }, + "344_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "346_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "354_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "355_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "359_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "363_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "371_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "378_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "386_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "388_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "390_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "392_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "395_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "396_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "398_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "401_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "403_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "411_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "412_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "415_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "419_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "427_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "429_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "437_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "438_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "442_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "446_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 
0.0328 + }, + "454_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "461_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "469_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "471_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "473_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "475_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "478_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "479_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "481_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "484_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "486_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "494_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "495_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "498_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "502_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "510_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "512_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "520_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "521_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "525_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "529_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "537_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "544_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "552_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "554_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "556_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "558_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, 
+ "561_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "562_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "564_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "567_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "569_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "577_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "578_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "581_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "585_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "593_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "595_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "603_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "604_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "608_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "612_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "620_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "627_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "635_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "637_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "639_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "641_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "644_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "645_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "647_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "650_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "652_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "660_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "661_ios16.add_x_y": { + "dispatch": "ANE", 
+ "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "664_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "668_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "676_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "678_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "686_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "687_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "691_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "695_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "703_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "710_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "718_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "720_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "722_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "724_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "727_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "728_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "730_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "733_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "735_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "743_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "744_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "747_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "751_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "759_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "761_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "769_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "770_ios16.add_x_y": { + "dispatch": "ANE", + 
"supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "774_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "778_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "786_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "793_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "801_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "803_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "805_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "807_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "810_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "811_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "813_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "816_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "818_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "826_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "827_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "830_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "834_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "842_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "844_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "852_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "853_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "857_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "861_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "869_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "876_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "884_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + 
"886_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "888_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "890_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "893_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "894_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "896_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "899_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "901_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "909_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "910_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "913_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "917_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "925_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "927_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "935_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "936_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "940_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "944_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "952_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "959_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "967_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "969_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "971_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "973_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "976_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "977_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "979_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "982_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + 
"cost": 0.417 + }, + "984_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "992_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "993_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "996_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1000_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1008_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1010_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1018_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1019_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1023_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1027_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1035_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1042_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1050_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1052_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1054_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1056_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1059_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1060_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1062_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1065_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1067_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1075_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1076_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1079_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1083_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1091_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + 
"ANE" + ], + "cost": 0.654 + }, + "1093_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1101_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1102_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1106_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1110_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1118_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1125_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1133_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1135_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1137_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1139_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1142_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1143_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1145_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1148_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1150_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1158_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1159_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1162_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1166_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1174_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1176_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1184_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1185_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1189_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1193_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1201_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": 
[ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1208_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1216_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1218_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1220_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1222_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1225_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1226_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1228_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1231_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1233_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1241_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1242_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1245_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1249_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1257_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1259_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1267_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1268_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1272_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1276_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1284_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1291_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1299_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1301_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1303_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1305_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1308_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + 
"CPU", + "ANE" + ], + "cost": 0.417 + }, + "1309_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1311_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1314_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1316_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1324_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1325_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1328_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1332_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1340_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1342_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1350_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1351_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1355_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1359_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1367_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1374_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1382_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1384_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1386_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1388_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1391_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1392_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1394_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1397_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1399_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1407_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1408_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + 
"1411_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1415_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1423_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1425_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1433_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1434_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1438_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1442_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1450_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1457_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1465_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1467_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1469_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1471_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1474_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1475_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1477_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1480_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1482_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1490_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1491_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1494_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1498_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1506_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1508_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1516_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1517_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 
0.0729 + }, + "1521_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1525_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1533_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1540_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1548_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1550_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1552_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1554_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1557_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1558_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1560_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1563_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1565_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1573_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1574_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1577_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1581_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1589_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1591_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1599_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1600_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1604_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1608_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1616_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1623_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1631_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1633_ios16.reshape_shape_x": { + 
"dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1635_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1637_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1640_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1641_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1643_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1646_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1648_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1656_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1657_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1660_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1664_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1672_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1674_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1682_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1683_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1687_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1691_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1699_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1706_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1714_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1716_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1718_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1720_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1723_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1724_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1726_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1729_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 
0.417 + }, + "1731_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1739_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1740_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1743_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1747_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1755_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1757_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1765_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1766_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1770_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1774_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1782_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1789_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1797_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1799_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1801_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1803_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1806_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1807_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1809_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1812_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1814_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1822_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1823_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1826_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1830_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1838_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + 
"ANE" + ], + "cost": 0.654 + }, + "1840_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1848_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1849_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1853_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1857_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1865_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1872_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1880_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1882_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1884_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1886_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1889_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1890_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1892_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1895_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1897_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1905_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1906_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1909_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1913_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1921_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "1923_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "1931_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "1932_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1936_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1940_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1948_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": 
[ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1955_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1634 + }, + "1963_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1965_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1967_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0299 + }, + "1969_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1972_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1973_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7684 + }, + "1975_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1978_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.417 + }, + "1980_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1988_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1635 + }, + "1989_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "1992_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "1996_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "2004_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.654 + }, + "2006_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1311 + }, + "2014_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.6537 + }, + "2015_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0729 + }, + "2018_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + }, + "2022_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0328 + } +} \ No newline at end of file diff --git a/openai_whisper-medium/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-medium/AudioEncoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..98f87d2e3522cc89fe2802884af57e554033c076 --- /dev/null +++ b/openai_whisper-medium/AudioEncoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac17e8f029504f6ecb645f539e039e650428b135cbed2d4234a4e65fde271d5 +size 243 diff --git a/openai_whisper-medium/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-medium/AudioEncoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..75ac6de0d2df1510d1ba541559ae6dd9f4555962 --- /dev/null +++ b/openai_whisper-medium/AudioEncoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:d875fd7e0a389117711940fc2052b29dc4ed0fd07813265a13cb041ae068f805 +size 347 diff --git a/openai_whisper-medium/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-medium/AudioEncoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5edeec8cb79522ccc33a47bad850a20778247078 --- /dev/null +++ b/openai_whisper-medium/AudioEncoder.mlmodelc/metadata.json @@ -0,0 +1,68 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 1500]", + "name" : "encoder_output_embeds", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Ios16.softmax" : 24, + "Ios16.add" : 49, + "Ios16.mul" : 24, + "Ios16.batchNorm" : 49, + "Ios16.gelu" : 26, + "Ios16.reshape" : 96, + "Ios16.matmul" : 48, + "Ios16.layerNorm" : 49, + "Ios16.conv" : 146 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.version" : "8.2", + "com.github.apple.coremltools.source" : "torch==2.5.0", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 1, 3000]", + "name" : "melspectrogram_features", + "type" : "MultiArray" + } + ], + "generatedClassName" : "AudioEncoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-medium/AudioEncoder.mlmodelc/model.mil b/openai_whisper-medium/AudioEncoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..f055f94d2a260093c68f9c29edda631455de97ca --- /dev/null +++ b/openai_whisper-medium/AudioEncoder.mlmodelc/model.mil @@ -0,0 +1,2029 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3401.3.1"}, {"coremlc-version", "3401.4.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})] +{ + func main(tensor melspectrogram_features) { + tensor var_90_pad_type_0 = const()[name = tensor("op_90_pad_type_0"), val = tensor("custom")]; + tensor var_90_pad_0 = const()[name = tensor("op_90_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_90_strides_0 = const()[name = tensor("op_90_strides_0"), val = tensor([1, 1])]; + tensor var_90_dilations_0 = const()[name = tensor("op_90_dilations_0"), val = tensor([1, 1])]; + tensor var_90_groups_0 = const()[name = tensor("op_90_groups_0"), val = tensor(1)]; + tensor var_65_to_fp16 = const()[name = tensor("op_65_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_71_to_fp16 = const()[name = tensor("op_71_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491648)))]; + tensor var_90_cast_fp16 = conv(bias = var_71_to_fp16, dilations = var_90_dilations_0, groups = 
var_90_groups_0, pad = var_90_pad_0, pad_type = var_90_pad_type_0, strides = var_90_strides_0, weight = var_65_to_fp16, x = melspectrogram_features)[name = tensor("op_90_cast_fp16")]; + tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_90_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_130_pad_type_0 = const()[name = tensor("op_130_pad_type_0"), val = tensor("custom")]; + tensor var_130_pad_0 = const()[name = tensor("op_130_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_130_strides_0 = const()[name = tensor("op_130_strides_0"), val = tensor([2, 2])]; + tensor var_130_dilations_0 = const()[name = tensor("op_130_dilations_0"), val = tensor([1, 1])]; + tensor var_130_groups_0 = const()[name = tensor("op_130_groups_0"), val = tensor(1)]; + tensor var_105_to_fp16 = const()[name = tensor("op_105_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493760)))]; + tensor var_111_to_fp16 = const()[name = tensor("op_111_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6785280)))]; + tensor var_130_cast_fp16 = conv(bias = var_111_to_fp16, dilations = var_130_dilations_0, groups = var_130_groups_0, pad = var_130_pad_0, pad_type = var_130_pad_type_0, strides = var_130_strides_0, weight = var_105_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_130_cast_fp16")]; + tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_130_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor var_148_to_fp16 = const()[name = tensor("op_148_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6787392)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_148_to_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor var_158 = const()[name = tensor("op_158"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_180_to_fp16 = const()[name = tensor("op_180_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_180_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9859456)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9861568)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9863680)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9865792)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = 
tensor("obj_1_cast_fp16")]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("valid")]; + tensor query_1_strides_0 = const()[name = tensor("query_1_strides_0"), val = tensor([1, 1])]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_1_dilations_0 = const()[name = tensor("query_1_dilations_0"), val = tensor([1, 1])]; + tensor query_1_groups_0 = const()[name = tensor("query_1_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9867904)))]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11965120)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = tensor("valid")]; + tensor key_1_strides_0 = const()[name = tensor("key_1_strides_0"), val = tensor([1, 1])]; + tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_1_dilations_0 = const()[name = tensor("key_1_dilations_0"), val = tensor([1, 1])]; + tensor key_1_groups_0 = const()[name = tensor("key_1_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11967232)))]; + tensor key_1_cast_fp16 = conv(dilations = key_1_dilations_0, groups = key_1_groups_0, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("valid")]; + tensor value_1_strides_0 = const()[name = tensor("value_1_strides_0"), val = tensor([1, 1])]; + tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_1_dilations_0 = const()[name = tensor("value_1_dilations_0"), val = tensor([1, 1])]; + tensor value_1_groups_0 = const()[name = tensor("value_1_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14064448)))]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16161664)))]; + tensor value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = value_1_dilations_0, groups = value_1_groups_0, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_216 = const()[name = tensor("op_216"), val = tensor([1, 16, 64, 1500])]; + 
tensor mh_q_1_cast_fp16 = reshape(shape = var_216, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_218_to_fp16 = const()[name = tensor("op_218_to_fp16"), val = tensor(0x1p-3)]; + tensor var_219_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_218_to_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 16, 64, 1500])]; + tensor var_223_cast_fp16 = reshape(shape = var_222, x = key_1_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_219_cast_fp16, y = var_223_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_226_cast_fp16 = softmax(axis = var_158, x = mh_w_1_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_227 = const()[name = tensor("op_227"), val = tensor([1, 16, 64, 1500])]; + tensor var_228_cast_fp16 = reshape(shape = var_227, x = value_1_cast_fp16)[name = tensor("op_228_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_228_cast_fp16, y = var_226_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_231 = const()[name = tensor("op_231"), val = tensor([1, 1024, 1, 1500])]; + tensor input_1_cast_fp16 = reshape(shape = var_231, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor obj_3_pad_type_0 = const()[name = tensor("obj_3_pad_type_0"), val = tensor("valid")]; + tensor obj_3_strides_0 = const()[name = tensor("obj_3_strides_0"), val = tensor([1, 1])]; + tensor obj_3_pad_0 = const()[name = tensor("obj_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_3_dilations_0 = const()[name = tensor("obj_3_dilations_0"), val = tensor([1, 1])]; + tensor obj_3_groups_0 = const()[name = tensor("obj_3_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16163776)))]; + tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18260992)))]; + tensor obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_3_dilations_0, groups = obj_3_groups_0, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = obj_3_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_249_to_fp16 = const()[name = tensor("op_249_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_249_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18263104)))]; + tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18265216)))]; + tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("valid")]; + tensor input_5_strides_0 = const()[name = tensor("input_5_strides_0"), val = tensor([1, 1])]; + tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_5_dilations_0 = const()[name = tensor("input_5_dilations_0"), val = tensor([1, 1])]; + tensor input_5_groups_0 = const()[name = tensor("input_5_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18267328)))]; + tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26656000)))]; + tensor input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_5_strides_0 = const()[name = tensor("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = tensor("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_5_groups_0 = const()[name = tensor("hidden_states_5_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26664256)))]; + tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35052928)))]; + tensor hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_278 = const()[name = tensor("op_278"), val = tensor(3)]; + tensor 
out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_300_to_fp16 = const()[name = tensor("op_300_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_300_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35055040)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35057152)))]; + tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("valid")]; + tensor query_3_strides_0 = const()[name = tensor("query_3_strides_0"), val = tensor([1, 1])]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_3_dilations_0 = const()[name = tensor("query_3_dilations_0"), val = tensor([1, 1])]; + tensor query_3_groups_0 = const()[name = tensor("query_3_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35059264)))]; + tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37156480)))]; + tensor query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("valid")]; + tensor key_3_strides_0 = const()[name = tensor("key_3_strides_0"), val = tensor([1, 1])]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_3_dilations_0 = const()[name = tensor("key_3_dilations_0"), val = tensor([1, 1])]; + tensor key_3_groups_0 = const()[name = tensor("key_3_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37158592)))]; + tensor key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("valid")]; + tensor value_3_strides_0 = const()[name = tensor("value_3_strides_0"), val = tensor([1, 1])]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_3_dilations_0 = 
const()[name = tensor("value_3_dilations_0"), val = tensor([1, 1])]; + tensor value_3_groups_0 = const()[name = tensor("value_3_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39255808)))]; + tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41353024)))]; + tensor value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_336 = const()[name = tensor("op_336"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_336, x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_338_to_fp16 = const()[name = tensor("op_338_to_fp16"), val = tensor(0x1p-3)]; + tensor var_339_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_338_to_fp16)[name = tensor("op_339_cast_fp16")]; + tensor var_342 = const()[name = tensor("op_342"), val = tensor([1, 16, 64, 1500])]; + tensor var_343_cast_fp16 = reshape(shape = var_342, x = key_3_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor mh_w_3_transpose_x_0 = const()[name = tensor("mh_w_3_transpose_x_0"), val = tensor(true)]; + tensor mh_w_3_transpose_y_0 = const()[name = tensor("mh_w_3_transpose_y_0"), val = tensor(false)]; + tensor mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_339_cast_fp16, y = var_343_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_346_cast_fp16 = softmax(axis = var_278, x = mh_w_3_cast_fp16)[name = tensor("op_346_cast_fp16")]; + tensor var_347 = const()[name = tensor("op_347"), val = tensor([1, 16, 64, 1500])]; + tensor var_348_cast_fp16 = reshape(shape = var_347, x = value_3_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_348_cast_fp16, y = var_346_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_351 = const()[name = tensor("op_351"), val = tensor([1, 1024, 1, 1500])]; + tensor input_9_cast_fp16 = reshape(shape = var_351, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("valid")]; + tensor obj_7_strides_0 = const()[name = tensor("obj_7_strides_0"), val = tensor([1, 1])]; + tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_7_dilations_0 = const()[name = tensor("obj_7_dilations_0"), val = tensor([1, 1])]; + tensor obj_7_groups_0 = const()[name = tensor("obj_7_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41355136)))]; + tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name 
= tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43452352)))]; + tensor obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_369_to_fp16 = const()[name = tensor("op_369_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_369_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43454464)))]; + tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43456576)))]; + tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("valid")]; + tensor input_13_strides_0 = const()[name = tensor("input_13_strides_0"), val = tensor([1, 1])]; + tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_13_dilations_0 = const()[name = tensor("input_13_dilations_0"), val = tensor([1, 1])]; + tensor input_13_groups_0 = const()[name = tensor("input_13_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43458688)))]; + tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51847360)))]; + tensor input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_7_strides_0 = const()[name = tensor("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = tensor("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + tensor 
hidden_states_7_groups_0 = const()[name = tensor("hidden_states_7_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51855616)))]; + tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60244288)))]; + tensor hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_398 = const()[name = tensor("op_398"), val = tensor(3)]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_420_to_fp16 = const()[name = tensor("op_420_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_420_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60246400)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60248512)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("valid")]; + tensor query_5_strides_0 = const()[name = tensor("query_5_strides_0"), val = tensor([1, 1])]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_5_dilations_0 = const()[name = tensor("query_5_dilations_0"), val = tensor([1, 1])]; + tensor query_5_groups_0 = const()[name = tensor("query_5_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60250624)))]; + tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62347840)))]; + tensor query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("valid")]; + tensor key_5_strides_0 = const()[name = tensor("key_5_strides_0"), val = tensor([1, 1])]; + tensor key_5_pad_0 = 
const()[name = tensor("key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_5_dilations_0 = const()[name = tensor("key_5_dilations_0"), val = tensor([1, 1])]; + tensor key_5_groups_0 = const()[name = tensor("key_5_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62349952)))]; + tensor key_5_cast_fp16 = conv(dilations = key_5_dilations_0, groups = key_5_groups_0, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = key_5_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("valid")]; + tensor value_5_strides_0 = const()[name = tensor("value_5_strides_0"), val = tensor([1, 1])]; + tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_5_dilations_0 = const()[name = tensor("value_5_dilations_0"), val = tensor([1, 1])]; + tensor value_5_groups_0 = const()[name = tensor("value_5_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64447168)))]; + tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66544384)))]; + tensor value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = value_5_dilations_0, groups = value_5_groups_0, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_456 = const()[name = tensor("op_456"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_456, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_458_to_fp16 = const()[name = tensor("op_458_to_fp16"), val = tensor(0x1p-3)]; + tensor var_459_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_458_to_fp16)[name = tensor("op_459_cast_fp16")]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor([1, 16, 64, 1500])]; + tensor var_463_cast_fp16 = reshape(shape = var_462, x = key_5_cast_fp16)[name = tensor("op_463_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_459_cast_fp16, y = var_463_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor var_466_cast_fp16 = softmax(axis = var_398, x = mh_w_5_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_467 = const()[name = tensor("op_467"), val = tensor([1, 16, 64, 1500])]; + tensor var_468_cast_fp16 = reshape(shape = var_467, x = value_5_cast_fp16)[name = tensor("op_468_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, 
transpose_y = attn_5_transpose_y_0, x = var_468_cast_fp16, y = var_466_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1, 1024, 1, 1500])]; + tensor input_17_cast_fp16 = reshape(shape = var_471, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("valid")]; + tensor obj_11_strides_0 = const()[name = tensor("obj_11_strides_0"), val = tensor([1, 1])]; + tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_11_dilations_0 = const()[name = tensor("obj_11_dilations_0"), val = tensor([1, 1])]; + tensor obj_11_groups_0 = const()[name = tensor("obj_11_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66546496)))]; + tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68643712)))]; + tensor obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_489_to_fp16 = const()[name = tensor("op_489_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_489_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68645824)))]; + tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68647936)))]; + tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("valid")]; + tensor input_21_strides_0 = const()[name = tensor("input_21_strides_0"), val = tensor([1, 1])]; + tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_21_dilations_0 = const()[name = tensor("input_21_dilations_0"), val = tensor([1, 1])]; + tensor input_21_groups_0 = const()[name = tensor("input_21_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68650048)))]; + tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77038720)))]; + tensor input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_9_strides_0 = const()[name = tensor("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = tensor("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_9_groups_0 = const()[name = tensor("hidden_states_9_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77046976)))]; + tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85435648)))]; + tensor hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_518 = const()[name = tensor("op_518"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_540_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85437760)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85439872)))]; + tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("valid")]; + tensor query_7_strides_0 = const()[name = tensor("query_7_strides_0"), val = tensor([1, 1])]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_7_dilations_0 = const()[name = 
tensor("query_7_dilations_0"), val = tensor([1, 1])]; + tensor query_7_groups_0 = const()[name = tensor("query_7_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85441984)))]; + tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87539200)))]; + tensor query_7_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("valid")]; + tensor key_7_strides_0 = const()[name = tensor("key_7_strides_0"), val = tensor([1, 1])]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_7_dilations_0 = const()[name = tensor("key_7_dilations_0"), val = tensor([1, 1])]; + tensor key_7_groups_0 = const()[name = tensor("key_7_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87541312)))]; + tensor key_7_cast_fp16 = conv(dilations = key_7_dilations_0, groups = key_7_groups_0, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = key_7_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("valid")]; + tensor value_7_strides_0 = const()[name = tensor("value_7_strides_0"), val = tensor([1, 1])]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_7_dilations_0 = const()[name = tensor("value_7_dilations_0"), val = tensor([1, 1])]; + tensor value_7_groups_0 = const()[name = tensor("value_7_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89638528)))]; + tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91735744)))]; + tensor value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = value_7_dilations_0, groups = value_7_groups_0, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = value_7_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_576 = const()[name = tensor("op_576"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_576, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_578_to_fp16 = const()[name = tensor("op_578_to_fp16"), val = tensor(0x1p-3)]; + tensor var_579_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_578_to_fp16)[name = tensor("op_579_cast_fp16")]; + tensor var_582 = const()[name = 
tensor("op_582"), val = tensor([1, 16, 64, 1500])]; + tensor var_583_cast_fp16 = reshape(shape = var_582, x = key_7_cast_fp16)[name = tensor("op_583_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_579_cast_fp16, y = var_583_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor var_586_cast_fp16 = softmax(axis = var_518, x = mh_w_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_587 = const()[name = tensor("op_587"), val = tensor([1, 16, 64, 1500])]; + tensor var_588_cast_fp16 = reshape(shape = var_587, x = value_7_cast_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_588_cast_fp16, y = var_586_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_591 = const()[name = tensor("op_591"), val = tensor([1, 1024, 1, 1500])]; + tensor input_25_cast_fp16 = reshape(shape = var_591, x = attn_7_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor obj_15_pad_type_0 = const()[name = tensor("obj_15_pad_type_0"), val = tensor("valid")]; + tensor obj_15_strides_0 = const()[name = tensor("obj_15_strides_0"), val = tensor([1, 1])]; + tensor obj_15_pad_0 = const()[name = tensor("obj_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_15_dilations_0 = const()[name = tensor("obj_15_dilations_0"), val = tensor([1, 1])]; + tensor obj_15_groups_0 = const()[name = tensor("obj_15_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91737856)))]; + tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93835072)))]; + tensor obj_15_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = obj_15_dilations_0, groups = obj_15_groups_0, pad = obj_15_pad_0, pad_type = obj_15_pad_type_0, strides = obj_15_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_609_to_fp16 = const()[name = tensor("op_609_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_609_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93837184)))]; + tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93839296)))]; + tensor input_27_epsilon_0_to_fp16 = 
const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor input_29_pad_type_0 = const()[name = tensor("input_29_pad_type_0"), val = tensor("valid")]; + tensor input_29_strides_0 = const()[name = tensor("input_29_strides_0"), val = tensor([1, 1])]; + tensor input_29_pad_0 = const()[name = tensor("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_29_dilations_0 = const()[name = tensor("input_29_dilations_0"), val = tensor([1, 1])]; + tensor input_29_groups_0 = const()[name = tensor("input_29_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93841408)))]; + tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102230080)))]; + tensor input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_11_strides_0 = const()[name = tensor("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = tensor("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_11_groups_0 = const()[name = tensor("hidden_states_11_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102238336)))]; + tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110627008)))]; + tensor hidden_states_11_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = layers_3_fc2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_638 = const()[name = tensor("op_638"), val = tensor(3)]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_660_to_fp16 = const()[name = tensor("op_660_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_660_to_fp16, x = 
inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110629120)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110631232)))]; + tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("valid")]; + tensor query_9_strides_0 = const()[name = tensor("query_9_strides_0"), val = tensor([1, 1])]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_9_dilations_0 = const()[name = tensor("query_9_dilations_0"), val = tensor([1, 1])]; + tensor query_9_groups_0 = const()[name = tensor("query_9_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110633344)))]; + tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112730560)))]; + tensor query_9_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = query_9_dilations_0, groups = query_9_groups_0, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = query_9_strides_0, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor key_9_pad_type_0 = const()[name = tensor("key_9_pad_type_0"), val = tensor("valid")]; + tensor key_9_strides_0 = const()[name = tensor("key_9_strides_0"), val = tensor([1, 1])]; + tensor key_9_pad_0 = const()[name = tensor("key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_9_dilations_0 = const()[name = tensor("key_9_dilations_0"), val = tensor([1, 1])]; + tensor key_9_groups_0 = const()[name = tensor("key_9_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112732672)))]; + tensor key_9_cast_fp16 = conv(dilations = key_9_dilations_0, groups = key_9_groups_0, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = key_9_strides_0, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor value_9_pad_type_0 = const()[name = tensor("value_9_pad_type_0"), val = tensor("valid")]; + tensor value_9_strides_0 = const()[name = tensor("value_9_strides_0"), val = tensor([1, 1])]; + tensor value_9_pad_0 = const()[name = tensor("value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_9_dilations_0 = const()[name = tensor("value_9_dilations_0"), val = tensor([1, 1])]; + tensor value_9_groups_0 = const()[name = tensor("value_9_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = 
tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114829888)))]; + tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116927104)))]; + tensor value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = value_9_dilations_0, groups = value_9_groups_0, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = value_9_strides_0, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_696 = const()[name = tensor("op_696"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_696, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_698_to_fp16 = const()[name = tensor("op_698_to_fp16"), val = tensor(0x1p-3)]; + tensor var_699_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_698_to_fp16)[name = tensor("op_699_cast_fp16")]; + tensor var_702 = const()[name = tensor("op_702"), val = tensor([1, 16, 64, 1500])]; + tensor var_703_cast_fp16 = reshape(shape = var_702, x = key_9_cast_fp16)[name = tensor("op_703_cast_fp16")]; + tensor mh_w_9_transpose_x_0 = const()[name = tensor("mh_w_9_transpose_x_0"), val = tensor(true)]; + tensor mh_w_9_transpose_y_0 = const()[name = tensor("mh_w_9_transpose_y_0"), val = tensor(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_699_cast_fp16, y = var_703_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_706_cast_fp16 = softmax(axis = var_638, x = mh_w_9_cast_fp16)[name = tensor("op_706_cast_fp16")]; + tensor var_707 = const()[name = tensor("op_707"), val = tensor([1, 16, 64, 1500])]; + tensor var_708_cast_fp16 = reshape(shape = var_707, x = value_9_cast_fp16)[name = tensor("op_708_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_708_cast_fp16, y = var_706_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_711 = const()[name = tensor("op_711"), val = tensor([1, 1024, 1, 1500])]; + tensor input_33_cast_fp16 = reshape(shape = var_711, x = attn_9_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor obj_19_pad_type_0 = const()[name = tensor("obj_19_pad_type_0"), val = tensor("valid")]; + tensor obj_19_strides_0 = const()[name = tensor("obj_19_strides_0"), val = tensor([1, 1])]; + tensor obj_19_pad_0 = const()[name = tensor("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_19_dilations_0 = const()[name = tensor("obj_19_dilations_0"), val = tensor([1, 1])]; + tensor obj_19_groups_0 = const()[name = tensor("obj_19_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116929216)))]; + tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119026432)))]; + tensor obj_19_cast_fp16 = conv(bias = 
layers_4_self_attn_o_proj_bias_to_fp16, dilations = obj_19_dilations_0, groups = obj_19_groups_0, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = obj_19_strides_0, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_729_to_fp16 = const()[name = tensor("op_729_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_729_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119028544)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119030656)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("valid")]; + tensor input_37_strides_0 = const()[name = tensor("input_37_strides_0"), val = tensor([1, 1])]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_37_dilations_0 = const()[name = tensor("input_37_dilations_0"), val = tensor([1, 1])]; + tensor input_37_groups_0 = const()[name = tensor("input_37_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119032768)))]; + tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127421440)))]; + tensor input_37_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_4_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_13_strides_0 = const()[name = tensor("hidden_states_13_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_13_dilations_0 = const()[name = tensor("hidden_states_13_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_13_groups_0 = const()[name = tensor("hidden_states_13_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_weight_to_fp16 = const()[name = 
tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127429696)))]; + tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135818368)))]; + tensor hidden_states_13_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = hidden_states_13_dilations_0, groups = hidden_states_13_groups_0, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = hidden_states_13_strides_0, weight = layers_4_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_758 = const()[name = tensor("op_758"), val = tensor(3)]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_780_to_fp16 = const()[name = tensor("op_780_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_780_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135820480)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135822592)))]; + tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("valid")]; + tensor query_11_strides_0 = const()[name = tensor("query_11_strides_0"), val = tensor([1, 1])]; + tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_11_dilations_0 = const()[name = tensor("query_11_dilations_0"), val = tensor([1, 1])]; + tensor query_11_groups_0 = const()[name = tensor("query_11_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135824704)))]; + tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137921920)))]; + tensor query_11_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = query_11_dilations_0, groups = query_11_groups_0, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = query_11_strides_0, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor key_11_pad_type_0 = const()[name = tensor("key_11_pad_type_0"), val = tensor("valid")]; + tensor key_11_strides_0 = const()[name = tensor("key_11_strides_0"), val = tensor([1, 1])]; + tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_11_dilations_0 = 
const()[name = tensor("key_11_dilations_0"), val = tensor([1, 1])]; + tensor key_11_groups_0 = const()[name = tensor("key_11_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137924032)))]; + tensor key_11_cast_fp16 = conv(dilations = key_11_dilations_0, groups = key_11_groups_0, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = key_11_strides_0, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("valid")]; + tensor value_11_strides_0 = const()[name = tensor("value_11_strides_0"), val = tensor([1, 1])]; + tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_11_dilations_0 = const()[name = tensor("value_11_dilations_0"), val = tensor([1, 1])]; + tensor value_11_groups_0 = const()[name = tensor("value_11_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140021248)))]; + tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142118464)))]; + tensor value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = value_11_dilations_0, groups = value_11_groups_0, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = value_11_strides_0, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_816 = const()[name = tensor("op_816"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_816, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_818_to_fp16 = const()[name = tensor("op_818_to_fp16"), val = tensor(0x1p-3)]; + tensor var_819_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_818_to_fp16)[name = tensor("op_819_cast_fp16")]; + tensor var_822 = const()[name = tensor("op_822"), val = tensor([1, 16, 64, 1500])]; + tensor var_823_cast_fp16 = reshape(shape = var_822, x = key_11_cast_fp16)[name = tensor("op_823_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_819_cast_fp16, y = var_823_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor var_826_cast_fp16 = softmax(axis = var_758, x = mh_w_11_cast_fp16)[name = tensor("op_826_cast_fp16")]; + tensor var_827 = const()[name = tensor("op_827"), val = tensor([1, 16, 64, 1500])]; + tensor var_828_cast_fp16 = reshape(shape = var_827, x = value_11_cast_fp16)[name = tensor("op_828_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = 
var_828_cast_fp16, y = var_826_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_831 = const()[name = tensor("op_831"), val = tensor([1, 1024, 1, 1500])]; + tensor input_41_cast_fp16 = reshape(shape = var_831, x = attn_11_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor obj_23_pad_type_0 = const()[name = tensor("obj_23_pad_type_0"), val = tensor("valid")]; + tensor obj_23_strides_0 = const()[name = tensor("obj_23_strides_0"), val = tensor([1, 1])]; + tensor obj_23_pad_0 = const()[name = tensor("obj_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_23_dilations_0 = const()[name = tensor("obj_23_dilations_0"), val = tensor([1, 1])]; + tensor obj_23_groups_0 = const()[name = tensor("obj_23_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142120576)))]; + tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144217792)))]; + tensor obj_23_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = obj_23_dilations_0, groups = obj_23_groups_0, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = obj_23_strides_0, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_849_to_fp16 = const()[name = tensor("op_849_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_849_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144219904)))]; + tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144222016)))]; + tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor input_45_pad_type_0 = const()[name = tensor("input_45_pad_type_0"), val = tensor("valid")]; + tensor input_45_strides_0 = const()[name = tensor("input_45_strides_0"), val = tensor([1, 1])]; + tensor input_45_pad_0 = const()[name = tensor("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_45_dilations_0 = const()[name = tensor("input_45_dilations_0"), val = tensor([1, 1])]; + tensor input_45_groups_0 = const()[name = tensor("input_45_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144224128)))]; + tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(152612800)))]; + tensor input_45_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = layers_5_fc1_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; + tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor hidden_states_15_pad_type_0 = const()[name = tensor("hidden_states_15_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_15_strides_0 = const()[name = tensor("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = tensor("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = tensor("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_15_groups_0 = const()[name = tensor("hidden_states_15_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152621056)))]; + tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161009728)))]; + tensor hidden_states_15_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = layers_5_fc2_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_878 = const()[name = tensor("op_878"), val = tensor(3)]; + tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; + tensor var_900_to_fp16 = const()[name = tensor("op_900_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_900_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161011840)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161013952)))]; + tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor query_13_pad_type_0 = const()[name = tensor("query_13_pad_type_0"), val = tensor("valid")]; + tensor query_13_strides_0 = const()[name = tensor("query_13_strides_0"), val = tensor([1, 1])]; + tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_13_dilations_0 = const()[name = 
tensor("query_13_dilations_0"), val = tensor([1, 1])]; + tensor query_13_groups_0 = const()[name = tensor("query_13_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161016064)))]; + tensor layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163113280)))]; + tensor query_13_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = query_13_dilations_0, groups = query_13_groups_0, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = query_13_strides_0, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor key_13_pad_type_0 = const()[name = tensor("key_13_pad_type_0"), val = tensor("valid")]; + tensor key_13_strides_0 = const()[name = tensor("key_13_strides_0"), val = tensor([1, 1])]; + tensor key_13_pad_0 = const()[name = tensor("key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_13_dilations_0 = const()[name = tensor("key_13_dilations_0"), val = tensor([1, 1])]; + tensor key_13_groups_0 = const()[name = tensor("key_13_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163115392)))]; + tensor key_13_cast_fp16 = conv(dilations = key_13_dilations_0, groups = key_13_groups_0, pad = key_13_pad_0, pad_type = key_13_pad_type_0, strides = key_13_strides_0, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor value_13_pad_type_0 = const()[name = tensor("value_13_pad_type_0"), val = tensor("valid")]; + tensor value_13_strides_0 = const()[name = tensor("value_13_strides_0"), val = tensor([1, 1])]; + tensor value_13_pad_0 = const()[name = tensor("value_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_13_dilations_0 = const()[name = tensor("value_13_dilations_0"), val = tensor([1, 1])]; + tensor value_13_groups_0 = const()[name = tensor("value_13_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165212608)))]; + tensor layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167309824)))]; + tensor value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = value_13_dilations_0, groups = value_13_groups_0, pad = value_13_pad_0, pad_type = value_13_pad_type_0, strides = value_13_strides_0, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_936 = const()[name = tensor("op_936"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_936, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_938_to_fp16 = const()[name = tensor("op_938_to_fp16"), val = tensor(0x1p-3)]; + tensor var_939_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_938_to_fp16)[name = 
tensor("op_939_cast_fp16")]; + tensor var_942 = const()[name = tensor("op_942"), val = tensor([1, 16, 64, 1500])]; + tensor var_943_cast_fp16 = reshape(shape = var_942, x = key_13_cast_fp16)[name = tensor("op_943_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_939_cast_fp16, y = var_943_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor var_946_cast_fp16 = softmax(axis = var_878, x = mh_w_13_cast_fp16)[name = tensor("op_946_cast_fp16")]; + tensor var_947 = const()[name = tensor("op_947"), val = tensor([1, 16, 64, 1500])]; + tensor var_948_cast_fp16 = reshape(shape = var_947, x = value_13_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_948_cast_fp16, y = var_946_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_951 = const()[name = tensor("op_951"), val = tensor([1, 1024, 1, 1500])]; + tensor input_49_cast_fp16 = reshape(shape = var_951, x = attn_13_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor obj_27_pad_type_0 = const()[name = tensor("obj_27_pad_type_0"), val = tensor("valid")]; + tensor obj_27_strides_0 = const()[name = tensor("obj_27_strides_0"), val = tensor([1, 1])]; + tensor obj_27_pad_0 = const()[name = tensor("obj_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_27_dilations_0 = const()[name = tensor("obj_27_dilations_0"), val = tensor([1, 1])]; + tensor obj_27_groups_0 = const()[name = tensor("obj_27_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167311936)))]; + tensor layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169409152)))]; + tensor obj_27_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = obj_27_dilations_0, groups = obj_27_groups_0, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = obj_27_strides_0, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; + tensor var_969_to_fp16 = const()[name = tensor("op_969_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_969_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169411264)))]; + tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(169413376)))]; + tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor input_53_pad_type_0 = const()[name = tensor("input_53_pad_type_0"), val = tensor("valid")]; + tensor input_53_strides_0 = const()[name = tensor("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = tensor("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = tensor("input_53_dilations_0"), val = tensor([1, 1])]; + tensor input_53_groups_0 = const()[name = tensor("input_53_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_weight_to_fp16 = const()[name = tensor("layers_6_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169415488)))]; + tensor layers_6_fc1_bias_to_fp16 = const()[name = tensor("layers_6_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177804160)))]; + tensor input_53_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = layers_6_fc1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; + tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor hidden_states_17_pad_type_0 = const()[name = tensor("hidden_states_17_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_17_strides_0 = const()[name = tensor("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = tensor("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = tensor("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_17_groups_0 = const()[name = tensor("hidden_states_17_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_weight_to_fp16 = const()[name = tensor("layers_6_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177812416)))]; + tensor layers_6_fc2_bias_to_fp16 = const()[name = tensor("layers_6_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186201088)))]; + tensor hidden_states_17_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = layers_6_fc2_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_998 = const()[name = tensor("op_998"), val = tensor(3)]; + tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; + tensor var_1020_to_fp16 = const()[name = tensor("op_1020_to_fp16"), val = 
tensor(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1020_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186203200)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186205312)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("valid")]; + tensor query_15_strides_0 = const()[name = tensor("query_15_strides_0"), val = tensor([1, 1])]; + tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_15_dilations_0 = const()[name = tensor("query_15_dilations_0"), val = tensor([1, 1])]; + tensor query_15_groups_0 = const()[name = tensor("query_15_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186207424)))]; + tensor layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188304640)))]; + tensor query_15_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = query_15_dilations_0, groups = query_15_groups_0, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = query_15_strides_0, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("valid")]; + tensor key_15_strides_0 = const()[name = tensor("key_15_strides_0"), val = tensor([1, 1])]; + tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_15_dilations_0 = const()[name = tensor("key_15_dilations_0"), val = tensor([1, 1])]; + tensor key_15_groups_0 = const()[name = tensor("key_15_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188306752)))]; + tensor key_15_cast_fp16 = conv(dilations = key_15_dilations_0, groups = key_15_groups_0, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = key_15_strides_0, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("valid")]; + tensor value_15_strides_0 = const()[name = tensor("value_15_strides_0"), val = tensor([1, 1])]; + tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_15_dilations_0 = const()[name = tensor("value_15_dilations_0"), val = tensor([1, 1])]; + tensor 
value_15_groups_0 = const()[name = tensor("value_15_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190403968)))]; + tensor layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192501184)))]; + tensor value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = value_15_dilations_0, groups = value_15_groups_0, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = value_15_strides_0, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_1056 = const()[name = tensor("op_1056"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1056, x = query_15_cast_fp16)[name = tensor("mh_q_15_cast_fp16")]; + tensor var_1058_to_fp16 = const()[name = tensor("op_1058_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1059_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1058_to_fp16)[name = tensor("op_1059_cast_fp16")]; + tensor var_1062 = const()[name = tensor("op_1062"), val = tensor([1, 16, 64, 1500])]; + tensor var_1063_cast_fp16 = reshape(shape = var_1062, x = key_15_cast_fp16)[name = tensor("op_1063_cast_fp16")]; + tensor mh_w_15_transpose_x_0 = const()[name = tensor("mh_w_15_transpose_x_0"), val = tensor(true)]; + tensor mh_w_15_transpose_y_0 = const()[name = tensor("mh_w_15_transpose_y_0"), val = tensor(false)]; + tensor mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1059_cast_fp16, y = var_1063_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_1066_cast_fp16 = softmax(axis = var_998, x = mh_w_15_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1067 = const()[name = tensor("op_1067"), val = tensor([1, 16, 64, 1500])]; + tensor var_1068_cast_fp16 = reshape(shape = var_1067, x = value_15_cast_fp16)[name = tensor("op_1068_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1068_cast_fp16, y = var_1066_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_1071 = const()[name = tensor("op_1071"), val = tensor([1, 1024, 1, 1500])]; + tensor input_57_cast_fp16 = reshape(shape = var_1071, x = attn_15_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor obj_31_pad_type_0 = const()[name = tensor("obj_31_pad_type_0"), val = tensor("valid")]; + tensor obj_31_strides_0 = const()[name = tensor("obj_31_strides_0"), val = tensor([1, 1])]; + tensor obj_31_pad_0 = const()[name = tensor("obj_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_31_dilations_0 = const()[name = tensor("obj_31_dilations_0"), val = tensor([1, 1])]; + tensor obj_31_groups_0 = const()[name = tensor("obj_31_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192503296)))]; + tensor layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = 
tensor("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194600512)))]; + tensor obj_31_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = obj_31_dilations_0, groups = obj_31_groups_0, pad = obj_31_pad_0, pad_type = obj_31_pad_type_0, strides = obj_31_strides_0, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("obj_31_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; + tensor var_1089_to_fp16 = const()[name = tensor("op_1089_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1089_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194602624)))]; + tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194604736)))]; + tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor input_61_pad_type_0 = const()[name = tensor("input_61_pad_type_0"), val = tensor("valid")]; + tensor input_61_strides_0 = const()[name = tensor("input_61_strides_0"), val = tensor([1, 1])]; + tensor input_61_pad_0 = const()[name = tensor("input_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_61_dilations_0 = const()[name = tensor("input_61_dilations_0"), val = tensor([1, 1])]; + tensor input_61_groups_0 = const()[name = tensor("input_61_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_weight_to_fp16 = const()[name = tensor("layers_7_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194606848)))]; + tensor layers_7_fc1_bias_to_fp16 = const()[name = tensor("layers_7_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202995520)))]; + tensor input_61_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = input_61_dilations_0, groups = input_61_groups_0, pad = input_61_pad_0, pad_type = input_61_pad_type_0, strides = input_61_strides_0, weight = layers_7_fc1_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; + tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor hidden_states_19_pad_type_0 = const()[name = tensor("hidden_states_19_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_19_strides_0 = const()[name = tensor("hidden_states_19_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_0 = const()[name = tensor("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_19_dilations_0 = const()[name = tensor("hidden_states_19_dilations_0"), val = tensor([1, 
1])]; + tensor hidden_states_19_groups_0 = const()[name = tensor("hidden_states_19_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_weight_to_fp16 = const()[name = tensor("layers_7_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203003776)))]; + tensor layers_7_fc2_bias_to_fp16 = const()[name = tensor("layers_7_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211392448)))]; + tensor hidden_states_19_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = hidden_states_19_dilations_0, groups = hidden_states_19_groups_0, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = hidden_states_19_strides_0, weight = layers_7_fc2_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_1118 = const()[name = tensor("op_1118"), val = tensor(3)]; + tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; + tensor var_1140_to_fp16 = const()[name = tensor("op_1140_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1140_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211394560)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211396672)))]; + tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; + tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("valid")]; + tensor query_17_strides_0 = const()[name = tensor("query_17_strides_0"), val = tensor([1, 1])]; + tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_17_dilations_0 = const()[name = tensor("query_17_dilations_0"), val = tensor([1, 1])]; + tensor query_17_groups_0 = const()[name = tensor("query_17_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211398784)))]; + tensor layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213496000)))]; + tensor query_17_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = query_17_dilations_0, groups = query_17_groups_0, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = query_17_strides_0, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor key_17_pad_type_0 = const()[name = tensor("key_17_pad_type_0"), val = tensor("valid")]; + tensor key_17_strides_0 = const()[name = 
tensor("key_17_strides_0"), val = tensor([1, 1])]; + tensor key_17_pad_0 = const()[name = tensor("key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_17_dilations_0 = const()[name = tensor("key_17_dilations_0"), val = tensor([1, 1])]; + tensor key_17_groups_0 = const()[name = tensor("key_17_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213498112)))]; + tensor key_17_cast_fp16 = conv(dilations = key_17_dilations_0, groups = key_17_groups_0, pad = key_17_pad_0, pad_type = key_17_pad_type_0, strides = key_17_strides_0, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor value_17_pad_type_0 = const()[name = tensor("value_17_pad_type_0"), val = tensor("valid")]; + tensor value_17_strides_0 = const()[name = tensor("value_17_strides_0"), val = tensor([1, 1])]; + tensor value_17_pad_0 = const()[name = tensor("value_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_17_dilations_0 = const()[name = tensor("value_17_dilations_0"), val = tensor([1, 1])]; + tensor value_17_groups_0 = const()[name = tensor("value_17_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215595328)))]; + tensor layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217692544)))]; + tensor value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = value_17_dilations_0, groups = value_17_groups_0, pad = value_17_pad_0, pad_type = value_17_pad_type_0, strides = value_17_strides_0, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1176 = const()[name = tensor("op_1176"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1176, x = query_17_cast_fp16)[name = tensor("mh_q_17_cast_fp16")]; + tensor var_1178_to_fp16 = const()[name = tensor("op_1178_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1179_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1178_to_fp16)[name = tensor("op_1179_cast_fp16")]; + tensor var_1182 = const()[name = tensor("op_1182"), val = tensor([1, 16, 64, 1500])]; + tensor var_1183_cast_fp16 = reshape(shape = var_1182, x = key_17_cast_fp16)[name = tensor("op_1183_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1179_cast_fp16, y = var_1183_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor var_1186_cast_fp16 = softmax(axis = var_1118, x = mh_w_17_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1187 = const()[name = tensor("op_1187"), val = tensor([1, 16, 64, 1500])]; + tensor var_1188_cast_fp16 = reshape(shape = var_1187, x = value_17_cast_fp16)[name = tensor("op_1188_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor 
attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1188_cast_fp16, y = var_1186_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1191 = const()[name = tensor("op_1191"), val = tensor([1, 1024, 1, 1500])]; + tensor input_65_cast_fp16 = reshape(shape = var_1191, x = attn_17_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor obj_35_pad_type_0 = const()[name = tensor("obj_35_pad_type_0"), val = tensor("valid")]; + tensor obj_35_strides_0 = const()[name = tensor("obj_35_strides_0"), val = tensor([1, 1])]; + tensor obj_35_pad_0 = const()[name = tensor("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_35_dilations_0 = const()[name = tensor("obj_35_dilations_0"), val = tensor([1, 1])]; + tensor obj_35_groups_0 = const()[name = tensor("obj_35_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217694656)))]; + tensor layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219791872)))]; + tensor obj_35_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1209_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219793984)))]; + tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219796096)))]; + tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor input_69_pad_type_0 = const()[name = tensor("input_69_pad_type_0"), val = tensor("valid")]; + tensor input_69_strides_0 = const()[name = tensor("input_69_strides_0"), val = tensor([1, 1])]; + tensor input_69_pad_0 = const()[name = tensor("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_69_dilations_0 = const()[name = tensor("input_69_dilations_0"), val = tensor([1, 1])]; + tensor input_69_groups_0 = const()[name = tensor("input_69_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_weight_to_fp16 = const()[name = tensor("layers_8_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(219798208)))]; + tensor layers_8_fc1_bias_to_fp16 = const()[name = tensor("layers_8_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228186880)))]; + tensor input_69_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = layers_8_fc1_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; + tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor hidden_states_21_pad_type_0 = const()[name = tensor("hidden_states_21_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_21_strides_0 = const()[name = tensor("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = tensor("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = tensor("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_21_groups_0 = const()[name = tensor("hidden_states_21_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_weight_to_fp16 = const()[name = tensor("layers_8_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228195136)))]; + tensor layers_8_fc2_bias_to_fp16 = const()[name = tensor("layers_8_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236583808)))]; + tensor hidden_states_21_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = layers_8_fc2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1238 = const()[name = tensor("op_1238"), val = tensor(3)]; + tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; + tensor var_1260_to_fp16 = const()[name = tensor("op_1260_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1260_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236585920)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236588032)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = tensor("valid")]; + tensor query_19_strides_0 = const()[name = 
tensor("query_19_strides_0"), val = tensor([1, 1])]; + tensor query_19_pad_0 = const()[name = tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_19_dilations_0 = const()[name = tensor("query_19_dilations_0"), val = tensor([1, 1])]; + tensor query_19_groups_0 = const()[name = tensor("query_19_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236590144)))]; + tensor layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238687360)))]; + tensor query_19_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = query_19_dilations_0, groups = query_19_groups_0, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = query_19_strides_0, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("valid")]; + tensor key_19_strides_0 = const()[name = tensor("key_19_strides_0"), val = tensor([1, 1])]; + tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_19_dilations_0 = const()[name = tensor("key_19_dilations_0"), val = tensor([1, 1])]; + tensor key_19_groups_0 = const()[name = tensor("key_19_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238689472)))]; + tensor key_19_cast_fp16 = conv(dilations = key_19_dilations_0, groups = key_19_groups_0, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = key_19_strides_0, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("valid")]; + tensor value_19_strides_0 = const()[name = tensor("value_19_strides_0"), val = tensor([1, 1])]; + tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_19_dilations_0 = const()[name = tensor("value_19_dilations_0"), val = tensor([1, 1])]; + tensor value_19_groups_0 = const()[name = tensor("value_19_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240786688)))]; + tensor layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242883904)))]; + tensor value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = value_19_dilations_0, groups = value_19_groups_0, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = value_19_strides_0, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_1296 = const()[name = tensor("op_1296"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1296, x = query_19_cast_fp16)[name = tensor("mh_q_19_cast_fp16")]; 
+ tensor var_1298_to_fp16 = const()[name = tensor("op_1298_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1299_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1298_to_fp16)[name = tensor("op_1299_cast_fp16")]; + tensor var_1302 = const()[name = tensor("op_1302"), val = tensor([1, 16, 64, 1500])]; + tensor var_1303_cast_fp16 = reshape(shape = var_1302, x = key_19_cast_fp16)[name = tensor("op_1303_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1299_cast_fp16, y = var_1303_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor var_1306_cast_fp16 = softmax(axis = var_1238, x = mh_w_19_cast_fp16)[name = tensor("op_1306_cast_fp16")]; + tensor var_1307 = const()[name = tensor("op_1307"), val = tensor([1, 16, 64, 1500])]; + tensor var_1308_cast_fp16 = reshape(shape = var_1307, x = value_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1308_cast_fp16, y = var_1306_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1311 = const()[name = tensor("op_1311"), val = tensor([1, 1024, 1, 1500])]; + tensor input_73_cast_fp16 = reshape(shape = var_1311, x = attn_19_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor obj_39_pad_type_0 = const()[name = tensor("obj_39_pad_type_0"), val = tensor("valid")]; + tensor obj_39_strides_0 = const()[name = tensor("obj_39_strides_0"), val = tensor([1, 1])]; + tensor obj_39_pad_0 = const()[name = tensor("obj_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_39_dilations_0 = const()[name = tensor("obj_39_dilations_0"), val = tensor([1, 1])]; + tensor obj_39_groups_0 = const()[name = tensor("obj_39_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242886016)))]; + tensor layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(244983232)))]; + tensor obj_39_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = obj_39_dilations_0, groups = obj_39_groups_0, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = obj_39_strides_0, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; + tensor var_1329_to_fp16 = const()[name = tensor("op_1329_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1329_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(244985344)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(244987456)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor input_77_pad_type_0 = const()[name = tensor("input_77_pad_type_0"), val = tensor("valid")]; + tensor input_77_strides_0 = const()[name = tensor("input_77_strides_0"), val = tensor([1, 1])]; + tensor input_77_pad_0 = const()[name = tensor("input_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_77_dilations_0 = const()[name = tensor("input_77_dilations_0"), val = tensor([1, 1])]; + tensor input_77_groups_0 = const()[name = tensor("input_77_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_weight_to_fp16 = const()[name = tensor("layers_9_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(244989568)))]; + tensor layers_9_fc1_bias_to_fp16 = const()[name = tensor("layers_9_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253378240)))]; + tensor input_77_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = input_77_dilations_0, groups = input_77_groups_0, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = input_77_strides_0, weight = layers_9_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor hidden_states_23_pad_type_0 = const()[name = tensor("hidden_states_23_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_23_strides_0 = const()[name = tensor("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = tensor("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = tensor("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_23_groups_0 = const()[name = tensor("hidden_states_23_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_weight_to_fp16 = const()[name = tensor("layers_9_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253386496)))]; + tensor layers_9_fc2_bias_to_fp16 = const()[name = tensor("layers_9_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261775168)))]; + tensor hidden_states_23_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = layers_9_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_1358 = const()[name = 
tensor("op_1358"), val = tensor(3)]; + tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; + tensor var_1380_to_fp16 = const()[name = tensor("op_1380_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1380_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261777280)))]; + tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261779392)))]; + tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("valid")]; + tensor query_21_strides_0 = const()[name = tensor("query_21_strides_0"), val = tensor([1, 1])]; + tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_21_dilations_0 = const()[name = tensor("query_21_dilations_0"), val = tensor([1, 1])]; + tensor query_21_groups_0 = const()[name = tensor("query_21_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261781504)))]; + tensor layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263878720)))]; + tensor query_21_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = query_21_dilations_0, groups = query_21_groups_0, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = query_21_strides_0, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor key_21_pad_type_0 = const()[name = tensor("key_21_pad_type_0"), val = tensor("valid")]; + tensor key_21_strides_0 = const()[name = tensor("key_21_strides_0"), val = tensor([1, 1])]; + tensor key_21_pad_0 = const()[name = tensor("key_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_21_dilations_0 = const()[name = tensor("key_21_dilations_0"), val = tensor([1, 1])]; + tensor key_21_groups_0 = const()[name = tensor("key_21_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263880832)))]; + tensor key_21_cast_fp16 = conv(dilations = key_21_dilations_0, groups = key_21_groups_0, pad = key_21_pad_0, pad_type = key_21_pad_type_0, strides = key_21_strides_0, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor value_21_pad_type_0 = const()[name = tensor("value_21_pad_type_0"), val = tensor("valid")]; + tensor value_21_strides_0 = const()[name = tensor("value_21_strides_0"), val = tensor([1, 1])]; + 
tensor value_21_pad_0 = const()[name = tensor("value_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_21_dilations_0 = const()[name = tensor("value_21_dilations_0"), val = tensor([1, 1])]; + tensor value_21_groups_0 = const()[name = tensor("value_21_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265978048)))]; + tensor layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(268075264)))]; + tensor value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = value_21_dilations_0, groups = value_21_groups_0, pad = value_21_pad_0, pad_type = value_21_pad_type_0, strides = value_21_strides_0, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_1416 = const()[name = tensor("op_1416"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_1416, x = query_21_cast_fp16)[name = tensor("mh_q_21_cast_fp16")]; + tensor var_1418_to_fp16 = const()[name = tensor("op_1418_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1419_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1418_to_fp16)[name = tensor("op_1419_cast_fp16")]; + tensor var_1422 = const()[name = tensor("op_1422"), val = tensor([1, 16, 64, 1500])]; + tensor var_1423_cast_fp16 = reshape(shape = var_1422, x = key_21_cast_fp16)[name = tensor("op_1423_cast_fp16")]; + tensor mh_w_21_transpose_x_0 = const()[name = tensor("mh_w_21_transpose_x_0"), val = tensor(true)]; + tensor mh_w_21_transpose_y_0 = const()[name = tensor("mh_w_21_transpose_y_0"), val = tensor(false)]; + tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_1419_cast_fp16, y = var_1423_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_1426_cast_fp16 = softmax(axis = var_1358, x = mh_w_21_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1427 = const()[name = tensor("op_1427"), val = tensor([1, 16, 64, 1500])]; + tensor var_1428_cast_fp16 = reshape(shape = var_1427, x = value_21_cast_fp16)[name = tensor("op_1428_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1428_cast_fp16, y = var_1426_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_1431 = const()[name = tensor("op_1431"), val = tensor([1, 1024, 1, 1500])]; + tensor input_81_cast_fp16 = reshape(shape = var_1431, x = attn_21_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor obj_43_pad_type_0 = const()[name = tensor("obj_43_pad_type_0"), val = tensor("valid")]; + tensor obj_43_strides_0 = const()[name = tensor("obj_43_strides_0"), val = tensor([1, 1])]; + tensor obj_43_pad_0 = const()[name = tensor("obj_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_43_dilations_0 = const()[name = tensor("obj_43_dilations_0"), val = tensor([1, 1])]; + tensor obj_43_groups_0 = const()[name = tensor("obj_43_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = 
tensor("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(268077376)))]; + tensor layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270174592)))]; + tensor obj_43_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = obj_43_dilations_0, groups = obj_43_groups_0, pad = obj_43_pad_0, pad_type = obj_43_pad_type_0, strides = obj_43_strides_0, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; + tensor var_1449_to_fp16 = const()[name = tensor("op_1449_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1449_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270176704)))]; + tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270178816)))]; + tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor input_85_pad_type_0 = const()[name = tensor("input_85_pad_type_0"), val = tensor("valid")]; + tensor input_85_strides_0 = const()[name = tensor("input_85_strides_0"), val = tensor([1, 1])]; + tensor input_85_pad_0 = const()[name = tensor("input_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_85_dilations_0 = const()[name = tensor("input_85_dilations_0"), val = tensor([1, 1])]; + tensor input_85_groups_0 = const()[name = tensor("input_85_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_weight_to_fp16 = const()[name = tensor("layers_10_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270180928)))]; + tensor layers_10_fc1_bias_to_fp16 = const()[name = tensor("layers_10_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(278569600)))]; + tensor input_85_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = input_85_dilations_0, groups = input_85_groups_0, pad = input_85_pad_0, pad_type = input_85_pad_type_0, strides = input_85_strides_0, weight = layers_10_fc1_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; + tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor hidden_states_25_pad_type_0 = const()[name = tensor("hidden_states_25_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_25_strides_0 = const()[name = tensor("hidden_states_25_strides_0"), val = tensor([1, 
1])]; + tensor hidden_states_25_pad_0 = const()[name = tensor("hidden_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_25_dilations_0 = const()[name = tensor("hidden_states_25_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_25_groups_0 = const()[name = tensor("hidden_states_25_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_weight_to_fp16 = const()[name = tensor("layers_10_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(278577856)))]; + tensor layers_10_fc2_bias_to_fp16 = const()[name = tensor("layers_10_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286966528)))]; + tensor hidden_states_25_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = hidden_states_25_dilations_0, groups = hidden_states_25_groups_0, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = hidden_states_25_strides_0, weight = layers_10_fc2_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor var_1478 = const()[name = tensor("op_1478"), val = tensor(3)]; + tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; + tensor var_1500_to_fp16 = const()[name = tensor("op_1500_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_1500_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286968640)))]; + tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286970752)))]; + tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; + tensor query_23_pad_type_0 = const()[name = tensor("query_23_pad_type_0"), val = tensor("valid")]; + tensor query_23_strides_0 = const()[name = tensor("query_23_strides_0"), val = tensor([1, 1])]; + tensor query_23_pad_0 = const()[name = tensor("query_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_23_dilations_0 = const()[name = tensor("query_23_dilations_0"), val = tensor([1, 1])]; + tensor query_23_groups_0 = const()[name = tensor("query_23_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286972864)))]; + tensor layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289070080)))]; + tensor query_23_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = query_23_dilations_0, groups = query_23_groups_0, pad = query_23_pad_0, pad_type = query_23_pad_type_0, strides = query_23_strides_0, weight = 
layers_11_self_attn_q_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor key_23_pad_type_0 = const()[name = tensor("key_23_pad_type_0"), val = tensor("valid")]; + tensor key_23_strides_0 = const()[name = tensor("key_23_strides_0"), val = tensor([1, 1])]; + tensor key_23_pad_0 = const()[name = tensor("key_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_23_dilations_0 = const()[name = tensor("key_23_dilations_0"), val = tensor([1, 1])]; + tensor key_23_groups_0 = const()[name = tensor("key_23_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289072192)))]; + tensor key_23_cast_fp16 = conv(dilations = key_23_dilations_0, groups = key_23_groups_0, pad = key_23_pad_0, pad_type = key_23_pad_type_0, strides = key_23_strides_0, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("key_23_cast_fp16")]; + tensor value_23_pad_type_0 = const()[name = tensor("value_23_pad_type_0"), val = tensor("valid")]; + tensor value_23_strides_0 = const()[name = tensor("value_23_strides_0"), val = tensor([1, 1])]; + tensor value_23_pad_0 = const()[name = tensor("value_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_23_dilations_0 = const()[name = tensor("value_23_dilations_0"), val = tensor([1, 1])]; + tensor value_23_groups_0 = const()[name = tensor("value_23_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291169408)))]; + tensor layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293266624)))]; + tensor value_23_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = value_23_dilations_0, groups = value_23_groups_0, pad = value_23_pad_0, pad_type = value_23_pad_type_0, strides = value_23_strides_0, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("value_23_cast_fp16")]; + tensor var_1536 = const()[name = tensor("op_1536"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_23_cast_fp16 = reshape(shape = var_1536, x = query_23_cast_fp16)[name = tensor("mh_q_23_cast_fp16")]; + tensor var_1538_to_fp16 = const()[name = tensor("op_1538_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1539_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1538_to_fp16)[name = tensor("op_1539_cast_fp16")]; + tensor var_1542 = const()[name = tensor("op_1542"), val = tensor([1, 16, 64, 1500])]; + tensor var_1543_cast_fp16 = reshape(shape = var_1542, x = key_23_cast_fp16)[name = tensor("op_1543_cast_fp16")]; + tensor mh_w_23_transpose_x_0 = const()[name = tensor("mh_w_23_transpose_x_0"), val = tensor(true)]; + tensor mh_w_23_transpose_y_0 = const()[name = tensor("mh_w_23_transpose_y_0"), val = tensor(false)]; + tensor mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_1539_cast_fp16, y = var_1543_cast_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor var_1546_cast_fp16 = softmax(axis = var_1478, x = mh_w_23_cast_fp16)[name = tensor("op_1546_cast_fp16")]; + tensor var_1547 = const()[name = tensor("op_1547"), val = tensor([1, 16, 64, 
1500])]; + tensor var_1548_cast_fp16 = reshape(shape = var_1547, x = value_23_cast_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor attn_23_transpose_x_0 = const()[name = tensor("attn_23_transpose_x_0"), val = tensor(false)]; + tensor attn_23_transpose_y_0 = const()[name = tensor("attn_23_transpose_y_0"), val = tensor(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1548_cast_fp16, y = var_1546_cast_fp16)[name = tensor("attn_23_cast_fp16")]; + tensor var_1551 = const()[name = tensor("op_1551"), val = tensor([1, 1024, 1, 1500])]; + tensor input_89_cast_fp16 = reshape(shape = var_1551, x = attn_23_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor obj_47_pad_type_0 = const()[name = tensor("obj_47_pad_type_0"), val = tensor("valid")]; + tensor obj_47_strides_0 = const()[name = tensor("obj_47_strides_0"), val = tensor([1, 1])]; + tensor obj_47_pad_0 = const()[name = tensor("obj_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_47_dilations_0 = const()[name = tensor("obj_47_dilations_0"), val = tensor([1, 1])]; + tensor obj_47_groups_0 = const()[name = tensor("obj_47_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293268736)))]; + tensor layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295365952)))]; + tensor obj_47_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = obj_47_dilations_0, groups = obj_47_groups_0, pad = obj_47_pad_0, pad_type = obj_47_pad_type_0, strides = obj_47_strides_0, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("obj_47_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_47_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; + tensor var_1569_to_fp16 = const()[name = tensor("op_1569_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_1569_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295368064)))]; + tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295370176)))]; + tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor input_93_pad_type_0 = const()[name = tensor("input_93_pad_type_0"), val = tensor("valid")]; + tensor input_93_strides_0 = const()[name = tensor("input_93_strides_0"), val = tensor([1, 1])]; + tensor input_93_pad_0 = const()[name = tensor("input_93_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_93_dilations_0 = const()[name = 
tensor("input_93_dilations_0"), val = tensor([1, 1])]; + tensor input_93_groups_0 = const()[name = tensor("input_93_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_weight_to_fp16 = const()[name = tensor("layers_11_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295372288)))]; + tensor layers_11_fc1_bias_to_fp16 = const()[name = tensor("layers_11_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303760960)))]; + tensor input_93_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = input_93_dilations_0, groups = input_93_groups_0, pad = input_93_pad_0, pad_type = input_93_pad_type_0, strides = input_93_strides_0, weight = layers_11_fc1_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor input_95_mode_0 = const()[name = tensor("input_95_mode_0"), val = tensor("EXACT")]; + tensor input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor hidden_states_27_pad_type_0 = const()[name = tensor("hidden_states_27_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_27_strides_0 = const()[name = tensor("hidden_states_27_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_0 = const()[name = tensor("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = tensor("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_27_groups_0 = const()[name = tensor("hidden_states_27_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_weight_to_fp16 = const()[name = tensor("layers_11_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303769216)))]; + tensor layers_11_fc2_bias_to_fp16 = const()[name = tensor("layers_11_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312157888)))]; + tensor hidden_states_27_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = layers_11_fc2_weight_to_fp16, x = input_95_cast_fp16)[name = tensor("hidden_states_27_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_27_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_1598 = const()[name = tensor("op_1598"), val = tensor(3)]; + tensor out_49_axes_0 = const()[name = tensor("out_49_axes_0"), val = tensor([1])]; + tensor var_1620_to_fp16 = const()[name = tensor("op_1620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1620_to_fp16, x = inputs_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; + tensor obj_49_gamma_0_to_fp16 = const()[name = tensor("obj_49_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312160000)))]; + tensor obj_49_beta_0_to_fp16 = const()[name = tensor("obj_49_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312162112)))]; + tensor obj_49_epsilon_0_to_fp16 = const()[name = tensor("obj_49_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_49_cast_fp16 = batch_norm(beta = obj_49_beta_0_to_fp16, epsilon = obj_49_epsilon_0_to_fp16, gamma = obj_49_gamma_0_to_fp16, mean 
= obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor query_25_pad_type_0 = const()[name = tensor("query_25_pad_type_0"), val = tensor("valid")]; + tensor query_25_strides_0 = const()[name = tensor("query_25_strides_0"), val = tensor([1, 1])]; + tensor query_25_pad_0 = const()[name = tensor("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_25_dilations_0 = const()[name = tensor("query_25_dilations_0"), val = tensor([1, 1])]; + tensor query_25_groups_0 = const()[name = tensor("query_25_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312164224)))]; + tensor layers_12_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314261440)))]; + tensor query_25_cast_fp16 = conv(bias = layers_12_self_attn_q_proj_bias_to_fp16, dilations = query_25_dilations_0, groups = query_25_groups_0, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = query_25_strides_0, weight = layers_12_self_attn_q_proj_weight_to_fp16, x = obj_49_cast_fp16)[name = tensor("query_25_cast_fp16")]; + tensor key_25_pad_type_0 = const()[name = tensor("key_25_pad_type_0"), val = tensor("valid")]; + tensor key_25_strides_0 = const()[name = tensor("key_25_strides_0"), val = tensor([1, 1])]; + tensor key_25_pad_0 = const()[name = tensor("key_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_25_dilations_0 = const()[name = tensor("key_25_dilations_0"), val = tensor([1, 1])]; + tensor key_25_groups_0 = const()[name = tensor("key_25_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314263552)))]; + tensor key_25_cast_fp16 = conv(dilations = key_25_dilations_0, groups = key_25_groups_0, pad = key_25_pad_0, pad_type = key_25_pad_type_0, strides = key_25_strides_0, weight = layers_12_self_attn_k_proj_weight_to_fp16, x = obj_49_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor value_25_pad_type_0 = const()[name = tensor("value_25_pad_type_0"), val = tensor("valid")]; + tensor value_25_strides_0 = const()[name = tensor("value_25_strides_0"), val = tensor([1, 1])]; + tensor value_25_pad_0 = const()[name = tensor("value_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_25_dilations_0 = const()[name = tensor("value_25_dilations_0"), val = tensor([1, 1])]; + tensor value_25_groups_0 = const()[name = tensor("value_25_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316360768)))]; + tensor layers_12_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(318457984)))]; + tensor value_25_cast_fp16 = conv(bias = layers_12_self_attn_v_proj_bias_to_fp16, dilations = value_25_dilations_0, groups = value_25_groups_0, pad = value_25_pad_0, pad_type = value_25_pad_type_0, strides = value_25_strides_0, weight = 
layers_12_self_attn_v_proj_weight_to_fp16, x = obj_49_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_1656 = const()[name = tensor("op_1656"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_1656, x = query_25_cast_fp16)[name = tensor("mh_q_25_cast_fp16")]; + tensor var_1658_to_fp16 = const()[name = tensor("op_1658_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1659_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_1658_to_fp16)[name = tensor("op_1659_cast_fp16")]; + tensor var_1662 = const()[name = tensor("op_1662"), val = tensor([1, 16, 64, 1500])]; + tensor var_1663_cast_fp16 = reshape(shape = var_1662, x = key_25_cast_fp16)[name = tensor("op_1663_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1659_cast_fp16, y = var_1663_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_1598, x = mh_w_25_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667 = const()[name = tensor("op_1667"), val = tensor([1, 16, 64, 1500])]; + tensor var_1668_cast_fp16 = reshape(shape = var_1667, x = value_25_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor attn_25_transpose_x_0 = const()[name = tensor("attn_25_transpose_x_0"), val = tensor(false)]; + tensor attn_25_transpose_y_0 = const()[name = tensor("attn_25_transpose_y_0"), val = tensor(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1668_cast_fp16, y = var_1666_cast_fp16)[name = tensor("attn_25_cast_fp16")]; + tensor var_1671 = const()[name = tensor("op_1671"), val = tensor([1, 1024, 1, 1500])]; + tensor input_97_cast_fp16 = reshape(shape = var_1671, x = attn_25_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor obj_51_pad_type_0 = const()[name = tensor("obj_51_pad_type_0"), val = tensor("valid")]; + tensor obj_51_strides_0 = const()[name = tensor("obj_51_strides_0"), val = tensor([1, 1])]; + tensor obj_51_pad_0 = const()[name = tensor("obj_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_51_dilations_0 = const()[name = tensor("obj_51_dilations_0"), val = tensor([1, 1])]; + tensor obj_51_groups_0 = const()[name = tensor("obj_51_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(318460096)))]; + tensor layers_12_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320557312)))]; + tensor obj_51_cast_fp16 = conv(bias = layers_12_self_attn_o_proj_bias_to_fp16, dilations = obj_51_dilations_0, groups = obj_51_groups_0, pad = obj_51_pad_0, pad_type = obj_51_pad_type_0, strides = obj_51_strides_0, weight = layers_12_self_attn_o_proj_weight_to_fp16, x = input_97_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_51_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor out_51_axes_0 = const()[name = tensor("out_51_axes_0"), val = tensor([1])]; + tensor var_1689_to_fp16 = const()[name = 
tensor("op_1689_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_1689_to_fp16, x = inputs_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; + tensor input_99_gamma_0_to_fp16 = const()[name = tensor("input_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320559424)))]; + tensor input_99_beta_0_to_fp16 = const()[name = tensor("input_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320561536)))]; + tensor input_99_epsilon_0_to_fp16 = const()[name = tensor("input_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_99_cast_fp16 = batch_norm(beta = input_99_beta_0_to_fp16, epsilon = input_99_epsilon_0_to_fp16, gamma = input_99_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor input_101_pad_type_0 = const()[name = tensor("input_101_pad_type_0"), val = tensor("valid")]; + tensor input_101_strides_0 = const()[name = tensor("input_101_strides_0"), val = tensor([1, 1])]; + tensor input_101_pad_0 = const()[name = tensor("input_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_101_dilations_0 = const()[name = tensor("input_101_dilations_0"), val = tensor([1, 1])]; + tensor input_101_groups_0 = const()[name = tensor("input_101_groups_0"), val = tensor(1)]; + tensor layers_12_fc1_weight_to_fp16 = const()[name = tensor("layers_12_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320563648)))]; + tensor layers_12_fc1_bias_to_fp16 = const()[name = tensor("layers_12_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328952320)))]; + tensor input_101_cast_fp16 = conv(bias = layers_12_fc1_bias_to_fp16, dilations = input_101_dilations_0, groups = input_101_groups_0, pad = input_101_pad_0, pad_type = input_101_pad_type_0, strides = input_101_strides_0, weight = layers_12_fc1_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor input_103_mode_0 = const()[name = tensor("input_103_mode_0"), val = tensor("EXACT")]; + tensor input_103_cast_fp16 = gelu(mode = input_103_mode_0, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor hidden_states_29_pad_type_0 = const()[name = tensor("hidden_states_29_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_29_strides_0 = const()[name = tensor("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = tensor("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = tensor("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_29_groups_0 = const()[name = tensor("hidden_states_29_groups_0"), val = tensor(1)]; + tensor layers_12_fc2_weight_to_fp16 = const()[name = tensor("layers_12_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328960576)))]; + tensor layers_12_fc2_bias_to_fp16 = const()[name = tensor("layers_12_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337349248)))]; + tensor hidden_states_29_cast_fp16 = conv(bias = layers_12_fc2_bias_to_fp16, dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = 
hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = layers_12_fc2_weight_to_fp16, x = input_103_cast_fp16)[name = tensor("hidden_states_29_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = hidden_states_29_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor var_1718 = const()[name = tensor("op_1718"), val = tensor(3)]; + tensor out_53_axes_0 = const()[name = tensor("out_53_axes_0"), val = tensor([1])]; + tensor var_1740_to_fp16 = const()[name = tensor("op_1740_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_1740_to_fp16, x = inputs_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; + tensor obj_53_gamma_0_to_fp16 = const()[name = tensor("obj_53_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337351360)))]; + tensor obj_53_beta_0_to_fp16 = const()[name = tensor("obj_53_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337353472)))]; + tensor obj_53_epsilon_0_to_fp16 = const()[name = tensor("obj_53_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_53_cast_fp16 = batch_norm(beta = obj_53_beta_0_to_fp16, epsilon = obj_53_epsilon_0_to_fp16, gamma = obj_53_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor query_27_pad_type_0 = const()[name = tensor("query_27_pad_type_0"), val = tensor("valid")]; + tensor query_27_strides_0 = const()[name = tensor("query_27_strides_0"), val = tensor([1, 1])]; + tensor query_27_pad_0 = const()[name = tensor("query_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_27_dilations_0 = const()[name = tensor("query_27_dilations_0"), val = tensor([1, 1])]; + tensor query_27_groups_0 = const()[name = tensor("query_27_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337355584)))]; + tensor layers_13_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339452800)))]; + tensor query_27_cast_fp16 = conv(bias = layers_13_self_attn_q_proj_bias_to_fp16, dilations = query_27_dilations_0, groups = query_27_groups_0, pad = query_27_pad_0, pad_type = query_27_pad_type_0, strides = query_27_strides_0, weight = layers_13_self_attn_q_proj_weight_to_fp16, x = obj_53_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor key_27_pad_type_0 = const()[name = tensor("key_27_pad_type_0"), val = tensor("valid")]; + tensor key_27_strides_0 = const()[name = tensor("key_27_strides_0"), val = tensor([1, 1])]; + tensor key_27_pad_0 = const()[name = tensor("key_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_27_dilations_0 = const()[name = tensor("key_27_dilations_0"), val = tensor([1, 1])]; + tensor key_27_groups_0 = const()[name = tensor("key_27_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339454912)))]; + tensor key_27_cast_fp16 = conv(dilations = key_27_dilations_0, groups = key_27_groups_0, pad = key_27_pad_0, pad_type = 
key_27_pad_type_0, strides = key_27_strides_0, weight = layers_13_self_attn_k_proj_weight_to_fp16, x = obj_53_cast_fp16)[name = tensor("key_27_cast_fp16")]; + tensor value_27_pad_type_0 = const()[name = tensor("value_27_pad_type_0"), val = tensor("valid")]; + tensor value_27_strides_0 = const()[name = tensor("value_27_strides_0"), val = tensor([1, 1])]; + tensor value_27_pad_0 = const()[name = tensor("value_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_27_dilations_0 = const()[name = tensor("value_27_dilations_0"), val = tensor([1, 1])]; + tensor value_27_groups_0 = const()[name = tensor("value_27_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341552128)))]; + tensor layers_13_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343649344)))]; + tensor value_27_cast_fp16 = conv(bias = layers_13_self_attn_v_proj_bias_to_fp16, dilations = value_27_dilations_0, groups = value_27_groups_0, pad = value_27_pad_0, pad_type = value_27_pad_type_0, strides = value_27_strides_0, weight = layers_13_self_attn_v_proj_weight_to_fp16, x = obj_53_cast_fp16)[name = tensor("value_27_cast_fp16")]; + tensor var_1776 = const()[name = tensor("op_1776"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_27_cast_fp16 = reshape(shape = var_1776, x = query_27_cast_fp16)[name = tensor("mh_q_27_cast_fp16")]; + tensor var_1778_to_fp16 = const()[name = tensor("op_1778_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1779_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1778_to_fp16)[name = tensor("op_1779_cast_fp16")]; + tensor var_1782 = const()[name = tensor("op_1782"), val = tensor([1, 16, 64, 1500])]; + tensor var_1783_cast_fp16 = reshape(shape = var_1782, x = key_27_cast_fp16)[name = tensor("op_1783_cast_fp16")]; + tensor mh_w_27_transpose_x_0 = const()[name = tensor("mh_w_27_transpose_x_0"), val = tensor(true)]; + tensor mh_w_27_transpose_y_0 = const()[name = tensor("mh_w_27_transpose_y_0"), val = tensor(false)]; + tensor mh_w_27_cast_fp16 = matmul(transpose_x = mh_w_27_transpose_x_0, transpose_y = mh_w_27_transpose_y_0, x = var_1779_cast_fp16, y = var_1783_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor var_1786_cast_fp16 = softmax(axis = var_1718, x = mh_w_27_cast_fp16)[name = tensor("op_1786_cast_fp16")]; + tensor var_1787 = const()[name = tensor("op_1787"), val = tensor([1, 16, 64, 1500])]; + tensor var_1788_cast_fp16 = reshape(shape = var_1787, x = value_27_cast_fp16)[name = tensor("op_1788_cast_fp16")]; + tensor attn_27_transpose_x_0 = const()[name = tensor("attn_27_transpose_x_0"), val = tensor(false)]; + tensor attn_27_transpose_y_0 = const()[name = tensor("attn_27_transpose_y_0"), val = tensor(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1788_cast_fp16, y = var_1786_cast_fp16)[name = tensor("attn_27_cast_fp16")]; + tensor var_1791 = const()[name = tensor("op_1791"), val = tensor([1, 1024, 1, 1500])]; + tensor input_105_cast_fp16 = reshape(shape = var_1791, x = attn_27_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor obj_55_pad_type_0 = const()[name = tensor("obj_55_pad_type_0"), val = tensor("valid")]; + tensor obj_55_strides_0 = const()[name = tensor("obj_55_strides_0"), val = 
tensor([1, 1])]; + tensor obj_55_pad_0 = const()[name = tensor("obj_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_55_dilations_0 = const()[name = tensor("obj_55_dilations_0"), val = tensor([1, 1])]; + tensor obj_55_groups_0 = const()[name = tensor("obj_55_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343651456)))]; + tensor layers_13_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(345748672)))]; + tensor obj_55_cast_fp16 = conv(bias = layers_13_self_attn_o_proj_bias_to_fp16, dilations = obj_55_dilations_0, groups = obj_55_groups_0, pad = obj_55_pad_0, pad_type = obj_55_pad_type_0, strides = obj_55_strides_0, weight = layers_13_self_attn_o_proj_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = obj_55_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor out_55_axes_0 = const()[name = tensor("out_55_axes_0"), val = tensor([1])]; + tensor var_1809_to_fp16 = const()[name = tensor("op_1809_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_1809_to_fp16, x = inputs_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; + tensor input_107_gamma_0_to_fp16 = const()[name = tensor("input_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(345750784)))]; + tensor input_107_beta_0_to_fp16 = const()[name = tensor("input_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(345752896)))]; + tensor input_107_epsilon_0_to_fp16 = const()[name = tensor("input_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_107_cast_fp16 = batch_norm(beta = input_107_beta_0_to_fp16, epsilon = input_107_epsilon_0_to_fp16, gamma = input_107_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor input_109_pad_type_0 = const()[name = tensor("input_109_pad_type_0"), val = tensor("valid")]; + tensor input_109_strides_0 = const()[name = tensor("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = tensor("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = tensor("input_109_dilations_0"), val = tensor([1, 1])]; + tensor input_109_groups_0 = const()[name = tensor("input_109_groups_0"), val = tensor(1)]; + tensor layers_13_fc1_weight_to_fp16 = const()[name = tensor("layers_13_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(345755008)))]; + tensor layers_13_fc1_bias_to_fp16 = const()[name = tensor("layers_13_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354143680)))]; + tensor input_109_cast_fp16 = conv(bias = layers_13_fc1_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = layers_13_fc1_weight_to_fp16, x = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor input_111_mode_0 = const()[name = 
tensor("input_111_mode_0"), val = tensor("EXACT")]; + tensor input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor hidden_states_31_pad_type_0 = const()[name = tensor("hidden_states_31_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_31_strides_0 = const()[name = tensor("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = tensor("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = tensor("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_31_groups_0 = const()[name = tensor("hidden_states_31_groups_0"), val = tensor(1)]; + tensor layers_13_fc2_weight_to_fp16 = const()[name = tensor("layers_13_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354151936)))]; + tensor layers_13_fc2_bias_to_fp16 = const()[name = tensor("layers_13_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362540608)))]; + tensor hidden_states_31_cast_fp16 = conv(bias = layers_13_fc2_bias_to_fp16, dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = layers_13_fc2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor("hidden_states_31_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = hidden_states_31_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor var_1838 = const()[name = tensor("op_1838"), val = tensor(3)]; + tensor out_57_axes_0 = const()[name = tensor("out_57_axes_0"), val = tensor([1])]; + tensor var_1860_to_fp16 = const()[name = tensor("op_1860_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_1860_to_fp16, x = inputs_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362542720)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362544832)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor query_29_pad_type_0 = const()[name = tensor("query_29_pad_type_0"), val = tensor("valid")]; + tensor query_29_strides_0 = const()[name = tensor("query_29_strides_0"), val = tensor([1, 1])]; + tensor query_29_pad_0 = const()[name = tensor("query_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_29_dilations_0 = const()[name = tensor("query_29_dilations_0"), val = tensor([1, 1])]; + tensor query_29_groups_0 = const()[name = tensor("query_29_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362546944)))]; + tensor layers_14_self_attn_q_proj_bias_to_fp16 = 
const()[name = tensor("layers_14_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364644160)))]; + tensor query_29_cast_fp16 = conv(bias = layers_14_self_attn_q_proj_bias_to_fp16, dilations = query_29_dilations_0, groups = query_29_groups_0, pad = query_29_pad_0, pad_type = query_29_pad_type_0, strides = query_29_strides_0, weight = layers_14_self_attn_q_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor key_29_pad_type_0 = const()[name = tensor("key_29_pad_type_0"), val = tensor("valid")]; + tensor key_29_strides_0 = const()[name = tensor("key_29_strides_0"), val = tensor([1, 1])]; + tensor key_29_pad_0 = const()[name = tensor("key_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_29_dilations_0 = const()[name = tensor("key_29_dilations_0"), val = tensor([1, 1])]; + tensor key_29_groups_0 = const()[name = tensor("key_29_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364646272)))]; + tensor key_29_cast_fp16 = conv(dilations = key_29_dilations_0, groups = key_29_groups_0, pad = key_29_pad_0, pad_type = key_29_pad_type_0, strides = key_29_strides_0, weight = layers_14_self_attn_k_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor value_29_pad_type_0 = const()[name = tensor("value_29_pad_type_0"), val = tensor("valid")]; + tensor value_29_strides_0 = const()[name = tensor("value_29_strides_0"), val = tensor([1, 1])]; + tensor value_29_pad_0 = const()[name = tensor("value_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_29_dilations_0 = const()[name = tensor("value_29_dilations_0"), val = tensor([1, 1])]; + tensor value_29_groups_0 = const()[name = tensor("value_29_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366743488)))]; + tensor layers_14_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368840704)))]; + tensor value_29_cast_fp16 = conv(bias = layers_14_self_attn_v_proj_bias_to_fp16, dilations = value_29_dilations_0, groups = value_29_groups_0, pad = value_29_pad_0, pad_type = value_29_pad_type_0, strides = value_29_strides_0, weight = layers_14_self_attn_v_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_1896 = const()[name = tensor("op_1896"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_29_cast_fp16 = reshape(shape = var_1896, x = query_29_cast_fp16)[name = tensor("mh_q_29_cast_fp16")]; + tensor var_1898_to_fp16 = const()[name = tensor("op_1898_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1899_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_1898_to_fp16)[name = tensor("op_1899_cast_fp16")]; + tensor var_1902 = const()[name = tensor("op_1902"), val = tensor([1, 16, 64, 1500])]; + tensor var_1903_cast_fp16 = reshape(shape = var_1902, x = key_29_cast_fp16)[name = tensor("op_1903_cast_fp16")]; + tensor mh_w_29_transpose_x_0 = const()[name = tensor("mh_w_29_transpose_x_0"), val = tensor(true)]; + tensor mh_w_29_transpose_y_0 = const()[name = tensor("mh_w_29_transpose_y_0"), val = 
tensor(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1899_cast_fp16, y = var_1903_cast_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor var_1906_cast_fp16 = softmax(axis = var_1838, x = mh_w_29_cast_fp16)[name = tensor("op_1906_cast_fp16")]; + tensor var_1907 = const()[name = tensor("op_1907"), val = tensor([1, 16, 64, 1500])]; + tensor var_1908_cast_fp16 = reshape(shape = var_1907, x = value_29_cast_fp16)[name = tensor("op_1908_cast_fp16")]; + tensor attn_29_transpose_x_0 = const()[name = tensor("attn_29_transpose_x_0"), val = tensor(false)]; + tensor attn_29_transpose_y_0 = const()[name = tensor("attn_29_transpose_y_0"), val = tensor(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_1908_cast_fp16, y = var_1906_cast_fp16)[name = tensor("attn_29_cast_fp16")]; + tensor var_1911 = const()[name = tensor("op_1911"), val = tensor([1, 1024, 1, 1500])]; + tensor input_113_cast_fp16 = reshape(shape = var_1911, x = attn_29_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor obj_59_pad_type_0 = const()[name = tensor("obj_59_pad_type_0"), val = tensor("valid")]; + tensor obj_59_strides_0 = const()[name = tensor("obj_59_strides_0"), val = tensor([1, 1])]; + tensor obj_59_pad_0 = const()[name = tensor("obj_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_59_dilations_0 = const()[name = tensor("obj_59_dilations_0"), val = tensor([1, 1])]; + tensor obj_59_groups_0 = const()[name = tensor("obj_59_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368842816)))]; + tensor layers_14_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370940032)))]; + tensor obj_59_cast_fp16 = conv(bias = layers_14_self_attn_o_proj_bias_to_fp16, dilations = obj_59_dilations_0, groups = obj_59_groups_0, pad = obj_59_pad_0, pad_type = obj_59_pad_type_0, strides = obj_59_strides_0, weight = layers_14_self_attn_o_proj_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("obj_59_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_59_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor out_59_axes_0 = const()[name = tensor("out_59_axes_0"), val = tensor([1])]; + tensor var_1929_to_fp16 = const()[name = tensor("op_1929_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_1929_to_fp16, x = inputs_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370942144)))]; + tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370944256)))]; + tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = 
out_59_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor input_117_pad_type_0 = const()[name = tensor("input_117_pad_type_0"), val = tensor("valid")]; + tensor input_117_strides_0 = const()[name = tensor("input_117_strides_0"), val = tensor([1, 1])]; + tensor input_117_pad_0 = const()[name = tensor("input_117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_117_dilations_0 = const()[name = tensor("input_117_dilations_0"), val = tensor([1, 1])]; + tensor input_117_groups_0 = const()[name = tensor("input_117_groups_0"), val = tensor(1)]; + tensor layers_14_fc1_weight_to_fp16 = const()[name = tensor("layers_14_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370946368)))]; + tensor layers_14_fc1_bias_to_fp16 = const()[name = tensor("layers_14_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379335040)))]; + tensor input_117_cast_fp16 = conv(bias = layers_14_fc1_bias_to_fp16, dilations = input_117_dilations_0, groups = input_117_groups_0, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = input_117_strides_0, weight = layers_14_fc1_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor input_119_mode_0 = const()[name = tensor("input_119_mode_0"), val = tensor("EXACT")]; + tensor input_119_cast_fp16 = gelu(mode = input_119_mode_0, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; + tensor hidden_states_33_pad_type_0 = const()[name = tensor("hidden_states_33_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_33_strides_0 = const()[name = tensor("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = tensor("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_33_dilations_0 = const()[name = tensor("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_33_groups_0 = const()[name = tensor("hidden_states_33_groups_0"), val = tensor(1)]; + tensor layers_14_fc2_weight_to_fp16 = const()[name = tensor("layers_14_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379343296)))]; + tensor layers_14_fc2_bias_to_fp16 = const()[name = tensor("layers_14_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387731968)))]; + tensor hidden_states_33_cast_fp16 = conv(bias = layers_14_fc2_bias_to_fp16, dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = layers_14_fc2_weight_to_fp16, x = input_119_cast_fp16)[name = tensor("hidden_states_33_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_33_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor var_1958 = const()[name = tensor("op_1958"), val = tensor(3)]; + tensor out_61_axes_0 = const()[name = tensor("out_61_axes_0"), val = tensor([1])]; + tensor var_1980_to_fp16 = const()[name = tensor("op_1980_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_1980_to_fp16, x = inputs_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; + tensor obj_61_gamma_0_to_fp16 = const()[name = tensor("obj_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387734080)))]; + tensor 
obj_61_beta_0_to_fp16 = const()[name = tensor("obj_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387736192)))]; + tensor obj_61_epsilon_0_to_fp16 = const()[name = tensor("obj_61_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_61_cast_fp16")]; + tensor query_31_pad_type_0 = const()[name = tensor("query_31_pad_type_0"), val = tensor("valid")]; + tensor query_31_strides_0 = const()[name = tensor("query_31_strides_0"), val = tensor([1, 1])]; + tensor query_31_pad_0 = const()[name = tensor("query_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_31_dilations_0 = const()[name = tensor("query_31_dilations_0"), val = tensor([1, 1])]; + tensor query_31_groups_0 = const()[name = tensor("query_31_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387738304)))]; + tensor layers_15_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389835520)))]; + tensor query_31_cast_fp16 = conv(bias = layers_15_self_attn_q_proj_bias_to_fp16, dilations = query_31_dilations_0, groups = query_31_groups_0, pad = query_31_pad_0, pad_type = query_31_pad_type_0, strides = query_31_strides_0, weight = layers_15_self_attn_q_proj_weight_to_fp16, x = obj_61_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor key_31_pad_type_0 = const()[name = tensor("key_31_pad_type_0"), val = tensor("valid")]; + tensor key_31_strides_0 = const()[name = tensor("key_31_strides_0"), val = tensor([1, 1])]; + tensor key_31_pad_0 = const()[name = tensor("key_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_31_dilations_0 = const()[name = tensor("key_31_dilations_0"), val = tensor([1, 1])]; + tensor key_31_groups_0 = const()[name = tensor("key_31_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389837632)))]; + tensor key_31_cast_fp16 = conv(dilations = key_31_dilations_0, groups = key_31_groups_0, pad = key_31_pad_0, pad_type = key_31_pad_type_0, strides = key_31_strides_0, weight = layers_15_self_attn_k_proj_weight_to_fp16, x = obj_61_cast_fp16)[name = tensor("key_31_cast_fp16")]; + tensor value_31_pad_type_0 = const()[name = tensor("value_31_pad_type_0"), val = tensor("valid")]; + tensor value_31_strides_0 = const()[name = tensor("value_31_strides_0"), val = tensor([1, 1])]; + tensor value_31_pad_0 = const()[name = tensor("value_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_31_dilations_0 = const()[name = tensor("value_31_dilations_0"), val = tensor([1, 1])]; + tensor value_31_groups_0 = const()[name = tensor("value_31_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391934848)))]; + tensor layers_15_self_attn_v_proj_bias_to_fp16 = 
const()[name = tensor("layers_15_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394032064)))]; + tensor value_31_cast_fp16 = conv(bias = layers_15_self_attn_v_proj_bias_to_fp16, dilations = value_31_dilations_0, groups = value_31_groups_0, pad = value_31_pad_0, pad_type = value_31_pad_type_0, strides = value_31_strides_0, weight = layers_15_self_attn_v_proj_weight_to_fp16, x = obj_61_cast_fp16)[name = tensor("value_31_cast_fp16")]; + tensor var_2016 = const()[name = tensor("op_2016"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_31_cast_fp16 = reshape(shape = var_2016, x = query_31_cast_fp16)[name = tensor("mh_q_31_cast_fp16")]; + tensor var_2018_to_fp16 = const()[name = tensor("op_2018_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2019_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2018_to_fp16)[name = tensor("op_2019_cast_fp16")]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 16, 64, 1500])]; + tensor var_2023_cast_fp16 = reshape(shape = var_2022, x = key_31_cast_fp16)[name = tensor("op_2023_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_2019_cast_fp16, y = var_2023_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor var_2026_cast_fp16 = softmax(axis = var_1958, x = mh_w_31_cast_fp16)[name = tensor("op_2026_cast_fp16")]; + tensor var_2027 = const()[name = tensor("op_2027"), val = tensor([1, 16, 64, 1500])]; + tensor var_2028_cast_fp16 = reshape(shape = var_2027, x = value_31_cast_fp16)[name = tensor("op_2028_cast_fp16")]; + tensor attn_31_transpose_x_0 = const()[name = tensor("attn_31_transpose_x_0"), val = tensor(false)]; + tensor attn_31_transpose_y_0 = const()[name = tensor("attn_31_transpose_y_0"), val = tensor(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2028_cast_fp16, y = var_2026_cast_fp16)[name = tensor("attn_31_cast_fp16")]; + tensor var_2031 = const()[name = tensor("op_2031"), val = tensor([1, 1024, 1, 1500])]; + tensor input_121_cast_fp16 = reshape(shape = var_2031, x = attn_31_cast_fp16)[name = tensor("input_121_cast_fp16")]; + tensor obj_63_pad_type_0 = const()[name = tensor("obj_63_pad_type_0"), val = tensor("valid")]; + tensor obj_63_strides_0 = const()[name = tensor("obj_63_strides_0"), val = tensor([1, 1])]; + tensor obj_63_pad_0 = const()[name = tensor("obj_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_63_dilations_0 = const()[name = tensor("obj_63_dilations_0"), val = tensor([1, 1])]; + tensor obj_63_groups_0 = const()[name = tensor("obj_63_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394034176)))]; + tensor layers_15_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396131392)))]; + tensor obj_63_cast_fp16 = conv(bias = layers_15_self_attn_o_proj_bias_to_fp16, dilations = obj_63_dilations_0, groups = obj_63_groups_0, pad = obj_63_pad_0, pad_type = obj_63_pad_type_0, 
strides = obj_63_strides_0, weight = layers_15_self_attn_o_proj_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor out_63_axes_0 = const()[name = tensor("out_63_axes_0"), val = tensor([1])]; + tensor var_2049_to_fp16 = const()[name = tensor("op_2049_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_2049_to_fp16, x = inputs_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; + tensor input_123_gamma_0_to_fp16 = const()[name = tensor("input_123_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396133504)))]; + tensor input_123_beta_0_to_fp16 = const()[name = tensor("input_123_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396135616)))]; + tensor input_123_epsilon_0_to_fp16 = const()[name = tensor("input_123_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_123_cast_fp16 = batch_norm(beta = input_123_beta_0_to_fp16, epsilon = input_123_epsilon_0_to_fp16, gamma = input_123_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("input_123_cast_fp16")]; + tensor input_125_pad_type_0 = const()[name = tensor("input_125_pad_type_0"), val = tensor("valid")]; + tensor input_125_strides_0 = const()[name = tensor("input_125_strides_0"), val = tensor([1, 1])]; + tensor input_125_pad_0 = const()[name = tensor("input_125_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_125_dilations_0 = const()[name = tensor("input_125_dilations_0"), val = tensor([1, 1])]; + tensor input_125_groups_0 = const()[name = tensor("input_125_groups_0"), val = tensor(1)]; + tensor layers_15_fc1_weight_to_fp16 = const()[name = tensor("layers_15_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396137728)))]; + tensor layers_15_fc1_bias_to_fp16 = const()[name = tensor("layers_15_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404526400)))]; + tensor input_125_cast_fp16 = conv(bias = layers_15_fc1_bias_to_fp16, dilations = input_125_dilations_0, groups = input_125_groups_0, pad = input_125_pad_0, pad_type = input_125_pad_type_0, strides = input_125_strides_0, weight = layers_15_fc1_weight_to_fp16, x = input_123_cast_fp16)[name = tensor("input_125_cast_fp16")]; + tensor input_127_mode_0 = const()[name = tensor("input_127_mode_0"), val = tensor("EXACT")]; + tensor input_127_cast_fp16 = gelu(mode = input_127_mode_0, x = input_125_cast_fp16)[name = tensor("input_127_cast_fp16")]; + tensor hidden_states_35_pad_type_0 = const()[name = tensor("hidden_states_35_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_35_strides_0 = const()[name = tensor("hidden_states_35_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_35_pad_0 = const()[name = tensor("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_35_dilations_0 = const()[name = tensor("hidden_states_35_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_35_groups_0 = const()[name = tensor("hidden_states_35_groups_0"), val = tensor(1)]; + tensor layers_15_fc2_weight_to_fp16 = const()[name = tensor("layers_15_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(404534656)))]; + tensor layers_15_fc2_bias_to_fp16 = const()[name = tensor("layers_15_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412923328)))]; + tensor hidden_states_35_cast_fp16 = conv(bias = layers_15_fc2_bias_to_fp16, dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = layers_15_fc2_weight_to_fp16, x = input_127_cast_fp16)[name = tensor("hidden_states_35_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = hidden_states_35_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor var_2078 = const()[name = tensor("op_2078"), val = tensor(3)]; + tensor out_65_axes_0 = const()[name = tensor("out_65_axes_0"), val = tensor([1])]; + tensor var_2100_to_fp16 = const()[name = tensor("op_2100_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_2100_to_fp16, x = inputs_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412925440)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412927552)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor query_33_pad_type_0 = const()[name = tensor("query_33_pad_type_0"), val = tensor("valid")]; + tensor query_33_strides_0 = const()[name = tensor("query_33_strides_0"), val = tensor([1, 1])]; + tensor query_33_pad_0 = const()[name = tensor("query_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_33_dilations_0 = const()[name = tensor("query_33_dilations_0"), val = tensor([1, 1])]; + tensor query_33_groups_0 = const()[name = tensor("query_33_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412929664)))]; + tensor layers_16_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415026880)))]; + tensor query_33_cast_fp16 = conv(bias = layers_16_self_attn_q_proj_bias_to_fp16, dilations = query_33_dilations_0, groups = query_33_groups_0, pad = query_33_pad_0, pad_type = query_33_pad_type_0, strides = query_33_strides_0, weight = layers_16_self_attn_q_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor key_33_pad_type_0 = const()[name = tensor("key_33_pad_type_0"), val = tensor("valid")]; + tensor key_33_strides_0 = const()[name = tensor("key_33_strides_0"), val = tensor([1, 1])]; + tensor key_33_pad_0 = const()[name = tensor("key_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_33_dilations_0 = const()[name = tensor("key_33_dilations_0"), val = tensor([1, 1])]; + tensor key_33_groups_0 = 
const()[name = tensor("key_33_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415028992)))]; + tensor key_33_cast_fp16 = conv(dilations = key_33_dilations_0, groups = key_33_groups_0, pad = key_33_pad_0, pad_type = key_33_pad_type_0, strides = key_33_strides_0, weight = layers_16_self_attn_k_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor value_33_pad_type_0 = const()[name = tensor("value_33_pad_type_0"), val = tensor("valid")]; + tensor value_33_strides_0 = const()[name = tensor("value_33_strides_0"), val = tensor([1, 1])]; + tensor value_33_pad_0 = const()[name = tensor("value_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_33_dilations_0 = const()[name = tensor("value_33_dilations_0"), val = tensor([1, 1])]; + tensor value_33_groups_0 = const()[name = tensor("value_33_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417126208)))]; + tensor layers_16_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419223424)))]; + tensor value_33_cast_fp16 = conv(bias = layers_16_self_attn_v_proj_bias_to_fp16, dilations = value_33_dilations_0, groups = value_33_groups_0, pad = value_33_pad_0, pad_type = value_33_pad_type_0, strides = value_33_strides_0, weight = layers_16_self_attn_v_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_2136 = const()[name = tensor("op_2136"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_33_cast_fp16 = reshape(shape = var_2136, x = query_33_cast_fp16)[name = tensor("mh_q_33_cast_fp16")]; + tensor var_2138_to_fp16 = const()[name = tensor("op_2138_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2139_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2138_to_fp16)[name = tensor("op_2139_cast_fp16")]; + tensor var_2142 = const()[name = tensor("op_2142"), val = tensor([1, 16, 64, 1500])]; + tensor var_2143_cast_fp16 = reshape(shape = var_2142, x = key_33_cast_fp16)[name = tensor("op_2143_cast_fp16")]; + tensor mh_w_33_transpose_x_0 = const()[name = tensor("mh_w_33_transpose_x_0"), val = tensor(true)]; + tensor mh_w_33_transpose_y_0 = const()[name = tensor("mh_w_33_transpose_y_0"), val = tensor(false)]; + tensor mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_2139_cast_fp16, y = var_2143_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor var_2146_cast_fp16 = softmax(axis = var_2078, x = mh_w_33_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2147 = const()[name = tensor("op_2147"), val = tensor([1, 16, 64, 1500])]; + tensor var_2148_cast_fp16 = reshape(shape = var_2147, x = value_33_cast_fp16)[name = tensor("op_2148_cast_fp16")]; + tensor attn_33_transpose_x_0 = const()[name = tensor("attn_33_transpose_x_0"), val = tensor(false)]; + tensor attn_33_transpose_y_0 = const()[name = tensor("attn_33_transpose_y_0"), val = tensor(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2148_cast_fp16, y = var_2146_cast_fp16)[name = 
tensor("attn_33_cast_fp16")]; + tensor var_2151 = const()[name = tensor("op_2151"), val = tensor([1, 1024, 1, 1500])]; + tensor input_129_cast_fp16 = reshape(shape = var_2151, x = attn_33_cast_fp16)[name = tensor("input_129_cast_fp16")]; + tensor obj_67_pad_type_0 = const()[name = tensor("obj_67_pad_type_0"), val = tensor("valid")]; + tensor obj_67_strides_0 = const()[name = tensor("obj_67_strides_0"), val = tensor([1, 1])]; + tensor obj_67_pad_0 = const()[name = tensor("obj_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_67_dilations_0 = const()[name = tensor("obj_67_dilations_0"), val = tensor([1, 1])]; + tensor obj_67_groups_0 = const()[name = tensor("obj_67_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419225536)))]; + tensor layers_16_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421322752)))]; + tensor obj_67_cast_fp16 = conv(bias = layers_16_self_attn_o_proj_bias_to_fp16, dilations = obj_67_dilations_0, groups = obj_67_groups_0, pad = obj_67_pad_0, pad_type = obj_67_pad_type_0, strides = obj_67_strides_0, weight = layers_16_self_attn_o_proj_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor out_67_axes_0 = const()[name = tensor("out_67_axes_0"), val = tensor([1])]; + tensor var_2169_to_fp16 = const()[name = tensor("op_2169_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_2169_to_fp16, x = inputs_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; + tensor input_131_gamma_0_to_fp16 = const()[name = tensor("input_131_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421324864)))]; + tensor input_131_beta_0_to_fp16 = const()[name = tensor("input_131_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421326976)))]; + tensor input_131_epsilon_0_to_fp16 = const()[name = tensor("input_131_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_131_cast_fp16 = batch_norm(beta = input_131_beta_0_to_fp16, epsilon = input_131_epsilon_0_to_fp16, gamma = input_131_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("input_131_cast_fp16")]; + tensor input_133_pad_type_0 = const()[name = tensor("input_133_pad_type_0"), val = tensor("valid")]; + tensor input_133_strides_0 = const()[name = tensor("input_133_strides_0"), val = tensor([1, 1])]; + tensor input_133_pad_0 = const()[name = tensor("input_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_133_dilations_0 = const()[name = tensor("input_133_dilations_0"), val = tensor([1, 1])]; + tensor input_133_groups_0 = const()[name = tensor("input_133_groups_0"), val = tensor(1)]; + tensor layers_16_fc1_weight_to_fp16 = const()[name = tensor("layers_16_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421329088)))]; + tensor layers_16_fc1_bias_to_fp16 = const()[name = tensor("layers_16_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(429717760)))]; + tensor input_133_cast_fp16 = conv(bias = layers_16_fc1_bias_to_fp16, dilations = input_133_dilations_0, groups = input_133_groups_0, pad = input_133_pad_0, pad_type = input_133_pad_type_0, strides = input_133_strides_0, weight = layers_16_fc1_weight_to_fp16, x = input_131_cast_fp16)[name = tensor("input_133_cast_fp16")]; + tensor input_135_mode_0 = const()[name = tensor("input_135_mode_0"), val = tensor("EXACT")]; + tensor input_135_cast_fp16 = gelu(mode = input_135_mode_0, x = input_133_cast_fp16)[name = tensor("input_135_cast_fp16")]; + tensor hidden_states_37_pad_type_0 = const()[name = tensor("hidden_states_37_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_37_strides_0 = const()[name = tensor("hidden_states_37_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_37_pad_0 = const()[name = tensor("hidden_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_37_dilations_0 = const()[name = tensor("hidden_states_37_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_37_groups_0 = const()[name = tensor("hidden_states_37_groups_0"), val = tensor(1)]; + tensor layers_16_fc2_weight_to_fp16 = const()[name = tensor("layers_16_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429726016)))]; + tensor layers_16_fc2_bias_to_fp16 = const()[name = tensor("layers_16_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438114688)))]; + tensor hidden_states_37_cast_fp16 = conv(bias = layers_16_fc2_bias_to_fp16, dilations = hidden_states_37_dilations_0, groups = hidden_states_37_groups_0, pad = hidden_states_37_pad_0, pad_type = hidden_states_37_pad_type_0, strides = hidden_states_37_strides_0, weight = layers_16_fc2_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = hidden_states_37_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor var_2198 = const()[name = tensor("op_2198"), val = tensor(3)]; + tensor out_69_axes_0 = const()[name = tensor("out_69_axes_0"), val = tensor([1])]; + tensor var_2220_to_fp16 = const()[name = tensor("op_2220_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_2220_to_fp16, x = inputs_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; + tensor obj_69_gamma_0_to_fp16 = const()[name = tensor("obj_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438116800)))]; + tensor obj_69_beta_0_to_fp16 = const()[name = tensor("obj_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438118912)))]; + tensor obj_69_epsilon_0_to_fp16 = const()[name = tensor("obj_69_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor query_35_pad_type_0 = const()[name = tensor("query_35_pad_type_0"), val = tensor("valid")]; + tensor query_35_strides_0 = const()[name = tensor("query_35_strides_0"), val = tensor([1, 1])]; + tensor query_35_pad_0 = const()[name = tensor("query_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_35_dilations_0 = 
const()[name = tensor("query_35_dilations_0"), val = tensor([1, 1])]; + tensor query_35_groups_0 = const()[name = tensor("query_35_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438121024)))]; + tensor layers_17_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(440218240)))]; + tensor query_35_cast_fp16 = conv(bias = layers_17_self_attn_q_proj_bias_to_fp16, dilations = query_35_dilations_0, groups = query_35_groups_0, pad = query_35_pad_0, pad_type = query_35_pad_type_0, strides = query_35_strides_0, weight = layers_17_self_attn_q_proj_weight_to_fp16, x = obj_69_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor key_35_pad_type_0 = const()[name = tensor("key_35_pad_type_0"), val = tensor("valid")]; + tensor key_35_strides_0 = const()[name = tensor("key_35_strides_0"), val = tensor([1, 1])]; + tensor key_35_pad_0 = const()[name = tensor("key_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_35_dilations_0 = const()[name = tensor("key_35_dilations_0"), val = tensor([1, 1])]; + tensor key_35_groups_0 = const()[name = tensor("key_35_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(440220352)))]; + tensor key_35_cast_fp16 = conv(dilations = key_35_dilations_0, groups = key_35_groups_0, pad = key_35_pad_0, pad_type = key_35_pad_type_0, strides = key_35_strides_0, weight = layers_17_self_attn_k_proj_weight_to_fp16, x = obj_69_cast_fp16)[name = tensor("key_35_cast_fp16")]; + tensor value_35_pad_type_0 = const()[name = tensor("value_35_pad_type_0"), val = tensor("valid")]; + tensor value_35_strides_0 = const()[name = tensor("value_35_strides_0"), val = tensor([1, 1])]; + tensor value_35_pad_0 = const()[name = tensor("value_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_35_dilations_0 = const()[name = tensor("value_35_dilations_0"), val = tensor([1, 1])]; + tensor value_35_groups_0 = const()[name = tensor("value_35_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(442317568)))]; + tensor layers_17_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444414784)))]; + tensor value_35_cast_fp16 = conv(bias = layers_17_self_attn_v_proj_bias_to_fp16, dilations = value_35_dilations_0, groups = value_35_groups_0, pad = value_35_pad_0, pad_type = value_35_pad_type_0, strides = value_35_strides_0, weight = layers_17_self_attn_v_proj_weight_to_fp16, x = obj_69_cast_fp16)[name = tensor("value_35_cast_fp16")]; + tensor var_2256 = const()[name = tensor("op_2256"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_35_cast_fp16 = reshape(shape = var_2256, x = query_35_cast_fp16)[name = tensor("mh_q_35_cast_fp16")]; + tensor var_2258_to_fp16 = const()[name = tensor("op_2258_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2259_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = 
var_2258_to_fp16)[name = tensor("op_2259_cast_fp16")]; + tensor var_2262 = const()[name = tensor("op_2262"), val = tensor([1, 16, 64, 1500])]; + tensor var_2263_cast_fp16 = reshape(shape = var_2262, x = key_35_cast_fp16)[name = tensor("op_2263_cast_fp16")]; + tensor mh_w_35_transpose_x_0 = const()[name = tensor("mh_w_35_transpose_x_0"), val = tensor(true)]; + tensor mh_w_35_transpose_y_0 = const()[name = tensor("mh_w_35_transpose_y_0"), val = tensor(false)]; + tensor mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_2259_cast_fp16, y = var_2263_cast_fp16)[name = tensor("mh_w_35_cast_fp16")]; + tensor var_2266_cast_fp16 = softmax(axis = var_2198, x = mh_w_35_cast_fp16)[name = tensor("op_2266_cast_fp16")]; + tensor var_2267 = const()[name = tensor("op_2267"), val = tensor([1, 16, 64, 1500])]; + tensor var_2268_cast_fp16 = reshape(shape = var_2267, x = value_35_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor attn_35_transpose_x_0 = const()[name = tensor("attn_35_transpose_x_0"), val = tensor(false)]; + tensor attn_35_transpose_y_0 = const()[name = tensor("attn_35_transpose_y_0"), val = tensor(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2268_cast_fp16, y = var_2266_cast_fp16)[name = tensor("attn_35_cast_fp16")]; + tensor var_2271 = const()[name = tensor("op_2271"), val = tensor([1, 1024, 1, 1500])]; + tensor input_137_cast_fp16 = reshape(shape = var_2271, x = attn_35_cast_fp16)[name = tensor("input_137_cast_fp16")]; + tensor obj_71_pad_type_0 = const()[name = tensor("obj_71_pad_type_0"), val = tensor("valid")]; + tensor obj_71_strides_0 = const()[name = tensor("obj_71_strides_0"), val = tensor([1, 1])]; + tensor obj_71_pad_0 = const()[name = tensor("obj_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_71_dilations_0 = const()[name = tensor("obj_71_dilations_0"), val = tensor([1, 1])]; + tensor obj_71_groups_0 = const()[name = tensor("obj_71_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444416896)))]; + tensor layers_17_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446514112)))]; + tensor obj_71_cast_fp16 = conv(bias = layers_17_self_attn_o_proj_bias_to_fp16, dilations = obj_71_dilations_0, groups = obj_71_groups_0, pad = obj_71_pad_0, pad_type = obj_71_pad_type_0, strides = obj_71_strides_0, weight = layers_17_self_attn_o_proj_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_71_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor out_71_axes_0 = const()[name = tensor("out_71_axes_0"), val = tensor([1])]; + tensor var_2289_to_fp16 = const()[name = tensor("op_2289_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_2289_to_fp16, x = inputs_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; + tensor input_139_gamma_0_to_fp16 = const()[name = tensor("input_139_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446516224)))]; + tensor input_139_beta_0_to_fp16 = const()[name = 
tensor("input_139_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446518336)))]; + tensor input_139_epsilon_0_to_fp16 = const()[name = tensor("input_139_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_139_cast_fp16 = batch_norm(beta = input_139_beta_0_to_fp16, epsilon = input_139_epsilon_0_to_fp16, gamma = input_139_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_139_cast_fp16")]; + tensor input_141_pad_type_0 = const()[name = tensor("input_141_pad_type_0"), val = tensor("valid")]; + tensor input_141_strides_0 = const()[name = tensor("input_141_strides_0"), val = tensor([1, 1])]; + tensor input_141_pad_0 = const()[name = tensor("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_141_dilations_0 = const()[name = tensor("input_141_dilations_0"), val = tensor([1, 1])]; + tensor input_141_groups_0 = const()[name = tensor("input_141_groups_0"), val = tensor(1)]; + tensor layers_17_fc1_weight_to_fp16 = const()[name = tensor("layers_17_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446520448)))]; + tensor layers_17_fc1_bias_to_fp16 = const()[name = tensor("layers_17_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(454909120)))]; + tensor input_141_cast_fp16 = conv(bias = layers_17_fc1_bias_to_fp16, dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = layers_17_fc1_weight_to_fp16, x = input_139_cast_fp16)[name = tensor("input_141_cast_fp16")]; + tensor input_143_mode_0 = const()[name = tensor("input_143_mode_0"), val = tensor("EXACT")]; + tensor input_143_cast_fp16 = gelu(mode = input_143_mode_0, x = input_141_cast_fp16)[name = tensor("input_143_cast_fp16")]; + tensor hidden_states_39_pad_type_0 = const()[name = tensor("hidden_states_39_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_39_strides_0 = const()[name = tensor("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = tensor("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = tensor("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_39_groups_0 = const()[name = tensor("hidden_states_39_groups_0"), val = tensor(1)]; + tensor layers_17_fc2_weight_to_fp16 = const()[name = tensor("layers_17_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(454917376)))]; + tensor layers_17_fc2_bias_to_fp16 = const()[name = tensor("layers_17_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463306048)))]; + tensor hidden_states_39_cast_fp16 = conv(bias = layers_17_fc2_bias_to_fp16, dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = layers_17_fc2_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("hidden_states_39_cast_fp16")]; + tensor inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_39_cast_fp16)[name = tensor("inputs_73_cast_fp16")]; + tensor var_2318 = const()[name = tensor("op_2318"), val = tensor(3)]; + tensor out_73_axes_0 = const()[name = 
tensor("out_73_axes_0"), val = tensor([1])]; + tensor var_2340_to_fp16 = const()[name = tensor("op_2340_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_2340_to_fp16, x = inputs_73_cast_fp16)[name = tensor("out_73_cast_fp16")]; + tensor obj_73_gamma_0_to_fp16 = const()[name = tensor("obj_73_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463308160)))]; + tensor obj_73_beta_0_to_fp16 = const()[name = tensor("obj_73_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463310272)))]; + tensor obj_73_epsilon_0_to_fp16 = const()[name = tensor("obj_73_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_73_cast_fp16 = batch_norm(beta = obj_73_beta_0_to_fp16, epsilon = obj_73_epsilon_0_to_fp16, gamma = obj_73_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_73_cast_fp16)[name = tensor("obj_73_cast_fp16")]; + tensor query_37_pad_type_0 = const()[name = tensor("query_37_pad_type_0"), val = tensor("valid")]; + tensor query_37_strides_0 = const()[name = tensor("query_37_strides_0"), val = tensor([1, 1])]; + tensor query_37_pad_0 = const()[name = tensor("query_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_37_dilations_0 = const()[name = tensor("query_37_dilations_0"), val = tensor([1, 1])]; + tensor query_37_groups_0 = const()[name = tensor("query_37_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463312384)))]; + tensor layers_18_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(465409600)))]; + tensor query_37_cast_fp16 = conv(bias = layers_18_self_attn_q_proj_bias_to_fp16, dilations = query_37_dilations_0, groups = query_37_groups_0, pad = query_37_pad_0, pad_type = query_37_pad_type_0, strides = query_37_strides_0, weight = layers_18_self_attn_q_proj_weight_to_fp16, x = obj_73_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor key_37_pad_type_0 = const()[name = tensor("key_37_pad_type_0"), val = tensor("valid")]; + tensor key_37_strides_0 = const()[name = tensor("key_37_strides_0"), val = tensor([1, 1])]; + tensor key_37_pad_0 = const()[name = tensor("key_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_37_dilations_0 = const()[name = tensor("key_37_dilations_0"), val = tensor([1, 1])]; + tensor key_37_groups_0 = const()[name = tensor("key_37_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(465411712)))]; + tensor key_37_cast_fp16 = conv(dilations = key_37_dilations_0, groups = key_37_groups_0, pad = key_37_pad_0, pad_type = key_37_pad_type_0, strides = key_37_strides_0, weight = layers_18_self_attn_k_proj_weight_to_fp16, x = obj_73_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor value_37_pad_type_0 = const()[name = tensor("value_37_pad_type_0"), val = tensor("valid")]; + tensor value_37_strides_0 = const()[name = tensor("value_37_strides_0"), val = tensor([1, 1])]; + tensor value_37_pad_0 = const()[name = tensor("value_37_pad_0"), val = 
tensor([0, 0, 0, 0])]; + tensor value_37_dilations_0 = const()[name = tensor("value_37_dilations_0"), val = tensor([1, 1])]; + tensor value_37_groups_0 = const()[name = tensor("value_37_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467508928)))]; + tensor layers_18_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(469606144)))]; + tensor value_37_cast_fp16 = conv(bias = layers_18_self_attn_v_proj_bias_to_fp16, dilations = value_37_dilations_0, groups = value_37_groups_0, pad = value_37_pad_0, pad_type = value_37_pad_type_0, strides = value_37_strides_0, weight = layers_18_self_attn_v_proj_weight_to_fp16, x = obj_73_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_2376 = const()[name = tensor("op_2376"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_37_cast_fp16 = reshape(shape = var_2376, x = query_37_cast_fp16)[name = tensor("mh_q_37_cast_fp16")]; + tensor var_2378_to_fp16 = const()[name = tensor("op_2378_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2379_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2378_to_fp16)[name = tensor("op_2379_cast_fp16")]; + tensor var_2382 = const()[name = tensor("op_2382"), val = tensor([1, 16, 64, 1500])]; + tensor var_2383_cast_fp16 = reshape(shape = var_2382, x = key_37_cast_fp16)[name = tensor("op_2383_cast_fp16")]; + tensor mh_w_37_transpose_x_0 = const()[name = tensor("mh_w_37_transpose_x_0"), val = tensor(true)]; + tensor mh_w_37_transpose_y_0 = const()[name = tensor("mh_w_37_transpose_y_0"), val = tensor(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_2379_cast_fp16, y = var_2383_cast_fp16)[name = tensor("mh_w_37_cast_fp16")]; + tensor var_2386_cast_fp16 = softmax(axis = var_2318, x = mh_w_37_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2387 = const()[name = tensor("op_2387"), val = tensor([1, 16, 64, 1500])]; + tensor var_2388_cast_fp16 = reshape(shape = var_2387, x = value_37_cast_fp16)[name = tensor("op_2388_cast_fp16")]; + tensor attn_37_transpose_x_0 = const()[name = tensor("attn_37_transpose_x_0"), val = tensor(false)]; + tensor attn_37_transpose_y_0 = const()[name = tensor("attn_37_transpose_y_0"), val = tensor(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2388_cast_fp16, y = var_2386_cast_fp16)[name = tensor("attn_37_cast_fp16")]; + tensor var_2391 = const()[name = tensor("op_2391"), val = tensor([1, 1024, 1, 1500])]; + tensor input_145_cast_fp16 = reshape(shape = var_2391, x = attn_37_cast_fp16)[name = tensor("input_145_cast_fp16")]; + tensor obj_75_pad_type_0 = const()[name = tensor("obj_75_pad_type_0"), val = tensor("valid")]; + tensor obj_75_strides_0 = const()[name = tensor("obj_75_strides_0"), val = tensor([1, 1])]; + tensor obj_75_pad_0 = const()[name = tensor("obj_75_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_75_dilations_0 = const()[name = tensor("obj_75_dilations_0"), val = tensor([1, 1])]; + tensor obj_75_groups_0 = const()[name = tensor("obj_75_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_o_proj_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(469608256)))]; + tensor layers_18_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471705472)))]; + tensor obj_75_cast_fp16 = conv(bias = layers_18_self_attn_o_proj_bias_to_fp16, dilations = obj_75_dilations_0, groups = obj_75_groups_0, pad = obj_75_pad_0, pad_type = obj_75_pad_type_0, strides = obj_75_strides_0, weight = layers_18_self_attn_o_proj_weight_to_fp16, x = input_145_cast_fp16)[name = tensor("obj_75_cast_fp16")]; + tensor inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = obj_75_cast_fp16)[name = tensor("inputs_75_cast_fp16")]; + tensor out_75_axes_0 = const()[name = tensor("out_75_axes_0"), val = tensor([1])]; + tensor var_2409_to_fp16 = const()[name = tensor("op_2409_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_75_cast_fp16 = layer_norm(axes = out_75_axes_0, epsilon = var_2409_to_fp16, x = inputs_75_cast_fp16)[name = tensor("out_75_cast_fp16")]; + tensor input_147_gamma_0_to_fp16 = const()[name = tensor("input_147_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471707584)))]; + tensor input_147_beta_0_to_fp16 = const()[name = tensor("input_147_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471709696)))]; + tensor input_147_epsilon_0_to_fp16 = const()[name = tensor("input_147_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_147_cast_fp16 = batch_norm(beta = input_147_beta_0_to_fp16, epsilon = input_147_epsilon_0_to_fp16, gamma = input_147_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_75_cast_fp16)[name = tensor("input_147_cast_fp16")]; + tensor input_149_pad_type_0 = const()[name = tensor("input_149_pad_type_0"), val = tensor("valid")]; + tensor input_149_strides_0 = const()[name = tensor("input_149_strides_0"), val = tensor([1, 1])]; + tensor input_149_pad_0 = const()[name = tensor("input_149_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_149_dilations_0 = const()[name = tensor("input_149_dilations_0"), val = tensor([1, 1])]; + tensor input_149_groups_0 = const()[name = tensor("input_149_groups_0"), val = tensor(1)]; + tensor layers_18_fc1_weight_to_fp16 = const()[name = tensor("layers_18_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471711808)))]; + tensor layers_18_fc1_bias_to_fp16 = const()[name = tensor("layers_18_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480100480)))]; + tensor input_149_cast_fp16 = conv(bias = layers_18_fc1_bias_to_fp16, dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = layers_18_fc1_weight_to_fp16, x = input_147_cast_fp16)[name = tensor("input_149_cast_fp16")]; + tensor input_151_mode_0 = const()[name = tensor("input_151_mode_0"), val = tensor("EXACT")]; + tensor input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor("input_151_cast_fp16")]; + tensor hidden_states_41_pad_type_0 = const()[name = tensor("hidden_states_41_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_41_strides_0 = const()[name = tensor("hidden_states_41_strides_0"), val = tensor([1, 1])]; + tensor 
hidden_states_41_pad_0 = const()[name = tensor("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_41_dilations_0 = const()[name = tensor("hidden_states_41_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_41_groups_0 = const()[name = tensor("hidden_states_41_groups_0"), val = tensor(1)]; + tensor layers_18_fc2_weight_to_fp16 = const()[name = tensor("layers_18_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480108736)))]; + tensor layers_18_fc2_bias_to_fp16 = const()[name = tensor("layers_18_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488497408)))]; + tensor hidden_states_41_cast_fp16 = conv(bias = layers_18_fc2_bias_to_fp16, dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = layers_18_fc2_weight_to_fp16, x = input_151_cast_fp16)[name = tensor("hidden_states_41_cast_fp16")]; + tensor inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = hidden_states_41_cast_fp16)[name = tensor("inputs_77_cast_fp16")]; + tensor var_2438 = const()[name = tensor("op_2438"), val = tensor(3)]; + tensor out_77_axes_0 = const()[name = tensor("out_77_axes_0"), val = tensor([1])]; + tensor var_2460_to_fp16 = const()[name = tensor("op_2460_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_77_cast_fp16 = layer_norm(axes = out_77_axes_0, epsilon = var_2460_to_fp16, x = inputs_77_cast_fp16)[name = tensor("out_77_cast_fp16")]; + tensor obj_77_gamma_0_to_fp16 = const()[name = tensor("obj_77_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488499520)))]; + tensor obj_77_beta_0_to_fp16 = const()[name = tensor("obj_77_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488501632)))]; + tensor obj_77_epsilon_0_to_fp16 = const()[name = tensor("obj_77_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_77_cast_fp16 = batch_norm(beta = obj_77_beta_0_to_fp16, epsilon = obj_77_epsilon_0_to_fp16, gamma = obj_77_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_77_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor query_39_pad_type_0 = const()[name = tensor("query_39_pad_type_0"), val = tensor("valid")]; + tensor query_39_strides_0 = const()[name = tensor("query_39_strides_0"), val = tensor([1, 1])]; + tensor query_39_pad_0 = const()[name = tensor("query_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_39_dilations_0 = const()[name = tensor("query_39_dilations_0"), val = tensor([1, 1])]; + tensor query_39_groups_0 = const()[name = tensor("query_39_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488503744)))]; + tensor layers_19_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(490600960)))]; + tensor query_39_cast_fp16 = conv(bias = layers_19_self_attn_q_proj_bias_to_fp16, dilations = query_39_dilations_0, groups = query_39_groups_0, pad = query_39_pad_0, pad_type = query_39_pad_type_0, strides = query_39_strides_0, weight = 
layers_19_self_attn_q_proj_weight_to_fp16, x = obj_77_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor key_39_pad_type_0 = const()[name = tensor("key_39_pad_type_0"), val = tensor("valid")]; + tensor key_39_strides_0 = const()[name = tensor("key_39_strides_0"), val = tensor([1, 1])]; + tensor key_39_pad_0 = const()[name = tensor("key_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_39_dilations_0 = const()[name = tensor("key_39_dilations_0"), val = tensor([1, 1])]; + tensor key_39_groups_0 = const()[name = tensor("key_39_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(490603072)))]; + tensor key_39_cast_fp16 = conv(dilations = key_39_dilations_0, groups = key_39_groups_0, pad = key_39_pad_0, pad_type = key_39_pad_type_0, strides = key_39_strides_0, weight = layers_19_self_attn_k_proj_weight_to_fp16, x = obj_77_cast_fp16)[name = tensor("key_39_cast_fp16")]; + tensor value_39_pad_type_0 = const()[name = tensor("value_39_pad_type_0"), val = tensor("valid")]; + tensor value_39_strides_0 = const()[name = tensor("value_39_strides_0"), val = tensor([1, 1])]; + tensor value_39_pad_0 = const()[name = tensor("value_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_39_dilations_0 = const()[name = tensor("value_39_dilations_0"), val = tensor([1, 1])]; + tensor value_39_groups_0 = const()[name = tensor("value_39_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(492700288)))]; + tensor layers_19_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494797504)))]; + tensor value_39_cast_fp16 = conv(bias = layers_19_self_attn_v_proj_bias_to_fp16, dilations = value_39_dilations_0, groups = value_39_groups_0, pad = value_39_pad_0, pad_type = value_39_pad_type_0, strides = value_39_strides_0, weight = layers_19_self_attn_v_proj_weight_to_fp16, x = obj_77_cast_fp16)[name = tensor("value_39_cast_fp16")]; + tensor var_2496 = const()[name = tensor("op_2496"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_39_cast_fp16 = reshape(shape = var_2496, x = query_39_cast_fp16)[name = tensor("mh_q_39_cast_fp16")]; + tensor var_2498_to_fp16 = const()[name = tensor("op_2498_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2499_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_2498_to_fp16)[name = tensor("op_2499_cast_fp16")]; + tensor var_2502 = const()[name = tensor("op_2502"), val = tensor([1, 16, 64, 1500])]; + tensor var_2503_cast_fp16 = reshape(shape = var_2502, x = key_39_cast_fp16)[name = tensor("op_2503_cast_fp16")]; + tensor mh_w_39_transpose_x_0 = const()[name = tensor("mh_w_39_transpose_x_0"), val = tensor(true)]; + tensor mh_w_39_transpose_y_0 = const()[name = tensor("mh_w_39_transpose_y_0"), val = tensor(false)]; + tensor mh_w_39_cast_fp16 = matmul(transpose_x = mh_w_39_transpose_x_0, transpose_y = mh_w_39_transpose_y_0, x = var_2499_cast_fp16, y = var_2503_cast_fp16)[name = tensor("mh_w_39_cast_fp16")]; + tensor var_2506_cast_fp16 = softmax(axis = var_2438, x = mh_w_39_cast_fp16)[name = tensor("op_2506_cast_fp16")]; + tensor var_2507 = const()[name = tensor("op_2507"), val = tensor([1, 16, 64, 
1500])]; + tensor var_2508_cast_fp16 = reshape(shape = var_2507, x = value_39_cast_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor attn_39_transpose_x_0 = const()[name = tensor("attn_39_transpose_x_0"), val = tensor(false)]; + tensor attn_39_transpose_y_0 = const()[name = tensor("attn_39_transpose_y_0"), val = tensor(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2508_cast_fp16, y = var_2506_cast_fp16)[name = tensor("attn_39_cast_fp16")]; + tensor var_2511 = const()[name = tensor("op_2511"), val = tensor([1, 1024, 1, 1500])]; + tensor input_153_cast_fp16 = reshape(shape = var_2511, x = attn_39_cast_fp16)[name = tensor("input_153_cast_fp16")]; + tensor obj_79_pad_type_0 = const()[name = tensor("obj_79_pad_type_0"), val = tensor("valid")]; + tensor obj_79_strides_0 = const()[name = tensor("obj_79_strides_0"), val = tensor([1, 1])]; + tensor obj_79_pad_0 = const()[name = tensor("obj_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_79_dilations_0 = const()[name = tensor("obj_79_dilations_0"), val = tensor([1, 1])]; + tensor obj_79_groups_0 = const()[name = tensor("obj_79_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494799616)))]; + tensor layers_19_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496896832)))]; + tensor obj_79_cast_fp16 = conv(bias = layers_19_self_attn_o_proj_bias_to_fp16, dilations = obj_79_dilations_0, groups = obj_79_groups_0, pad = obj_79_pad_0, pad_type = obj_79_pad_type_0, strides = obj_79_strides_0, weight = layers_19_self_attn_o_proj_weight_to_fp16, x = input_153_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = obj_79_cast_fp16)[name = tensor("inputs_79_cast_fp16")]; + tensor out_79_axes_0 = const()[name = tensor("out_79_axes_0"), val = tensor([1])]; + tensor var_2529_to_fp16 = const()[name = tensor("op_2529_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_2529_to_fp16, x = inputs_79_cast_fp16)[name = tensor("out_79_cast_fp16")]; + tensor input_155_gamma_0_to_fp16 = const()[name = tensor("input_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496898944)))]; + tensor input_155_beta_0_to_fp16 = const()[name = tensor("input_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496901056)))]; + tensor input_155_epsilon_0_to_fp16 = const()[name = tensor("input_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_155_cast_fp16 = batch_norm(beta = input_155_beta_0_to_fp16, epsilon = input_155_epsilon_0_to_fp16, gamma = input_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_79_cast_fp16)[name = tensor("input_155_cast_fp16")]; + tensor input_157_pad_type_0 = const()[name = tensor("input_157_pad_type_0"), val = tensor("valid")]; + tensor input_157_strides_0 = const()[name = tensor("input_157_strides_0"), val = tensor([1, 1])]; + tensor input_157_pad_0 = const()[name = tensor("input_157_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_157_dilations_0 = const()[name = 
tensor("input_157_dilations_0"), val = tensor([1, 1])]; + tensor input_157_groups_0 = const()[name = tensor("input_157_groups_0"), val = tensor(1)]; + tensor layers_19_fc1_weight_to_fp16 = const()[name = tensor("layers_19_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496903168)))]; + tensor layers_19_fc1_bias_to_fp16 = const()[name = tensor("layers_19_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505291840)))]; + tensor input_157_cast_fp16 = conv(bias = layers_19_fc1_bias_to_fp16, dilations = input_157_dilations_0, groups = input_157_groups_0, pad = input_157_pad_0, pad_type = input_157_pad_type_0, strides = input_157_strides_0, weight = layers_19_fc1_weight_to_fp16, x = input_155_cast_fp16)[name = tensor("input_157_cast_fp16")]; + tensor input_159_mode_0 = const()[name = tensor("input_159_mode_0"), val = tensor("EXACT")]; + tensor input_159_cast_fp16 = gelu(mode = input_159_mode_0, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; + tensor hidden_states_43_pad_type_0 = const()[name = tensor("hidden_states_43_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_43_strides_0 = const()[name = tensor("hidden_states_43_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_43_pad_0 = const()[name = tensor("hidden_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_43_dilations_0 = const()[name = tensor("hidden_states_43_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_43_groups_0 = const()[name = tensor("hidden_states_43_groups_0"), val = tensor(1)]; + tensor layers_19_fc2_weight_to_fp16 = const()[name = tensor("layers_19_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505300096)))]; + tensor layers_19_fc2_bias_to_fp16 = const()[name = tensor("layers_19_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513688768)))]; + tensor hidden_states_43_cast_fp16 = conv(bias = layers_19_fc2_bias_to_fp16, dilations = hidden_states_43_dilations_0, groups = hidden_states_43_groups_0, pad = hidden_states_43_pad_0, pad_type = hidden_states_43_pad_type_0, strides = hidden_states_43_strides_0, weight = layers_19_fc2_weight_to_fp16, x = input_159_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; + tensor inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = hidden_states_43_cast_fp16)[name = tensor("inputs_81_cast_fp16")]; + tensor var_2558 = const()[name = tensor("op_2558"), val = tensor(3)]; + tensor out_81_axes_0 = const()[name = tensor("out_81_axes_0"), val = tensor([1])]; + tensor var_2580_to_fp16 = const()[name = tensor("op_2580_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_81_cast_fp16 = layer_norm(axes = out_81_axes_0, epsilon = var_2580_to_fp16, x = inputs_81_cast_fp16)[name = tensor("out_81_cast_fp16")]; + tensor obj_81_gamma_0_to_fp16 = const()[name = tensor("obj_81_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513690880)))]; + tensor obj_81_beta_0_to_fp16 = const()[name = tensor("obj_81_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513692992)))]; + tensor obj_81_epsilon_0_to_fp16 = const()[name = tensor("obj_81_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_81_cast_fp16 = batch_norm(beta = obj_81_beta_0_to_fp16, epsilon = obj_81_epsilon_0_to_fp16, gamma = 
obj_81_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_81_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor query_41_pad_type_0 = const()[name = tensor("query_41_pad_type_0"), val = tensor("valid")]; + tensor query_41_strides_0 = const()[name = tensor("query_41_strides_0"), val = tensor([1, 1])]; + tensor query_41_pad_0 = const()[name = tensor("query_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_41_dilations_0 = const()[name = tensor("query_41_dilations_0"), val = tensor([1, 1])]; + tensor query_41_groups_0 = const()[name = tensor("query_41_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513695104)))]; + tensor layers_20_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(515792320)))]; + tensor query_41_cast_fp16 = conv(bias = layers_20_self_attn_q_proj_bias_to_fp16, dilations = query_41_dilations_0, groups = query_41_groups_0, pad = query_41_pad_0, pad_type = query_41_pad_type_0, strides = query_41_strides_0, weight = layers_20_self_attn_q_proj_weight_to_fp16, x = obj_81_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor key_41_pad_type_0 = const()[name = tensor("key_41_pad_type_0"), val = tensor("valid")]; + tensor key_41_strides_0 = const()[name = tensor("key_41_strides_0"), val = tensor([1, 1])]; + tensor key_41_pad_0 = const()[name = tensor("key_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_41_dilations_0 = const()[name = tensor("key_41_dilations_0"), val = tensor([1, 1])]; + tensor key_41_groups_0 = const()[name = tensor("key_41_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(515794432)))]; + tensor key_41_cast_fp16 = conv(dilations = key_41_dilations_0, groups = key_41_groups_0, pad = key_41_pad_0, pad_type = key_41_pad_type_0, strides = key_41_strides_0, weight = layers_20_self_attn_k_proj_weight_to_fp16, x = obj_81_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor value_41_pad_type_0 = const()[name = tensor("value_41_pad_type_0"), val = tensor("valid")]; + tensor value_41_strides_0 = const()[name = tensor("value_41_strides_0"), val = tensor([1, 1])]; + tensor value_41_pad_0 = const()[name = tensor("value_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_41_dilations_0 = const()[name = tensor("value_41_dilations_0"), val = tensor([1, 1])]; + tensor value_41_groups_0 = const()[name = tensor("value_41_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517891648)))]; + tensor layers_20_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(519988864)))]; + tensor value_41_cast_fp16 = conv(bias = layers_20_self_attn_v_proj_bias_to_fp16, dilations = value_41_dilations_0, groups = value_41_groups_0, pad = value_41_pad_0, pad_type = value_41_pad_type_0, strides = value_41_strides_0, weight 
= layers_20_self_attn_v_proj_weight_to_fp16, x = obj_81_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_2616 = const()[name = tensor("op_2616"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_41_cast_fp16 = reshape(shape = var_2616, x = query_41_cast_fp16)[name = tensor("mh_q_41_cast_fp16")]; + tensor var_2618_to_fp16 = const()[name = tensor("op_2618_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2619_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_2618_to_fp16)[name = tensor("op_2619_cast_fp16")]; + tensor var_2622 = const()[name = tensor("op_2622"), val = tensor([1, 16, 64, 1500])]; + tensor var_2623_cast_fp16 = reshape(shape = var_2622, x = key_41_cast_fp16)[name = tensor("op_2623_cast_fp16")]; + tensor mh_w_41_transpose_x_0 = const()[name = tensor("mh_w_41_transpose_x_0"), val = tensor(true)]; + tensor mh_w_41_transpose_y_0 = const()[name = tensor("mh_w_41_transpose_y_0"), val = tensor(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_2619_cast_fp16, y = var_2623_cast_fp16)[name = tensor("mh_w_41_cast_fp16")]; + tensor var_2626_cast_fp16 = softmax(axis = var_2558, x = mh_w_41_cast_fp16)[name = tensor("op_2626_cast_fp16")]; + tensor var_2627 = const()[name = tensor("op_2627"), val = tensor([1, 16, 64, 1500])]; + tensor var_2628_cast_fp16 = reshape(shape = var_2627, x = value_41_cast_fp16)[name = tensor("op_2628_cast_fp16")]; + tensor attn_41_transpose_x_0 = const()[name = tensor("attn_41_transpose_x_0"), val = tensor(false)]; + tensor attn_41_transpose_y_0 = const()[name = tensor("attn_41_transpose_y_0"), val = tensor(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_2628_cast_fp16, y = var_2626_cast_fp16)[name = tensor("attn_41_cast_fp16")]; + tensor var_2631 = const()[name = tensor("op_2631"), val = tensor([1, 1024, 1, 1500])]; + tensor input_161_cast_fp16 = reshape(shape = var_2631, x = attn_41_cast_fp16)[name = tensor("input_161_cast_fp16")]; + tensor obj_83_pad_type_0 = const()[name = tensor("obj_83_pad_type_0"), val = tensor("valid")]; + tensor obj_83_strides_0 = const()[name = tensor("obj_83_strides_0"), val = tensor([1, 1])]; + tensor obj_83_pad_0 = const()[name = tensor("obj_83_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_83_dilations_0 = const()[name = tensor("obj_83_dilations_0"), val = tensor([1, 1])]; + tensor obj_83_groups_0 = const()[name = tensor("obj_83_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(519990976)))]; + tensor layers_20_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(522088192)))]; + tensor obj_83_cast_fp16 = conv(bias = layers_20_self_attn_o_proj_bias_to_fp16, dilations = obj_83_dilations_0, groups = obj_83_groups_0, pad = obj_83_pad_0, pad_type = obj_83_pad_type_0, strides = obj_83_strides_0, weight = layers_20_self_attn_o_proj_weight_to_fp16, x = input_161_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = obj_83_cast_fp16)[name = tensor("inputs_83_cast_fp16")]; + tensor out_83_axes_0 = const()[name = tensor("out_83_axes_0"), val = tensor([1])]; + tensor var_2649_to_fp16 = const()[name = 
tensor("op_2649_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_83_cast_fp16 = layer_norm(axes = out_83_axes_0, epsilon = var_2649_to_fp16, x = inputs_83_cast_fp16)[name = tensor("out_83_cast_fp16")]; + tensor input_163_gamma_0_to_fp16 = const()[name = tensor("input_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(522090304)))]; + tensor input_163_beta_0_to_fp16 = const()[name = tensor("input_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(522092416)))]; + tensor input_163_epsilon_0_to_fp16 = const()[name = tensor("input_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_163_cast_fp16 = batch_norm(beta = input_163_beta_0_to_fp16, epsilon = input_163_epsilon_0_to_fp16, gamma = input_163_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_83_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor input_165_pad_type_0 = const()[name = tensor("input_165_pad_type_0"), val = tensor("valid")]; + tensor input_165_strides_0 = const()[name = tensor("input_165_strides_0"), val = tensor([1, 1])]; + tensor input_165_pad_0 = const()[name = tensor("input_165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_165_dilations_0 = const()[name = tensor("input_165_dilations_0"), val = tensor([1, 1])]; + tensor input_165_groups_0 = const()[name = tensor("input_165_groups_0"), val = tensor(1)]; + tensor layers_20_fc1_weight_to_fp16 = const()[name = tensor("layers_20_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(522094528)))]; + tensor layers_20_fc1_bias_to_fp16 = const()[name = tensor("layers_20_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530483200)))]; + tensor input_165_cast_fp16 = conv(bias = layers_20_fc1_bias_to_fp16, dilations = input_165_dilations_0, groups = input_165_groups_0, pad = input_165_pad_0, pad_type = input_165_pad_type_0, strides = input_165_strides_0, weight = layers_20_fc1_weight_to_fp16, x = input_163_cast_fp16)[name = tensor("input_165_cast_fp16")]; + tensor input_167_mode_0 = const()[name = tensor("input_167_mode_0"), val = tensor("EXACT")]; + tensor input_167_cast_fp16 = gelu(mode = input_167_mode_0, x = input_165_cast_fp16)[name = tensor("input_167_cast_fp16")]; + tensor hidden_states_45_pad_type_0 = const()[name = tensor("hidden_states_45_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_45_strides_0 = const()[name = tensor("hidden_states_45_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_45_pad_0 = const()[name = tensor("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_45_dilations_0 = const()[name = tensor("hidden_states_45_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_45_groups_0 = const()[name = tensor("hidden_states_45_groups_0"), val = tensor(1)]; + tensor layers_20_fc2_weight_to_fp16 = const()[name = tensor("layers_20_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530491456)))]; + tensor layers_20_fc2_bias_to_fp16 = const()[name = tensor("layers_20_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538880128)))]; + tensor hidden_states_45_cast_fp16 = conv(bias = layers_20_fc2_bias_to_fp16, dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, 
pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = layers_20_fc2_weight_to_fp16, x = input_167_cast_fp16)[name = tensor("hidden_states_45_cast_fp16")]; + tensor inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = hidden_states_45_cast_fp16)[name = tensor("inputs_85_cast_fp16")]; + tensor var_2678 = const()[name = tensor("op_2678"), val = tensor(3)]; + tensor out_85_axes_0 = const()[name = tensor("out_85_axes_0"), val = tensor([1])]; + tensor var_2700_to_fp16 = const()[name = tensor("op_2700_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_2700_to_fp16, x = inputs_85_cast_fp16)[name = tensor("out_85_cast_fp16")]; + tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538882240)))]; + tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538884352)))]; + tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_85_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor query_43_pad_type_0 = const()[name = tensor("query_43_pad_type_0"), val = tensor("valid")]; + tensor query_43_strides_0 = const()[name = tensor("query_43_strides_0"), val = tensor([1, 1])]; + tensor query_43_pad_0 = const()[name = tensor("query_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_43_dilations_0 = const()[name = tensor("query_43_dilations_0"), val = tensor([1, 1])]; + tensor query_43_groups_0 = const()[name = tensor("query_43_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538886464)))]; + tensor layers_21_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(540983680)))]; + tensor query_43_cast_fp16 = conv(bias = layers_21_self_attn_q_proj_bias_to_fp16, dilations = query_43_dilations_0, groups = query_43_groups_0, pad = query_43_pad_0, pad_type = query_43_pad_type_0, strides = query_43_strides_0, weight = layers_21_self_attn_q_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor key_43_pad_type_0 = const()[name = tensor("key_43_pad_type_0"), val = tensor("valid")]; + tensor key_43_strides_0 = const()[name = tensor("key_43_strides_0"), val = tensor([1, 1])]; + tensor key_43_pad_0 = const()[name = tensor("key_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_43_dilations_0 = const()[name = tensor("key_43_dilations_0"), val = tensor([1, 1])]; + tensor key_43_groups_0 = const()[name = tensor("key_43_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(540985792)))]; + tensor key_43_cast_fp16 = conv(dilations = key_43_dilations_0, groups = key_43_groups_0, pad = key_43_pad_0, pad_type = 
key_43_pad_type_0, strides = key_43_strides_0, weight = layers_21_self_attn_k_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("key_43_cast_fp16")]; + tensor value_43_pad_type_0 = const()[name = tensor("value_43_pad_type_0"), val = tensor("valid")]; + tensor value_43_strides_0 = const()[name = tensor("value_43_strides_0"), val = tensor([1, 1])]; + tensor value_43_pad_0 = const()[name = tensor("value_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_43_dilations_0 = const()[name = tensor("value_43_dilations_0"), val = tensor([1, 1])]; + tensor value_43_groups_0 = const()[name = tensor("value_43_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(543083008)))]; + tensor layers_21_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545180224)))]; + tensor value_43_cast_fp16 = conv(bias = layers_21_self_attn_v_proj_bias_to_fp16, dilations = value_43_dilations_0, groups = value_43_groups_0, pad = value_43_pad_0, pad_type = value_43_pad_type_0, strides = value_43_strides_0, weight = layers_21_self_attn_v_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("value_43_cast_fp16")]; + tensor var_2736 = const()[name = tensor("op_2736"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_43_cast_fp16 = reshape(shape = var_2736, x = query_43_cast_fp16)[name = tensor("mh_q_43_cast_fp16")]; + tensor var_2738_to_fp16 = const()[name = tensor("op_2738_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2739_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_2738_to_fp16)[name = tensor("op_2739_cast_fp16")]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 16, 64, 1500])]; + tensor var_2743_cast_fp16 = reshape(shape = var_2742, x = key_43_cast_fp16)[name = tensor("op_2743_cast_fp16")]; + tensor mh_w_43_transpose_x_0 = const()[name = tensor("mh_w_43_transpose_x_0"), val = tensor(true)]; + tensor mh_w_43_transpose_y_0 = const()[name = tensor("mh_w_43_transpose_y_0"), val = tensor(false)]; + tensor mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_2739_cast_fp16, y = var_2743_cast_fp16)[name = tensor("mh_w_43_cast_fp16")]; + tensor var_2746_cast_fp16 = softmax(axis = var_2678, x = mh_w_43_cast_fp16)[name = tensor("op_2746_cast_fp16")]; + tensor var_2747 = const()[name = tensor("op_2747"), val = tensor([1, 16, 64, 1500])]; + tensor var_2748_cast_fp16 = reshape(shape = var_2747, x = value_43_cast_fp16)[name = tensor("op_2748_cast_fp16")]; + tensor attn_43_transpose_x_0 = const()[name = tensor("attn_43_transpose_x_0"), val = tensor(false)]; + tensor attn_43_transpose_y_0 = const()[name = tensor("attn_43_transpose_y_0"), val = tensor(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_2748_cast_fp16, y = var_2746_cast_fp16)[name = tensor("attn_43_cast_fp16")]; + tensor var_2751 = const()[name = tensor("op_2751"), val = tensor([1, 1024, 1, 1500])]; + tensor input_169_cast_fp16 = reshape(shape = var_2751, x = attn_43_cast_fp16)[name = tensor("input_169_cast_fp16")]; + tensor obj_87_pad_type_0 = const()[name = tensor("obj_87_pad_type_0"), val = tensor("valid")]; + tensor obj_87_strides_0 = const()[name = tensor("obj_87_strides_0"), val = 
tensor([1, 1])]; + tensor obj_87_pad_0 = const()[name = tensor("obj_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_87_dilations_0 = const()[name = tensor("obj_87_dilations_0"), val = tensor([1, 1])]; + tensor obj_87_groups_0 = const()[name = tensor("obj_87_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545182336)))]; + tensor layers_21_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(547279552)))]; + tensor obj_87_cast_fp16 = conv(bias = layers_21_self_attn_o_proj_bias_to_fp16, dilations = obj_87_dilations_0, groups = obj_87_groups_0, pad = obj_87_pad_0, pad_type = obj_87_pad_type_0, strides = obj_87_strides_0, weight = layers_21_self_attn_o_proj_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("obj_87_cast_fp16")]; + tensor inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = obj_87_cast_fp16)[name = tensor("inputs_87_cast_fp16")]; + tensor out_87_axes_0 = const()[name = tensor("out_87_axes_0"), val = tensor([1])]; + tensor var_2769_to_fp16 = const()[name = tensor("op_2769_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_87_cast_fp16 = layer_norm(axes = out_87_axes_0, epsilon = var_2769_to_fp16, x = inputs_87_cast_fp16)[name = tensor("out_87_cast_fp16")]; + tensor input_171_gamma_0_to_fp16 = const()[name = tensor("input_171_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(547281664)))]; + tensor input_171_beta_0_to_fp16 = const()[name = tensor("input_171_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(547283776)))]; + tensor input_171_epsilon_0_to_fp16 = const()[name = tensor("input_171_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_171_cast_fp16 = batch_norm(beta = input_171_beta_0_to_fp16, epsilon = input_171_epsilon_0_to_fp16, gamma = input_171_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_87_cast_fp16)[name = tensor("input_171_cast_fp16")]; + tensor input_173_pad_type_0 = const()[name = tensor("input_173_pad_type_0"), val = tensor("valid")]; + tensor input_173_strides_0 = const()[name = tensor("input_173_strides_0"), val = tensor([1, 1])]; + tensor input_173_pad_0 = const()[name = tensor("input_173_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_173_dilations_0 = const()[name = tensor("input_173_dilations_0"), val = tensor([1, 1])]; + tensor input_173_groups_0 = const()[name = tensor("input_173_groups_0"), val = tensor(1)]; + tensor layers_21_fc1_weight_to_fp16 = const()[name = tensor("layers_21_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(547285888)))]; + tensor layers_21_fc1_bias_to_fp16 = const()[name = tensor("layers_21_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555674560)))]; + tensor input_173_cast_fp16 = conv(bias = layers_21_fc1_bias_to_fp16, dilations = input_173_dilations_0, groups = input_173_groups_0, pad = input_173_pad_0, pad_type = input_173_pad_type_0, strides = input_173_strides_0, weight = layers_21_fc1_weight_to_fp16, x = input_171_cast_fp16)[name = tensor("input_173_cast_fp16")]; + tensor input_175_mode_0 = const()[name = 
tensor("input_175_mode_0"), val = tensor("EXACT")]; + tensor input_175_cast_fp16 = gelu(mode = input_175_mode_0, x = input_173_cast_fp16)[name = tensor("input_175_cast_fp16")]; + tensor hidden_states_47_pad_type_0 = const()[name = tensor("hidden_states_47_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_47_strides_0 = const()[name = tensor("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = tensor("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = tensor("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_47_groups_0 = const()[name = tensor("hidden_states_47_groups_0"), val = tensor(1)]; + tensor layers_21_fc2_weight_to_fp16 = const()[name = tensor("layers_21_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555682816)))]; + tensor layers_21_fc2_bias_to_fp16 = const()[name = tensor("layers_21_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564071488)))]; + tensor hidden_states_47_cast_fp16 = conv(bias = layers_21_fc2_bias_to_fp16, dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = layers_21_fc2_weight_to_fp16, x = input_175_cast_fp16)[name = tensor("hidden_states_47_cast_fp16")]; + tensor inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = hidden_states_47_cast_fp16)[name = tensor("inputs_89_cast_fp16")]; + tensor var_2798 = const()[name = tensor("op_2798"), val = tensor(3)]; + tensor out_89_axes_0 = const()[name = tensor("out_89_axes_0"), val = tensor([1])]; + tensor var_2820_to_fp16 = const()[name = tensor("op_2820_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_89_cast_fp16 = layer_norm(axes = out_89_axes_0, epsilon = var_2820_to_fp16, x = inputs_89_cast_fp16)[name = tensor("out_89_cast_fp16")]; + tensor obj_89_gamma_0_to_fp16 = const()[name = tensor("obj_89_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564073600)))]; + tensor obj_89_beta_0_to_fp16 = const()[name = tensor("obj_89_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564075712)))]; + tensor obj_89_epsilon_0_to_fp16 = const()[name = tensor("obj_89_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_89_cast_fp16 = batch_norm(beta = obj_89_beta_0_to_fp16, epsilon = obj_89_epsilon_0_to_fp16, gamma = obj_89_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_89_cast_fp16)[name = tensor("obj_89_cast_fp16")]; + tensor query_45_pad_type_0 = const()[name = tensor("query_45_pad_type_0"), val = tensor("valid")]; + tensor query_45_strides_0 = const()[name = tensor("query_45_strides_0"), val = tensor([1, 1])]; + tensor query_45_pad_0 = const()[name = tensor("query_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_45_dilations_0 = const()[name = tensor("query_45_dilations_0"), val = tensor([1, 1])]; + tensor query_45_groups_0 = const()[name = tensor("query_45_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564077824)))]; + tensor layers_22_self_attn_q_proj_bias_to_fp16 = 
const()[name = tensor("layers_22_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566175040)))]; + tensor query_45_cast_fp16 = conv(bias = layers_22_self_attn_q_proj_bias_to_fp16, dilations = query_45_dilations_0, groups = query_45_groups_0, pad = query_45_pad_0, pad_type = query_45_pad_type_0, strides = query_45_strides_0, weight = layers_22_self_attn_q_proj_weight_to_fp16, x = obj_89_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor key_45_pad_type_0 = const()[name = tensor("key_45_pad_type_0"), val = tensor("valid")]; + tensor key_45_strides_0 = const()[name = tensor("key_45_strides_0"), val = tensor([1, 1])]; + tensor key_45_pad_0 = const()[name = tensor("key_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_45_dilations_0 = const()[name = tensor("key_45_dilations_0"), val = tensor([1, 1])]; + tensor key_45_groups_0 = const()[name = tensor("key_45_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566177152)))]; + tensor key_45_cast_fp16 = conv(dilations = key_45_dilations_0, groups = key_45_groups_0, pad = key_45_pad_0, pad_type = key_45_pad_type_0, strides = key_45_strides_0, weight = layers_22_self_attn_k_proj_weight_to_fp16, x = obj_89_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor value_45_pad_type_0 = const()[name = tensor("value_45_pad_type_0"), val = tensor("valid")]; + tensor value_45_strides_0 = const()[name = tensor("value_45_strides_0"), val = tensor([1, 1])]; + tensor value_45_pad_0 = const()[name = tensor("value_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_45_dilations_0 = const()[name = tensor("value_45_dilations_0"), val = tensor([1, 1])]; + tensor value_45_groups_0 = const()[name = tensor("value_45_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568274368)))]; + tensor layers_22_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570371584)))]; + tensor value_45_cast_fp16 = conv(bias = layers_22_self_attn_v_proj_bias_to_fp16, dilations = value_45_dilations_0, groups = value_45_groups_0, pad = value_45_pad_0, pad_type = value_45_pad_type_0, strides = value_45_strides_0, weight = layers_22_self_attn_v_proj_weight_to_fp16, x = obj_89_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_2856 = const()[name = tensor("op_2856"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_45_cast_fp16 = reshape(shape = var_2856, x = query_45_cast_fp16)[name = tensor("mh_q_45_cast_fp16")]; + tensor var_2858_to_fp16 = const()[name = tensor("op_2858_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2859_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_2858_to_fp16)[name = tensor("op_2859_cast_fp16")]; + tensor var_2862 = const()[name = tensor("op_2862"), val = tensor([1, 16, 64, 1500])]; + tensor var_2863_cast_fp16 = reshape(shape = var_2862, x = key_45_cast_fp16)[name = tensor("op_2863_cast_fp16")]; + tensor mh_w_45_transpose_x_0 = const()[name = tensor("mh_w_45_transpose_x_0"), val = tensor(true)]; + tensor mh_w_45_transpose_y_0 = const()[name = tensor("mh_w_45_transpose_y_0"), val = 
tensor(false)]; + tensor mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_2859_cast_fp16, y = var_2863_cast_fp16)[name = tensor("mh_w_45_cast_fp16")]; + tensor var_2866_cast_fp16 = softmax(axis = var_2798, x = mh_w_45_cast_fp16)[name = tensor("op_2866_cast_fp16")]; + tensor var_2867 = const()[name = tensor("op_2867"), val = tensor([1, 16, 64, 1500])]; + tensor var_2868_cast_fp16 = reshape(shape = var_2867, x = value_45_cast_fp16)[name = tensor("op_2868_cast_fp16")]; + tensor attn_45_transpose_x_0 = const()[name = tensor("attn_45_transpose_x_0"), val = tensor(false)]; + tensor attn_45_transpose_y_0 = const()[name = tensor("attn_45_transpose_y_0"), val = tensor(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_2868_cast_fp16, y = var_2866_cast_fp16)[name = tensor("attn_45_cast_fp16")]; + tensor var_2871 = const()[name = tensor("op_2871"), val = tensor([1, 1024, 1, 1500])]; + tensor input_177_cast_fp16 = reshape(shape = var_2871, x = attn_45_cast_fp16)[name = tensor("input_177_cast_fp16")]; + tensor obj_91_pad_type_0 = const()[name = tensor("obj_91_pad_type_0"), val = tensor("valid")]; + tensor obj_91_strides_0 = const()[name = tensor("obj_91_strides_0"), val = tensor([1, 1])]; + tensor obj_91_pad_0 = const()[name = tensor("obj_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_91_dilations_0 = const()[name = tensor("obj_91_dilations_0"), val = tensor([1, 1])]; + tensor obj_91_groups_0 = const()[name = tensor("obj_91_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570373696)))]; + tensor layers_22_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(572470912)))]; + tensor obj_91_cast_fp16 = conv(bias = layers_22_self_attn_o_proj_bias_to_fp16, dilations = obj_91_dilations_0, groups = obj_91_groups_0, pad = obj_91_pad_0, pad_type = obj_91_pad_type_0, strides = obj_91_strides_0, weight = layers_22_self_attn_o_proj_weight_to_fp16, x = input_177_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_91_cast_fp16")]; + tensor out_91_axes_0 = const()[name = tensor("out_91_axes_0"), val = tensor([1])]; + tensor var_2889_to_fp16 = const()[name = tensor("op_2889_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_2889_to_fp16, x = inputs_91_cast_fp16)[name = tensor("out_91_cast_fp16")]; + tensor input_179_gamma_0_to_fp16 = const()[name = tensor("input_179_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(572473024)))]; + tensor input_179_beta_0_to_fp16 = const()[name = tensor("input_179_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(572475136)))]; + tensor input_179_epsilon_0_to_fp16 = const()[name = tensor("input_179_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_179_cast_fp16 = batch_norm(beta = input_179_beta_0_to_fp16, epsilon = input_179_epsilon_0_to_fp16, gamma = input_179_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = 
out_91_cast_fp16)[name = tensor("input_179_cast_fp16")]; + tensor input_181_pad_type_0 = const()[name = tensor("input_181_pad_type_0"), val = tensor("valid")]; + tensor input_181_strides_0 = const()[name = tensor("input_181_strides_0"), val = tensor([1, 1])]; + tensor input_181_pad_0 = const()[name = tensor("input_181_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_181_dilations_0 = const()[name = tensor("input_181_dilations_0"), val = tensor([1, 1])]; + tensor input_181_groups_0 = const()[name = tensor("input_181_groups_0"), val = tensor(1)]; + tensor layers_22_fc1_weight_to_fp16 = const()[name = tensor("layers_22_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(572477248)))]; + tensor layers_22_fc1_bias_to_fp16 = const()[name = tensor("layers_22_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580865920)))]; + tensor input_181_cast_fp16 = conv(bias = layers_22_fc1_bias_to_fp16, dilations = input_181_dilations_0, groups = input_181_groups_0, pad = input_181_pad_0, pad_type = input_181_pad_type_0, strides = input_181_strides_0, weight = layers_22_fc1_weight_to_fp16, x = input_179_cast_fp16)[name = tensor("input_181_cast_fp16")]; + tensor input_183_mode_0 = const()[name = tensor("input_183_mode_0"), val = tensor("EXACT")]; + tensor input_183_cast_fp16 = gelu(mode = input_183_mode_0, x = input_181_cast_fp16)[name = tensor("input_183_cast_fp16")]; + tensor hidden_states_49_pad_type_0 = const()[name = tensor("hidden_states_49_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_49_strides_0 = const()[name = tensor("hidden_states_49_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_49_pad_0 = const()[name = tensor("hidden_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_49_dilations_0 = const()[name = tensor("hidden_states_49_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_49_groups_0 = const()[name = tensor("hidden_states_49_groups_0"), val = tensor(1)]; + tensor layers_22_fc2_weight_to_fp16 = const()[name = tensor("layers_22_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580874176)))]; + tensor layers_22_fc2_bias_to_fp16 = const()[name = tensor("layers_22_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589262848)))]; + tensor hidden_states_49_cast_fp16 = conv(bias = layers_22_fc2_bias_to_fp16, dilations = hidden_states_49_dilations_0, groups = hidden_states_49_groups_0, pad = hidden_states_49_pad_0, pad_type = hidden_states_49_pad_type_0, strides = hidden_states_49_strides_0, weight = layers_22_fc2_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("hidden_states_49_cast_fp16")]; + tensor inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = hidden_states_49_cast_fp16)[name = tensor("inputs_93_cast_fp16")]; + tensor var_2918 = const()[name = tensor("op_2918"), val = tensor(3)]; + tensor out_93_axes_0 = const()[name = tensor("out_93_axes_0"), val = tensor([1])]; + tensor var_2940_to_fp16 = const()[name = tensor("op_2940_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_93_cast_fp16 = layer_norm(axes = out_93_axes_0, epsilon = var_2940_to_fp16, x = inputs_93_cast_fp16)[name = tensor("out_93_cast_fp16")]; + tensor obj_93_gamma_0_to_fp16 = const()[name = tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589264960)))]; + tensor 
obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589267072)))]; + tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_93_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("valid")]; + tensor query_strides_0 = const()[name = tensor("query_strides_0"), val = tensor([1, 1])]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_dilations_0 = const()[name = tensor("query_dilations_0"), val = tensor([1, 1])]; + tensor query_groups_0 = const()[name = tensor("query_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589269184)))]; + tensor layers_23_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591366400)))]; + tensor query_cast_fp16 = conv(bias = layers_23_self_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_23_self_attn_q_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("valid")]; + tensor key_strides_0 = const()[name = tensor("key_strides_0"), val = tensor([1, 1])]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_dilations_0 = const()[name = tensor("key_dilations_0"), val = tensor([1, 1])]; + tensor key_groups_0 = const()[name = tensor("key_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591368512)))]; + tensor key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_23_self_attn_k_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("valid")]; + tensor value_strides_0 = const()[name = tensor("value_strides_0"), val = tensor([1, 1])]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_dilations_0 = const()[name = tensor("value_dilations_0"), val = tensor([1, 1])]; + tensor value_groups_0 = const()[name = tensor("value_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(593465728)))]; + tensor layers_23_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(595562944)))]; + tensor value_cast_fp16 = conv(bias = layers_23_self_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_23_self_attn_v_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_2976 = const()[name = tensor("op_2976"), val = tensor([1, 16, 64, 1500])]; + tensor mh_q_cast_fp16 = reshape(shape = var_2976, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_2978_to_fp16 = const()[name = tensor("op_2978_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2979_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_2978_to_fp16)[name = tensor("op_2979_cast_fp16")]; + tensor var_2982 = const()[name = tensor("op_2982"), val = tensor([1, 16, 64, 1500])]; + tensor var_2983_cast_fp16 = reshape(shape = var_2982, x = key_cast_fp16)[name = tensor("op_2983_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2979_cast_fp16, y = var_2983_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor var_2986_cast_fp16 = softmax(axis = var_2918, x = mh_w_cast_fp16)[name = tensor("op_2986_cast_fp16")]; + tensor var_2987 = const()[name = tensor("op_2987"), val = tensor([1, 16, 64, 1500])]; + tensor var_2988_cast_fp16 = reshape(shape = var_2987, x = value_cast_fp16)[name = tensor("op_2988_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2988_cast_fp16, y = var_2986_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_2991 = const()[name = tensor("op_2991"), val = tensor([1, 1024, 1, 1500])]; + tensor input_185_cast_fp16 = reshape(shape = var_2991, x = attn_cast_fp16)[name = tensor("input_185_cast_fp16")]; + tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("valid")]; + tensor obj_strides_0 = const()[name = tensor("obj_strides_0"), val = tensor([1, 1])]; + tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_dilations_0 = const()[name = tensor("obj_dilations_0"), val = tensor([1, 1])]; + tensor obj_groups_0 = const()[name = tensor("obj_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595565056)))]; + tensor layers_23_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(597662272)))]; + tensor obj_cast_fp16 = conv(bias = layers_23_self_attn_o_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_23_self_attn_o_proj_weight_to_fp16, x = input_185_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = obj_cast_fp16)[name = 
tensor("inputs_95_cast_fp16")]; + tensor out_95_axes_0 = const()[name = tensor("out_95_axes_0"), val = tensor([1])]; + tensor var_3009_to_fp16 = const()[name = tensor("op_3009_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_95_cast_fp16 = layer_norm(axes = out_95_axes_0, epsilon = var_3009_to_fp16, x = inputs_95_cast_fp16)[name = tensor("out_95_cast_fp16")]; + tensor input_187_gamma_0_to_fp16 = const()[name = tensor("input_187_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(597664384)))]; + tensor input_187_beta_0_to_fp16 = const()[name = tensor("input_187_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(597666496)))]; + tensor input_187_epsilon_0_to_fp16 = const()[name = tensor("input_187_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_187_cast_fp16 = batch_norm(beta = input_187_beta_0_to_fp16, epsilon = input_187_epsilon_0_to_fp16, gamma = input_187_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_95_cast_fp16)[name = tensor("input_187_cast_fp16")]; + tensor input_189_pad_type_0 = const()[name = tensor("input_189_pad_type_0"), val = tensor("valid")]; + tensor input_189_strides_0 = const()[name = tensor("input_189_strides_0"), val = tensor([1, 1])]; + tensor input_189_pad_0 = const()[name = tensor("input_189_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_189_dilations_0 = const()[name = tensor("input_189_dilations_0"), val = tensor([1, 1])]; + tensor input_189_groups_0 = const()[name = tensor("input_189_groups_0"), val = tensor(1)]; + tensor layers_23_fc1_weight_to_fp16 = const()[name = tensor("layers_23_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(597668608)))]; + tensor layers_23_fc1_bias_to_fp16 = const()[name = tensor("layers_23_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(606057280)))]; + tensor input_189_cast_fp16 = conv(bias = layers_23_fc1_bias_to_fp16, dilations = input_189_dilations_0, groups = input_189_groups_0, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = input_189_strides_0, weight = layers_23_fc1_weight_to_fp16, x = input_187_cast_fp16)[name = tensor("input_189_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_189_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_strides_0 = const()[name = tensor("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = tensor("hidden_states_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_groups_0 = const()[name = tensor("hidden_states_groups_0"), val = tensor(1)]; + tensor layers_23_fc2_weight_to_fp16 = const()[name = tensor("layers_23_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(606065536)))]; + tensor layers_23_fc2_bias_to_fp16 = const()[name = tensor("layers_23_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(614454208)))]; + tensor hidden_states_cast_fp16 = conv(bias = layers_23_fc2_bias_to_fp16, 
dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = layers_23_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_95_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_3047_to_fp16 = const()[name = tensor("op_3047_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3047_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(614456320)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(614458432)))]; + tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; + } -> (encoder_output_embeds); +} \ No newline at end of file diff --git a/openai_whisper-medium/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-medium/AudioEncoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8beb32179ea8db553721fbafdb7024f2bcb79a1 --- /dev/null +++ b/openai_whisper-medium/AudioEncoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:577c78ed7e0ae71f9ed6fdb063dc74a0f4c0c44d04118111650458973f7ddae6 +size 614460544 diff --git a/openai_whisper-medium/MelSpectrogram.mlcomputeplan.json b/openai_whisper-medium/MelSpectrogram.mlcomputeplan.json new file mode 100644 index 0000000000000000000000000000000000000000..717cb6a9aa4e752c90d26b278ccbb65e900fe5a5 --- /dev/null +++ b/openai_whisper-medium/MelSpectrogram.mlcomputeplan.json @@ -0,0 +1,199 @@ +{ + "1_ios16.reshape_shape_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.2206 + }, + "5_pad_pad_x_constant_val_mode": { + "dispatch": "CPU", + "supported": [ + "CPU" + ], + "cost": 2.2216 + }, + "7_ios16.reshape_shape_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.2225 + }, + "9_expand_dims_x_axes": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.2225 + }, + "12_expand_dims_x_axes": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.2225 + }, + "18_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "CPU", + "supported": [ + "CPU" + ], + "cost": 21.5236 + }, + "24_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "CPU", + "supported": [ + "CPU" + ], + "cost": 21.5236 + }, + "26_squeeze_x_axes": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.7906 + }, + 
"28_squeeze_x_axes": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.7906 + }, + "29_ios16.square_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.7906 + }, + "30_ios16.square_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.7906 + }, + "31_ios16.add_x_y": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 10.1532 + }, + "32_identity_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.7906 + }, + "36_slice_by_index_begin_end_end_mask_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.7901 + }, + "40_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 4.0374 + }, + "42_ios16.add_x_y": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.0199 + }, + "44_ios16.log_epsilon_x": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.1103 + }, + "46_ios16.mul_x_y": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.9367 + }, + "48_ios16.reduce_max_x_keep_dims": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.5552 + }, + "50_ios16.sub_x_y": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0 + }, + "51_ios16.maximum_x_y": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.1103 + }, + "53_ios16.add_x_y": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.0199 + }, + "55_ios16.mul_x_y": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 2.9367 + }, + "57_expand_dims_x_axes": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.1103 + }, + "59_expand_dims_x_axes": { + "dispatch": "CPU", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.1103 + } +} \ No newline at end of file diff --git a/openai_whisper-medium/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-medium/MelSpectrogram.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..00936d0b7c298845d0d086d6e77b355a30593775 --- /dev/null +++ b/openai_whisper-medium/MelSpectrogram.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:847173d00b238eb63f0d3dab36dc52b7a3fd1d4b31e95d4093975fada8fde2cd +size 243 diff --git a/openai_whisper-medium/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-medium/MelSpectrogram.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a8a736f06d0c274e91848b163a53983cc322b81 --- /dev/null +++ b/openai_whisper-medium/MelSpectrogram.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebbb4bf9699f7d65018645e168ecedf47541eee154800f9e9766f1ac90746416 +size 328 diff --git a/openai_whisper-medium/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-medium/MelSpectrogram.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c7fca7bd620d8aca6289b02550d8fe9c0cd9f8d6 --- /dev/null +++ b/openai_whisper-medium/MelSpectrogram.mlmodelc/metadata.json @@ -0,0 +1,74 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 1, 3000]", + "name" : 
"melspectrogram_features", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Ios16.reshape" : 2, + "Ios16.mul" : 2, + "SliceByIndex" : 1, + "Ios16.sub" : 1, + "Ios16.log" : 1, + "Ios16.square" : 2, + "Ios16.add" : 3, + "Squeeze" : 2, + "Ios16.matmul" : 1, + "Ios16.conv" : 2, + "Ios16.maximum" : 1, + "ExpandDims" : 4, + "Ios16.reduceMax" : 1, + "Identity" : 1, + "Pad" : 1 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.version" : "8.2", + "com.github.apple.coremltools.source" : "torch==2.5.0", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 480000)", + "shortDescription" : "", + "shape" : "[480000]", + "name" : "audio", + "type" : "MultiArray" + } + ], + "generatedClassName" : "MelSpectrogram", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-medium/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-medium/MelSpectrogram.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..20a1b38b6e6cf850afd0ba3614d7a4396bc8e4d9 --- /dev/null +++ b/openai_whisper-medium/MelSpectrogram.mlmodelc/model.mil @@ -0,0 +1,66 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3401.3.1"}, {"coremlc-version", "3401.4.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})] +{ + func main(tensor audio) { + tensor var_10 = const()[name = tensor("op_10"), val = tensor([1, 1, 480000])]; + tensor input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = tensor("input_1_cast_fp16")]; + tensor input_3_pad_0 = const()[name = tensor("input_3_pad_0"), val = tensor([0, 0, 0, 0, 200, 200])]; + tensor input_3_mode_0 = const()[name = tensor("input_3_mode_0"), val = tensor("reflect")]; + tensor const_1_to_fp16 = const()[name = tensor("const_1_to_fp16"), val = tensor(0x0p+0)]; + tensor input_3_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_22 = const()[name = tensor("op_22"), val = tensor([480400])]; + tensor input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor expand_dims_0_axes_0 = const()[name = tensor("expand_dims_0_axes_0"), val = tensor([0])]; + tensor expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = tensor("expand_dims_0_cast_fp16")]; + tensor expand_dims_3 = const()[name = tensor("expand_dims_3"), val = tensor([160])]; + tensor expand_dims_4_axes_0 = const()[name = tensor("expand_dims_4_axes_0"), val = tensor([1])]; + tensor expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = tensor("expand_dims_4_cast_fp16")]; + tensor conv_0_pad_type_0 = const()[name = tensor("conv_0_pad_type_0"), val = tensor("valid")]; + tensor conv_0_pad_0 = const()[name = tensor("conv_0_pad_0"), val = tensor([0, 0])]; + tensor conv_0_dilations_0 = 
const()[name = tensor("conv_0_dilations_0"), val = tensor([1])]; + tensor conv_0_groups_0 = const()[name = tensor("conv_0_groups_0"), val = tensor(1)]; + tensor expand_dims_1_to_fp16 = const()[name = tensor("expand_dims_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor("conv_0_cast_fp16")]; + tensor conv_1_pad_type_0 = const()[name = tensor("conv_1_pad_type_0"), val = tensor("valid")]; + tensor conv_1_pad_0 = const()[name = tensor("conv_1_pad_0"), val = tensor([0, 0])]; + tensor conv_1_dilations_0 = const()[name = tensor("conv_1_dilations_0"), val = tensor([1])]; + tensor conv_1_groups_0 = const()[name = tensor("conv_1_groups_0"), val = tensor(1)]; + tensor expand_dims_2_to_fp16 = const()[name = tensor("expand_dims_2_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160960)))]; + tensor conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor("conv_1_cast_fp16")]; + tensor squeeze_0_axes_0 = const()[name = tensor("squeeze_0_axes_0"), val = tensor([0])]; + tensor squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = tensor("squeeze_0_cast_fp16")]; + tensor squeeze_1_axes_0 = const()[name = tensor("squeeze_1_axes_0"), val = tensor([0])]; + tensor squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = tensor("squeeze_1_cast_fp16")]; + tensor square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = tensor("square_0_cast_fp16")]; + tensor square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = tensor("square_1_cast_fp16")]; + tensor add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = tensor("add_1_cast_fp16")]; + tensor magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = tensor("magnitudes_1_cast_fp16")]; + tensor magnitudes_begin_0 = const()[name = tensor("magnitudes_begin_0"), val = tensor([0, 0])]; + tensor magnitudes_end_0 = const()[name = tensor("magnitudes_end_0"), val = tensor([201, 3000])]; + tensor magnitudes_end_mask_0 = const()[name = tensor("magnitudes_end_mask_0"), val = tensor([true, false])]; + tensor magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = tensor("magnitudes_cast_fp16")]; + tensor mel_spec_1_transpose_x_0 = const()[name = tensor("mel_spec_1_transpose_x_0"), val = tensor(false)]; + tensor mel_spec_1_transpose_y_0 = const()[name = tensor("mel_spec_1_transpose_y_0"), val = tensor(false)]; + tensor mel_filters_to_fp16 = const()[name = tensor("mel_filters_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321856)))]; + tensor mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = tensor("mel_spec_1_cast_fp16")]; + tensor var_41_to_fp16 = const()[name = tensor("op_41_to_fp16"), val = tensor(0x1p-24)]; + tensor mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = 
tensor("mel_spec_cast_fp16")]; + tensor log_0_epsilon_0_to_fp16 = const()[name = tensor("log_0_epsilon_0_to_fp16"), val = tensor(0x0p+0)]; + tensor log_0_cast_fp16 = log(epsilon = log_0_epsilon_0_to_fp16, x = mel_spec_cast_fp16)[name = tensor("log_0_cast_fp16")]; + tensor mul_0_y_0_to_fp16 = const()[name = tensor("mul_0_y_0_to_fp16"), val = tensor(0x1.bccp-2)]; + tensor mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor("mul_0_cast_fp16")]; + tensor var_44_keep_dims_0 = const()[name = tensor("op_44_keep_dims_0"), val = tensor(false)]; + tensor var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = tensor("op_44_cast_fp16")]; + tensor var_46_to_fp16 = const()[name = tensor("op_46_to_fp16"), val = tensor(0x1p+3)]; + tensor var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = tensor("op_47_cast_fp16")]; + tensor log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = tensor("log_spec_3_cast_fp16")]; + tensor var_50_to_fp16 = const()[name = tensor("op_50_to_fp16"), val = tensor(0x1p+2)]; + tensor var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = tensor("op_51_cast_fp16")]; + tensor _inversed_log_spec_y_0_to_fp16 = const()[name = tensor("_inversed_log_spec_y_0_to_fp16"), val = tensor(0x1p-2)]; + tensor _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = tensor("_inversed_log_spec_cast_fp16")]; + tensor var_55_axes_0 = const()[name = tensor("op_55_axes_0"), val = tensor([0])]; + tensor var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = tensor("op_55_cast_fp16")]; + tensor var_62_axes_0 = const()[name = tensor("op_62_axes_0"), val = tensor([2])]; + tensor melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = tensor("op_62_cast_fp16")]; + } -> (melspectrogram_features); +} \ No newline at end of file diff --git a/openai_whisper-medium/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-medium/MelSpectrogram.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..f7d28ffac464e9e7086a526930f0059187de8d01 --- /dev/null +++ b/openai_whisper-medium/MelSpectrogram.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801024dbc7a89c677be1f8b285de3409e35f7d1786c9c8d9d0d6842ac57a1c83 +size 354080 diff --git a/openai_whisper-medium/TextDecoder.mlcomputeplan.json b/openai_whisper-medium/TextDecoder.mlcomputeplan.json new file mode 100644 index 0000000000000000000000000000000000000000..2671ee34e44cdc9b83023b6663d106b8ebaf06e6 --- /dev/null +++ b/openai_whisper-medium/TextDecoder.mlcomputeplan.json @@ -0,0 +1,8520 @@ +{ + "3_ios16.gather_axis_batch_dims_indices_x": { + "dispatch": "CPU", + "supported": [ + "CPU" + ], + "cost": 11.6709 + }, + "7_ios16.gather_axis_batch_dims_indices_x": { + "dispatch": "CPU", + "supported": [ + "CPU" + ], + "cost": 0.1008 + }, + "8_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "10_expand_dims_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "12_expand_dims_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "15_split_axis_x_split_sizes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.7941 + }, + "18_split_axis_x_split_sizes": { + "dispatch": "ANE", + 
"supported": [ + "CPU", + "ANE" + ], + "cost": 0.7941 + }, + "22_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "28_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "36_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "43_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "51_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "53_expand_dims_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "55_expand_dims_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "57_ios16.sub_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "58_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "59_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "60_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "61_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "62_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "63_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "65_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "67_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "69_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "72_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "74_expand_dims_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "76_expand_dims_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "77_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "78_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "80_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "83_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "85_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "93_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "94_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "97_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "101_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + 
"109_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "116_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "124_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "126_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "128_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "130_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "133_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "134_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "136_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "139_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "141_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "149_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "150_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "153_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "157_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "165_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "167_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "175_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "176_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "180_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "184_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "192_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "199_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "207_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "208_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "209_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "210_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "211_ios16.mul_x_y": { + 
"dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "212_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "213_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "215_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "217_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "219_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "222_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "223_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "224_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "226_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "229_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "231_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "239_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "240_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "243_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "247_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "255_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "262_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "270_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "272_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "274_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "276_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "279_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "280_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "282_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "285_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "287_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "295_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "296_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + 
}, + "299_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "303_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "311_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "313_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "321_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "322_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "326_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "330_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "338_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "345_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "353_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "354_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "355_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "356_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "357_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "358_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "359_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "361_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "363_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "365_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "368_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "369_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "370_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "372_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "375_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "377_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "385_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "386_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "389_ios16.layer_norm_epsilon_x_axes": { + "dispatch": 
"ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "393_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "401_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "408_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "416_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "418_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "420_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "422_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "425_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "426_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "428_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "431_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "433_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "441_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "442_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "445_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "449_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "457_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "459_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "467_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "468_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "472_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "476_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "484_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "491_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "499_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "500_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "501_ios16.mul_x_y": { + 
"dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "502_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "503_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "504_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "505_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "507_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "509_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "511_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "514_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "515_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "516_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "518_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "521_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "523_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "531_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "532_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "535_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "539_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "547_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "554_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "562_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "564_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "566_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "568_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "571_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "572_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "574_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "577_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "579_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + 
"587_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "588_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "591_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "595_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "603_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "605_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "613_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "614_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "618_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "622_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "630_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "637_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "645_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "646_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "647_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "648_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "649_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "650_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "651_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "653_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "655_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "657_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "660_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "661_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "662_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "664_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "667_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "669_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + 
"677_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "678_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "681_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "685_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "693_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "700_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "708_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "710_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "712_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "714_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "717_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "718_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "720_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "723_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "725_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "733_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "734_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "737_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "741_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "749_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "751_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "759_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "760_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "764_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "768_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "776_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "783_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + 
"ANE" + ], + "cost": 0.0177 + }, + "791_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "792_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "793_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "794_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "795_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "796_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "797_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "799_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "801_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "803_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "806_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "807_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "808_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "810_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "813_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "815_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "823_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "824_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "827_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "831_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "839_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "846_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "854_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "856_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "858_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "860_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "863_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "864_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "866_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + 
], + "cost": 0.2127 + }, + "869_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "871_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "879_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "880_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "883_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "887_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "895_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "897_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "905_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "906_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "910_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "914_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "922_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "929_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "937_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "938_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "939_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "940_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "941_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "942_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "943_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "945_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "947_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "949_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "952_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "953_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "954_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "956_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + 
"959_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "961_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "969_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "970_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "973_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "977_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "985_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "992_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "1000_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "1002_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1004_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1006_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1009_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1010_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "1012_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1015_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1017_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1025_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1026_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1029_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1033_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1041_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1043_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "1051_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1052_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1056_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1060_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + 
"1068_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1075_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1083_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1084_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1085_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1086_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1087_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1088_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1089_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1091_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1093_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1095_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1098_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1099_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "1100_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "1102_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1105_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1107_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1115_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1116_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1119_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1123_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1131_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1138_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "1146_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "1148_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1150_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1152_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + 
"1155_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1156_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "1158_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1161_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1163_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1171_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1172_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1175_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1179_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1187_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1189_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "1197_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1198_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1202_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1206_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1214_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1221_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1229_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1230_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1231_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1232_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1233_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1234_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1235_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1237_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1239_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1241_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1244_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + 
"1245_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "1246_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "1248_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1251_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1253_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1261_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1262_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1265_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1269_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1277_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1284_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "1292_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "1294_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1296_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1298_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1301_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1302_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "1304_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1307_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1309_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1317_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1318_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1321_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1325_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1333_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1335_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "1343_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1344_ios16.add_x_y": { + "dispatch": 
"ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1348_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1352_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1360_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1367_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1375_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1376_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1377_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1378_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1379_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1380_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1381_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1383_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1385_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1387_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1390_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1391_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "1392_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "1394_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1397_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1399_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1407_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1408_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1411_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1415_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1423_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1430_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "1438_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "1440_ios16.reshape_shape_x": { + 
"dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1442_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1444_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1447_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1448_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "1450_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1453_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1455_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1463_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1464_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1467_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1471_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1479_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1481_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "1489_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1490_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1494_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1498_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1506_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1513_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1521_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1522_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1523_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1524_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1525_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1526_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1527_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1529_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1531_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": 
[ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1533_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1536_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1537_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "1538_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "1540_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1543_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1545_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1553_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1554_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1557_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1561_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1569_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1576_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "1584_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "1586_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1588_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1590_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1593_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1594_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "1596_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1599_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1601_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1609_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1610_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1613_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1617_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1625_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + 
"1627_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "1635_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1636_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1640_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1644_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1652_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1659_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1667_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1668_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1669_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1670_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1671_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1672_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1673_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1675_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1677_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1679_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1682_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1683_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "1684_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "1686_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1689_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1691_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1699_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1700_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1703_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1707_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1715_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + 
"1722_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "1730_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "1732_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1734_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1736_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1739_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1740_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "1742_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1745_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1747_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1755_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1756_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1759_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1763_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1771_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1773_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "1781_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1782_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1786_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1790_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1798_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1805_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1813_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1814_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1815_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1816_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1817_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1818_ios16.mul_x_y": { + "dispatch": "ANE", + 
"supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1819_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1821_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1823_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1825_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1828_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1829_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "1830_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "1832_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1835_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1837_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1845_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1846_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1849_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1853_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1861_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1868_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "1876_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "1878_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1880_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1882_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1885_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1886_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "1888_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "1891_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "1893_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1901_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1902_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1905_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" 
+ ], + "cost": 0.0001 + }, + "1909_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1917_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1919_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "1927_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "1928_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1932_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1936_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1944_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1951_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1959_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1960_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1961_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1962_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1963_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "1964_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "1965_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "1967_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1969_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1971_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1974_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1975_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "1976_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "1978_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "1981_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "1983_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "1991_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "1992_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "1995_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + 
"1999_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2007_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2014_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "2022_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "2024_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2026_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2028_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2031_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2032_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "2034_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2037_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2039_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2047_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2048_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2051_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2055_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2063_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2065_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "2073_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2074_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2078_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2082_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2090_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2097_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2105_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2106_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2107_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + 
"CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2108_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2109_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2110_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2111_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2113_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2115_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2117_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2120_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2121_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "2122_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "2124_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2127_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2129_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2137_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2138_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2141_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2145_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2153_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2160_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "2168_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "2170_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2172_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2174_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2177_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2178_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "2180_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2183_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2185_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + 
"2193_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2194_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2197_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2201_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2209_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2211_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "2219_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2220_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2224_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2228_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2236_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2243_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2251_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2252_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2253_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2254_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2255_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2256_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2257_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2259_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2261_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2263_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2266_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2267_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "2268_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "2270_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2273_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2275_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + 
"2283_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2284_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2287_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2291_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2299_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2306_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "2314_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "2316_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2318_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2320_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2323_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2324_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "2326_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2329_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2331_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2339_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2340_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2343_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2347_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2355_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2357_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "2365_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2366_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2370_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2374_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2382_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2389_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + 
"supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2397_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2398_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2399_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2400_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2401_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2402_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2403_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2405_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2407_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2409_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2412_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2413_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "2414_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "2416_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2419_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2421_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2429_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2430_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2433_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2437_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2445_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2452_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "2460_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "2462_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2464_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2466_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2469_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2470_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "2472_ios16.reshape_shape_x": { + 
"dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2475_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2477_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2485_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2486_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2489_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2493_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2501_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2503_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "2511_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2512_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2516_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2520_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2528_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2535_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2543_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2544_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2545_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2546_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2547_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2548_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2549_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2551_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2553_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2555_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2558_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2559_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "2560_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "2562_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ 
+ "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2565_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2567_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2575_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2576_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2579_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2583_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2591_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2598_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "2606_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "2608_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2610_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2612_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2615_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2616_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "2618_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2621_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2623_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2631_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2632_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2635_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2639_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2647_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2649_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "2657_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2658_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2662_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2666_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ 
+ "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2674_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2681_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2689_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2690_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2691_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2692_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2693_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2694_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2695_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2697_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2699_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2701_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2704_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2705_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "2706_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "2708_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2711_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2713_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2721_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2722_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2725_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2729_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2737_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2744_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "2752_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "2754_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2756_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2758_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 
0.2127 + }, + "2761_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2762_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "2764_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2767_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2769_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2777_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2778_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2781_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2785_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2793_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2795_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "2803_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2804_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2808_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2812_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2820_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2827_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2835_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2836_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2837_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2838_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2839_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2840_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2841_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2843_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2845_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2847_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2850_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 
+ }, + "2851_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "2852_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "2854_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2857_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2859_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2867_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2868_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2871_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2875_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2883_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2890_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "2898_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "2900_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2902_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2904_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2907_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2908_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "2910_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "2913_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "2915_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2923_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2924_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2927_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2931_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2939_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2941_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "2949_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "2950_ios16.add_x_y": { + 
"dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "2954_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2958_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2966_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2973_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2981_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "2982_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2983_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2984_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2985_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "2986_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "2987_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "2989_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2991_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "2993_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "2996_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "2997_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "2998_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "3000_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "3003_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "3005_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3013_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3014_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3017_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3021_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3029_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3036_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "3044_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + 
"3046_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3048_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3050_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3053_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3054_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "3056_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3059_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3061_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3069_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3070_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3073_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3077_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3085_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "3087_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "3095_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "3096_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3100_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3104_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3112_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3119_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3127_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3128_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "3129_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3130_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "3131_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "3132_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3133_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "3135_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3137_ios16.mul_x_y": { + 
"dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3139_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "3142_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "3143_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "3144_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "3146_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "3149_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "3151_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3159_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3160_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3163_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3167_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3175_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3182_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "3190_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "3192_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3194_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3196_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3199_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3200_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "3202_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3205_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3207_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3215_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3216_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3219_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3223_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3231_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + 
"cost": 0.0707 + }, + "3233_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "3241_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "3242_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3246_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3250_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3258_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3265_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3273_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3274_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "3275_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3276_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "3277_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "3278_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3279_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "3281_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3283_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3285_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "3288_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "3289_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "3290_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "3292_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "3295_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "3297_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3305_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3306_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3309_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3313_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3321_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + 
"3328_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "3336_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "3338_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3340_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3342_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3345_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3346_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "3348_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3351_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3353_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3361_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3362_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3365_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3369_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3377_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "3379_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "3387_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "3388_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3392_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3396_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3404_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3411_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3419_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3420_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "3421_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3422_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "3423_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.058 + }, + "3424_ios16.mul_x_y": { + "dispatch": "ANE", + 
"supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3425_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1413 + }, + "3427_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3429_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3431_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "3434_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "3435_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "3436_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "3438_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0635 + }, + "3441_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0323 + }, + "3443_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3451_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3452_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3455_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3459_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3467_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3474_ios16.conv_groups_weight_x_dilations_strides_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0602 + }, + "3482_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 1.0607 + }, + "3484_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3486_ios16.mul_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3488_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3491_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3492_ios16.softmax_axis_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0033 + }, + "3494_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.2127 + }, + "3497_ios16.matmul_y_transpose_y_transpose_x_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.1081 + }, + "3499_ios16.reshape_shape_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3507_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0177 + }, + "3508_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3511_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" 
+ ], + "cost": 0.0001 + }, + "3515_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3523_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "3525_ios16.gelu_mode_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0006 + }, + "3533_ios16.conv_groups_bias_weight_dilations_strides_x_pad_pad_type": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0707 + }, + "3534_ios16.add_x_y": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3537_ios16.layer_norm_epsilon_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3541_ios16.batch_norm_epsilon_gamma_mean_variance_x_beta": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3543_squeeze_x_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0001 + }, + "3546_transpose_x_perm": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0003 + }, + "3547_ios16.linear_weight_x_bias": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.001 + }, + "3550_concat_axis_values_interleave": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "3553_concat_axis_values_interleave": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0012 + }, + "3557_slice_by_index_begin_end_end_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0018 + }, + "3562_slice_by_index_begin_end_end_mask_squeeze_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3566_slice_by_index_begin_end_end_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0018 + }, + "3571_slice_by_index_begin_end_end_mask_squeeze_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3575_slice_by_index_begin_end_end_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0018 + }, + "3580_slice_by_index_begin_end_end_mask_squeeze_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3584_slice_by_index_begin_end_end_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0018 + }, + "3589_slice_by_index_begin_end_end_mask_squeeze_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3593_slice_by_index_begin_end_end_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0018 + }, + "3598_slice_by_index_begin_end_end_mask_squeeze_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3602_slice_by_index_begin_end_end_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0018 + }, + "3607_slice_by_index_begin_end_end_mask_squeeze_mask_x": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0002 + }, + "3610_concat_axis_values_interleave": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0005 + }, + "3613_ios16.reduce_mean_x_keep_dims_axes": { + "dispatch": "ANE", + "supported": [ + "CPU", + "ANE" + ], + "cost": 0.0007 + } +} \ No newline at end of file diff --git 
a/openai_whisper-medium/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-medium/TextDecoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c770776da01460bfefad7fc3fb0945ebd715149 --- /dev/null +++ b/openai_whisper-medium/TextDecoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c314ca0a7738acc7ac9f4c04da834cf46972c697bd1a6c5413c18e9d278a3c81 +size 243 diff --git a/openai_whisper-medium/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-medium/TextDecoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd98236d762ab533165274333882df43ed744680 --- /dev/null +++ b/openai_whisper-medium/TextDecoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed3dffd67cc0be59aa28a552d8963cb27ec29d60996ede9fc74162fa09af0a4 +size 637 diff --git a/openai_whisper-medium/TextDecoder.mlmodelc/metadata.json b/openai_whisper-medium/TextDecoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..adefd81be43a1bca6b008c67d3956f59f3091de2 --- /dev/null +++ b/openai_whisper-medium/TextDecoder.mlmodelc/metadata.json @@ -0,0 +1,168 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 51865)", + "shortDescription" : "", + "shape" : "[1, 1, 51865]", + "name" : "logits", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 24576 × 1 × 1)", + "shortDescription" : "", + "shape" : "[1, 24576, 1, 1]", + "name" : "key_cache_updates", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 24576 × 1 × 1)", + "shortDescription" : "", + "shape" : "[1, 24576, 1, 1]", + "name" : "value_cache_updates", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 1500]", + "name" : "alignment_heads_weights", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Ios16.linear" : 1, + "Concat" : 3, + "Ios16.reduceMean" : 1, + "Ios16.mul" : 144, + "Ios16.layerNorm" : 73, + "SliceByIndex" : 12, + "Ios16.sub" : 1, + "Transpose" : 1, + "Ios16.conv" : 240, + "Ios16.add" : 145, + "Squeeze" : 1, + "Ios16.matmul" : 96, + "Ios16.softmax" : 48, + "Ios16.gelu" : 24, + "ExpandDims" : 6, + "Ios16.batchNorm" : 73, + "Split" : 2, + "Ios16.gather" : 2, + "Ios16.reshape" : 192 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.version" : "8.2", + "com.github.apple.coremltools.source" : "torch==2.5.0", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + 
"formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "input_ids", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "cache_length", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 24576 × 1 × 448)", + "shortDescription" : "", + "shape" : "[1, 24576, 1, 448]", + "name" : "key_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 24576 × 1 × 448)", + "shortDescription" : "", + "shape" : "[1, 24576, 1, 448]", + "name" : "value_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448)", + "shortDescription" : "", + "shape" : "[1, 448]", + "name" : "kv_cache_update_mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 1500]", + "name" : "encoder_output_embeds", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448)", + "shortDescription" : "", + "shape" : "[1, 448]", + "name" : "decoder_key_padding_mask", + "type" : "MultiArray" + } + ], + "generatedClassName" : "TextDecoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-medium/TextDecoder.mlmodelc/model.mil b/openai_whisper-medium/TextDecoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..510013572579213f5a6da9f945bca2098fb6a503 --- /dev/null +++ b/openai_whisper-medium/TextDecoder.mlmodelc/model.mil @@ -0,0 +1,3620 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3401.3.1"}, {"coremlc-version", "3401.4.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})] +{ + func main(tensor cache_length, tensor decoder_key_padding_mask, tensor encoder_output_embeds, tensor input_ids, tensor key_cache, tensor kv_cache_update_mask, tensor value_cache) { + tensor var_64_axis_0 = const()[name = tensor("op_64_axis_0"), val = tensor(0)]; + tensor var_64_batch_dims_0 = const()[name = tensor("op_64_batch_dims_0"), val = tensor(0)]; + tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_64_cast_fp16 = gather(axis = var_64_axis_0, batch_dims = var_64_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("op_64_cast_fp16")]; + tensor var_68_axis_0 = const()[name = tensor("op_68_axis_0"), val = tensor(0)]; + tensor var_68_batch_dims_0 = const()[name = tensor("op_68_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_weight_to_fp16 = const()[name = tensor("embed_positions_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106219648)))]; + tensor var_68_cast_fp16 = gather(axis = var_68_axis_0, batch_dims = var_68_batch_dims_0, indices = cache_length, 
x = embed_positions_weight_to_fp16)[name = tensor("op_68_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = add(x = var_64_cast_fp16, y = var_68_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_82_axes_0 = const()[name = tensor("op_82_axes_0"), val = tensor([2])]; + tensor var_82_cast_fp16 = expand_dims(axes = var_82_axes_0, x = hidden_states_1_cast_fp16)[name = tensor("op_82_cast_fp16")]; + tensor inputs_1_axes_0 = const()[name = tensor("inputs_1_axes_0"), val = tensor([3])]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_82_cast_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor tile_0 = const()[name = tensor("tile_0"), val = tensor([1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024])]; + tensor var_87_axis_0 = const()[name = tensor("op_87_axis_0"), val = tensor(1)]; + tensor var_87_cast_fp16_0, tensor var_87_cast_fp16_1, tensor var_87_cast_fp16_2, tensor var_87_cast_fp16_3, tensor var_87_cast_fp16_4, tensor var_87_cast_fp16_5, tensor var_87_cast_fp16_6, tensor var_87_cast_fp16_7, tensor var_87_cast_fp16_8, tensor var_87_cast_fp16_9, tensor var_87_cast_fp16_10, tensor var_87_cast_fp16_11, tensor var_87_cast_fp16_12, tensor var_87_cast_fp16_13, tensor var_87_cast_fp16_14, tensor var_87_cast_fp16_15, tensor var_87_cast_fp16_16, tensor var_87_cast_fp16_17, tensor var_87_cast_fp16_18, tensor var_87_cast_fp16_19, tensor var_87_cast_fp16_20, tensor var_87_cast_fp16_21, tensor var_87_cast_fp16_22, tensor var_87_cast_fp16_23 = split(axis = var_87_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor("op_87_cast_fp16")]; + tensor tile_1 = const()[name = tensor("tile_1"), val = tensor([1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024])]; + tensor var_114_axis_0 = const()[name = tensor("op_114_axis_0"), val = tensor(1)]; + tensor var_114_cast_fp16_0, tensor var_114_cast_fp16_1, tensor var_114_cast_fp16_2, tensor var_114_cast_fp16_3, tensor var_114_cast_fp16_4, tensor var_114_cast_fp16_5, tensor var_114_cast_fp16_6, tensor var_114_cast_fp16_7, tensor var_114_cast_fp16_8, tensor var_114_cast_fp16_9, tensor var_114_cast_fp16_10, tensor var_114_cast_fp16_11, tensor var_114_cast_fp16_12, tensor var_114_cast_fp16_13, tensor var_114_cast_fp16_14, tensor var_114_cast_fp16_15, tensor var_114_cast_fp16_16, tensor var_114_cast_fp16_17, tensor var_114_cast_fp16_18, tensor var_114_cast_fp16_19, tensor var_114_cast_fp16_20, tensor var_114_cast_fp16_21, tensor var_114_cast_fp16_22, tensor var_114_cast_fp16_23 = split(axis = var_114_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor("op_114_cast_fp16")]; + tensor var_144 = const()[name = tensor("op_144"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_169_to_fp16 = const()[name = tensor("op_169_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_169_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107137216)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107139328)))]; + tensor 
obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107141440)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107143552)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("valid")]; + tensor query_1_strides_0 = const()[name = tensor("query_1_strides_0"), val = tensor([1, 1])]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_1_dilations_0 = const()[name = tensor("query_1_dilations_0"), val = tensor([1, 1])]; + tensor query_1_groups_0 = const()[name = tensor("query_1_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107145664)))]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109242880)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor current_key_1_pad_type_0 = const()[name = tensor("current_key_1_pad_type_0"), val = tensor("valid")]; + tensor current_key_1_strides_0 = const()[name = tensor("current_key_1_strides_0"), val = tensor([1, 1])]; + tensor current_key_1_pad_0 = const()[name = tensor("current_key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_1_dilations_0 = const()[name = tensor("current_key_1_dilations_0"), val = tensor([1, 1])]; + tensor current_key_1_groups_0 = const()[name = tensor("current_key_1_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109244992)))]; + tensor current_key_1_cast_fp16 = conv(dilations = current_key_1_dilations_0, groups = current_key_1_groups_0, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = current_key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("current_key_1_cast_fp16")]; + tensor current_value_1_pad_type_0 = const()[name = tensor("current_value_1_pad_type_0"), val = tensor("valid")]; + tensor current_value_1_strides_0 = const()[name = tensor("current_value_1_strides_0"), val = tensor([1, 1])]; + tensor current_value_1_pad_0 = const()[name = tensor("current_value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_1_dilations_0 = const()[name = tensor("current_value_1_dilations_0"), val = tensor([1, 1])]; + tensor current_value_1_groups_0 = const()[name 
= tensor("current_value_1_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111342208)))]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113439424)))]; + tensor current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_1_dilations_0, groups = current_value_1_groups_0, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = current_value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("current_value_1_cast_fp16")]; + tensor var_204_axes_0 = const()[name = tensor("op_204_axes_0"), val = tensor([1])]; + tensor var_204_cast_fp16 = expand_dims(axes = var_204_axes_0, x = kv_cache_update_mask)[name = tensor("op_204_cast_fp16")]; + tensor var_205_axes_0 = const()[name = tensor("op_205_axes_0"), val = tensor([2])]; + tensor var_205_cast_fp16 = expand_dims(axes = var_205_axes_0, x = var_204_cast_fp16)[name = tensor("op_205_cast_fp16")]; + tensor var_145_to_fp16 = const()[name = tensor("op_145_to_fp16"), val = tensor(0x1p+0)]; + tensor var_207_cast_fp16 = sub(x = var_145_to_fp16, y = var_205_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_208_cast_fp16 = mul(x = var_87_cast_fp16_0, y = var_207_cast_fp16)[name = tensor("op_208_cast_fp16")]; + tensor var_209_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_209_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_208_cast_fp16, y = var_209_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_212_cast_fp16 = mul(x = var_114_cast_fp16_0, y = var_207_cast_fp16)[name = tensor("op_212_cast_fp16")]; + tensor var_213_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_213_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_212_cast_fp16, y = var_213_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_217 = const()[name = tensor("op_217"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_217, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_219_to_fp16 = const()[name = tensor("op_219_to_fp16"), val = tensor(0x1p-3)]; + tensor var_220_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_219_to_fp16)[name = tensor("op_220_cast_fp16")]; + tensor var_223 = const()[name = tensor("op_223"), val = tensor([1, 16, 64, 448])]; + tensor var_224_cast_fp16 = reshape(shape = var_223, x = key_1_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_220_cast_fp16, y = var_224_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_228_axes_0 = const()[name = tensor("op_228_axes_0"), val = tensor([1])]; + tensor var_228_cast_fp16 = expand_dims(axes = var_228_axes_0, x = decoder_key_padding_mask)[name = tensor("op_228_cast_fp16")]; + tensor var_229_axes_0 = const()[name = tensor("op_229_axes_0"), val = tensor([2])]; + tensor var_229_cast_fp16 = expand_dims(axes = 
var_229_axes_0, x = var_228_cast_fp16)[name = tensor("op_229_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_232_cast_fp16 = softmax(axis = var_144, x = mh_w_3_cast_fp16)[name = tensor("op_232_cast_fp16")]; + tensor var_233 = const()[name = tensor("op_233"), val = tensor([1, 16, 64, 448])]; + tensor var_234_cast_fp16 = reshape(shape = var_233, x = value_1_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_234_cast_fp16, y = var_232_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor([1, 1024, 1, 1])]; + tensor input_1_cast_fp16 = reshape(shape = var_237, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("valid")]; + tensor obj_7_strides_0 = const()[name = tensor("obj_7_strides_0"), val = tensor([1, 1])]; + tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_7_dilations_0 = const()[name = tensor("obj_7_dilations_0"), val = tensor([1, 1])]; + tensor obj_7_groups_0 = const()[name = tensor("obj_7_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113441536)))]; + tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115538752)))]; + tensor obj_7_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_259_to_fp16 = const()[name = tensor("op_259_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_259_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115540864)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115542976)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = 
tensor("valid")]; + tensor query_3_strides_0 = const()[name = tensor("query_3_strides_0"), val = tensor([1, 1])]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_3_dilations_0 = const()[name = tensor("query_3_dilations_0"), val = tensor([1, 1])]; + tensor query_3_groups_0 = const()[name = tensor("query_3_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115545088)))]; + tensor layers_0_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117642304)))]; + tensor query_3_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_0_encoder_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("valid")]; + tensor key_3_strides_0 = const()[name = tensor("key_3_strides_0"), val = tensor([1, 1])]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_3_dilations_0 = const()[name = tensor("key_3_dilations_0"), val = tensor([1, 1])]; + tensor key_3_groups_0 = const()[name = tensor("key_3_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117644416)))]; + tensor key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_0_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_3_cast_fp16")]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("valid")]; + tensor value_3_strides_0 = const()[name = tensor("value_3_strides_0"), val = tensor([1, 1])]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_3_dilations_0 = const()[name = tensor("value_3_dilations_0"), val = tensor([1, 1])]; + tensor value_3_groups_0 = const()[name = tensor("value_3_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119741632)))]; + tensor layers_0_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121838848)))]; + tensor value_3_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_0_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_3_cast_fp16")]; + tensor var_295 = const()[name = tensor("op_295"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_295, 
x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_297_to_fp16 = const()[name = tensor("op_297_to_fp16"), val = tensor(0x1p-3)]; + tensor var_298_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_297_to_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_301 = const()[name = tensor("op_301"), val = tensor([1, 16, 64, 1500])]; + tensor var_302_cast_fp16 = reshape(shape = var_301, x = key_3_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_298_cast_fp16, y = var_302_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor obj_13_cast_fp16 = softmax(axis = var_144, x = mh_w_5_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_306 = const()[name = tensor("op_306"), val = tensor([1, 16, 64, 1500])]; + tensor var_307_cast_fp16 = reshape(shape = var_306, x = value_3_cast_fp16)[name = tensor("op_307_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_307_cast_fp16, y = obj_13_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_310 = const()[name = tensor("op_310"), val = tensor([1, 1024, 1, 1])]; + tensor input_3_cast_fp16 = reshape(shape = var_310, x = attn_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("valid")]; + tensor obj_11_strides_0 = const()[name = tensor("obj_11_strides_0"), val = tensor([1, 1])]; + tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_11_dilations_0 = const()[name = tensor("obj_11_dilations_0"), val = tensor([1, 1])]; + tensor obj_11_groups_0 = const()[name = tensor("obj_11_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121840960)))]; + tensor layers_0_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123938176)))]; + tensor obj_11_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_0_encoder_attn_o_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_328_to_fp16 = const()[name = tensor("op_328_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_328_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor input_5_gamma_0_to_fp16 = const()[name = tensor("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(123940288)))]; + tensor input_5_beta_0_to_fp16 = const()[name = tensor("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123942400)))]; + tensor input_5_epsilon_0_to_fp16 = const()[name = tensor("input_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("valid")]; + tensor input_7_strides_0 = const()[name = tensor("input_7_strides_0"), val = tensor([1, 1])]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_7_dilations_0 = const()[name = tensor("input_7_dilations_0"), val = tensor([1, 1])]; + tensor input_7_groups_0 = const()[name = tensor("input_7_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123944512)))]; + tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132333184)))]; + tensor input_7_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor input_9_mode_0 = const()[name = tensor("input_9_mode_0"), val = tensor("EXACT")]; + tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor hidden_states_3_pad_type_0 = const()[name = tensor("hidden_states_3_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_3_strides_0 = const()[name = tensor("hidden_states_3_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_0 = const()[name = tensor("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_3_dilations_0 = const()[name = tensor("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_3_groups_0 = const()[name = tensor("hidden_states_3_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132341440)))]; + tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140730112)))]; + tensor hidden_states_3_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_363 = const()[name = tensor("op_363"), val = tensor(3)]; + tensor out_7_axes_0 = 
const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_388_to_fp16 = const()[name = tensor("op_388_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_388_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor obj_15_gamma_0_to_fp16 = const()[name = tensor("obj_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140732224)))]; + tensor obj_15_beta_0_to_fp16 = const()[name = tensor("obj_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140734336)))]; + tensor obj_15_epsilon_0_to_fp16 = const()[name = tensor("obj_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("valid")]; + tensor query_5_strides_0 = const()[name = tensor("query_5_strides_0"), val = tensor([1, 1])]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_5_dilations_0 = const()[name = tensor("query_5_dilations_0"), val = tensor([1, 1])]; + tensor query_5_groups_0 = const()[name = tensor("query_5_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140736448)))]; + tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142833664)))]; + tensor query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor current_key_3_pad_type_0 = const()[name = tensor("current_key_3_pad_type_0"), val = tensor("valid")]; + tensor current_key_3_strides_0 = const()[name = tensor("current_key_3_strides_0"), val = tensor([1, 1])]; + tensor current_key_3_pad_0 = const()[name = tensor("current_key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_3_dilations_0 = const()[name = tensor("current_key_3_dilations_0"), val = tensor([1, 1])]; + tensor current_key_3_groups_0 = const()[name = tensor("current_key_3_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142835776)))]; + tensor current_key_3_cast_fp16 = conv(dilations = current_key_3_dilations_0, groups = current_key_3_groups_0, pad = current_key_3_pad_0, pad_type = current_key_3_pad_type_0, strides = current_key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("current_key_3_cast_fp16")]; + tensor current_value_3_pad_type_0 = const()[name = tensor("current_value_3_pad_type_0"), val = tensor("valid")]; + tensor current_value_3_strides_0 = const()[name = 
tensor("current_value_3_strides_0"), val = tensor([1, 1])]; + tensor current_value_3_pad_0 = const()[name = tensor("current_value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_3_dilations_0 = const()[name = tensor("current_value_3_dilations_0"), val = tensor([1, 1])]; + tensor current_value_3_groups_0 = const()[name = tensor("current_value_3_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144932992)))]; + tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147030208)))]; + tensor current_value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = current_value_3_dilations_0, groups = current_value_3_groups_0, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = current_value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("current_value_3_cast_fp16")]; + tensor var_427_cast_fp16 = mul(x = var_87_cast_fp16_1, y = var_207_cast_fp16)[name = tensor("op_427_cast_fp16")]; + tensor var_428_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_427_cast_fp16, y = var_428_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_431_cast_fp16 = mul(x = var_114_cast_fp16_1, y = var_207_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_432_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_431_cast_fp16, y = var_432_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_436, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_438_to_fp16 = const()[name = tensor("op_438_to_fp16"), val = tensor(0x1p-3)]; + tensor var_439_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_438_to_fp16)[name = tensor("op_439_cast_fp16")]; + tensor var_442 = const()[name = tensor("op_442"), val = tensor([1, 16, 64, 448])]; + tensor var_443_cast_fp16 = reshape(shape = var_442, x = key_5_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_439_cast_fp16, y = var_443_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_451_cast_fp16 = softmax(axis = var_363, x = mh_w_9_cast_fp16)[name = tensor("op_451_cast_fp16")]; + tensor var_452 = const()[name = tensor("op_452"), val = tensor([1, 16, 64, 448])]; + tensor var_453_cast_fp16 = reshape(shape = var_452, x = value_5_cast_fp16)[name = tensor("op_453_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 
= matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_453_cast_fp16, y = var_451_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_456 = const()[name = tensor("op_456"), val = tensor([1, 1024, 1, 1])]; + tensor input_11_cast_fp16 = reshape(shape = var_456, x = attn_5_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor obj_21_pad_type_0 = const()[name = tensor("obj_21_pad_type_0"), val = tensor("valid")]; + tensor obj_21_strides_0 = const()[name = tensor("obj_21_strides_0"), val = tensor([1, 1])]; + tensor obj_21_pad_0 = const()[name = tensor("obj_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_21_dilations_0 = const()[name = tensor("obj_21_dilations_0"), val = tensor([1, 1])]; + tensor obj_21_groups_0 = const()[name = tensor("obj_21_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147032320)))]; + tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149129536)))]; + tensor obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_21_dilations_0, groups = obj_21_groups_0, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = obj_21_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_478_to_fp16 = const()[name = tensor("op_478_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_478_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_23_gamma_0_to_fp16 = const()[name = tensor("obj_23_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149131648)))]; + tensor obj_23_beta_0_to_fp16 = const()[name = tensor("obj_23_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149133760)))]; + tensor obj_23_epsilon_0_to_fp16 = const()[name = tensor("obj_23_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("valid")]; + tensor query_7_strides_0 = const()[name = tensor("query_7_strides_0"), val = tensor([1, 1])]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_7_dilations_0 = const()[name = tensor("query_7_dilations_0"), val = tensor([1, 1])]; + tensor query_7_groups_0 = const()[name = tensor("query_7_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149135872)))]; + tensor layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = 
tensor("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151233088)))]; + tensor query_7_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("valid")]; + tensor key_7_strides_0 = const()[name = tensor("key_7_strides_0"), val = tensor([1, 1])]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_7_dilations_0 = const()[name = tensor("key_7_dilations_0"), val = tensor([1, 1])]; + tensor key_7_groups_0 = const()[name = tensor("key_7_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151235200)))]; + tensor key_7_cast_fp16 = conv(dilations = key_7_dilations_0, groups = key_7_groups_0, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = key_7_strides_0, weight = layers_1_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_7_cast_fp16")]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("valid")]; + tensor value_7_strides_0 = const()[name = tensor("value_7_strides_0"), val = tensor([1, 1])]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_7_dilations_0 = const()[name = tensor("value_7_dilations_0"), val = tensor([1, 1])]; + tensor value_7_groups_0 = const()[name = tensor("value_7_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153332416)))]; + tensor layers_1_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155429632)))]; + tensor value_7_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_bias_to_fp16, dilations = value_7_dilations_0, groups = value_7_groups_0, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = value_7_strides_0, weight = layers_1_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_7_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_514, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1p-3)]; + tensor var_517_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_520 = const()[name = tensor("op_520"), val = tensor([1, 16, 64, 1500])]; + tensor var_521_cast_fp16 = reshape(shape = var_520, x = key_7_cast_fp16)[name = tensor("op_521_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 
= matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_517_cast_fp16, y = var_521_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor obj_27_cast_fp16 = softmax(axis = var_363, x = mh_w_11_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor var_525 = const()[name = tensor("op_525"), val = tensor([1, 16, 64, 1500])]; + tensor var_526_cast_fp16 = reshape(shape = var_525, x = value_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_526_cast_fp16, y = obj_27_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_529 = const()[name = tensor("op_529"), val = tensor([1, 1024, 1, 1])]; + tensor input_13_cast_fp16 = reshape(shape = var_529, x = attn_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor obj_25_pad_type_0 = const()[name = tensor("obj_25_pad_type_0"), val = tensor("valid")]; + tensor obj_25_strides_0 = const()[name = tensor("obj_25_strides_0"), val = tensor([1, 1])]; + tensor obj_25_pad_0 = const()[name = tensor("obj_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_25_dilations_0 = const()[name = tensor("obj_25_dilations_0"), val = tensor([1, 1])]; + tensor obj_25_groups_0 = const()[name = tensor("obj_25_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155431744)))]; + tensor layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157528960)))]; + tensor obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = obj_25_dilations_0, groups = obj_25_groups_0, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = obj_25_strides_0, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_547_to_fp16 = const()[name = tensor("op_547_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_547_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_15_gamma_0_to_fp16 = const()[name = tensor("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157531072)))]; + tensor input_15_beta_0_to_fp16 = const()[name = tensor("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157533184)))]; + tensor input_15_epsilon_0_to_fp16 = const()[name = tensor("input_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor 
input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("valid")]; + tensor input_17_strides_0 = const()[name = tensor("input_17_strides_0"), val = tensor([1, 1])]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_17_dilations_0 = const()[name = tensor("input_17_dilations_0"), val = tensor([1, 1])]; + tensor input_17_groups_0 = const()[name = tensor("input_17_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157535296)))]; + tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165923968)))]; + tensor input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor input_19_mode_0 = const()[name = tensor("input_19_mode_0"), val = tensor("EXACT")]; + tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_5_strides_0 = const()[name = tensor("hidden_states_5_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_5_dilations_0 = const()[name = tensor("hidden_states_5_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_5_groups_0 = const()[name = tensor("hidden_states_5_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165932224)))]; + tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174320896)))]; + tensor hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_582 = const()[name = tensor("op_582"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_607_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174323008)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(174325120)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("valid")]; + tensor query_9_strides_0 = const()[name = tensor("query_9_strides_0"), val = tensor([1, 1])]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_9_dilations_0 = const()[name = tensor("query_9_dilations_0"), val = tensor([1, 1])]; + tensor query_9_groups_0 = const()[name = tensor("query_9_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174327232)))]; + tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176424448)))]; + tensor query_9_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_9_dilations_0, groups = query_9_groups_0, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = query_9_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor current_key_5_pad_type_0 = const()[name = tensor("current_key_5_pad_type_0"), val = tensor("valid")]; + tensor current_key_5_strides_0 = const()[name = tensor("current_key_5_strides_0"), val = tensor([1, 1])]; + tensor current_key_5_pad_0 = const()[name = tensor("current_key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_5_dilations_0 = const()[name = tensor("current_key_5_dilations_0"), val = tensor([1, 1])]; + tensor current_key_5_groups_0 = const()[name = tensor("current_key_5_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176426560)))]; + tensor current_key_5_cast_fp16 = conv(dilations = current_key_5_dilations_0, groups = current_key_5_groups_0, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = current_key_5_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("current_key_5_cast_fp16")]; + tensor current_value_5_pad_type_0 = const()[name = tensor("current_value_5_pad_type_0"), val = tensor("valid")]; + tensor current_value_5_strides_0 = const()[name = tensor("current_value_5_strides_0"), val = tensor([1, 1])]; + tensor current_value_5_pad_0 = const()[name = tensor("current_value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_5_dilations_0 = const()[name = tensor("current_value_5_dilations_0"), val = tensor([1, 1])]; + tensor current_value_5_groups_0 = const()[name = tensor("current_value_5_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(178523776)))]; + tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180620992)))]; + tensor current_value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = current_value_5_dilations_0, groups = current_value_5_groups_0, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = current_value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("current_value_5_cast_fp16")]; + tensor var_646_cast_fp16 = mul(x = var_87_cast_fp16_2, y = var_207_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor var_647_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_647_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_646_cast_fp16, y = var_647_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_650_cast_fp16 = mul(x = var_114_cast_fp16_2, y = var_207_cast_fp16)[name = tensor("op_650_cast_fp16")]; + tensor var_651_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_651_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_650_cast_fp16, y = var_651_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_655 = const()[name = tensor("op_655"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_655, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_657_to_fp16 = const()[name = tensor("op_657_to_fp16"), val = tensor(0x1p-3)]; + tensor var_658_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_657_to_fp16)[name = tensor("op_658_cast_fp16")]; + tensor var_661 = const()[name = tensor("op_661"), val = tensor([1, 16, 64, 448])]; + tensor var_662_cast_fp16 = reshape(shape = var_661, x = key_9_cast_fp16)[name = tensor("op_662_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_658_cast_fp16, y = var_662_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_670_cast_fp16 = softmax(axis = var_582, x = mh_w_15_cast_fp16)[name = tensor("op_670_cast_fp16")]; + tensor var_671 = const()[name = tensor("op_671"), val = tensor([1, 16, 64, 448])]; + tensor var_672_cast_fp16 = reshape(shape = var_671, x = value_9_cast_fp16)[name = tensor("op_672_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_672_cast_fp16, y = var_670_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_675 = const()[name = tensor("op_675"), val = tensor([1, 1024, 1, 1])]; + tensor input_21_cast_fp16 = reshape(shape = var_675, x = attn_9_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor obj_35_pad_type_0 = const()[name = tensor("obj_35_pad_type_0"), val = tensor("valid")]; + tensor obj_35_strides_0 = const()[name = tensor("obj_35_strides_0"), val = tensor([1, 1])]; + tensor obj_35_pad_0 = 
const()[name = tensor("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_35_dilations_0 = const()[name = tensor("obj_35_dilations_0"), val = tensor([1, 1])]; + tensor obj_35_groups_0 = const()[name = tensor("obj_35_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180623104)))]; + tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182720320)))]; + tensor obj_35_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_697_to_fp16 = const()[name = tensor("op_697_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_697_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182722432)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182724544)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("valid")]; + tensor query_11_strides_0 = const()[name = tensor("query_11_strides_0"), val = tensor([1, 1])]; + tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_11_dilations_0 = const()[name = tensor("query_11_dilations_0"), val = tensor([1, 1])]; + tensor query_11_groups_0 = const()[name = tensor("query_11_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182726656)))]; + tensor layers_2_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184823872)))]; + tensor query_11_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_bias_to_fp16, dilations = query_11_dilations_0, groups = query_11_groups_0, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = query_11_strides_0, weight = layers_2_encoder_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor key_11_pad_type_0 = const()[name = 
tensor("key_11_pad_type_0"), val = tensor("valid")]; + tensor key_11_strides_0 = const()[name = tensor("key_11_strides_0"), val = tensor([1, 1])]; + tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_11_dilations_0 = const()[name = tensor("key_11_dilations_0"), val = tensor([1, 1])]; + tensor key_11_groups_0 = const()[name = tensor("key_11_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184825984)))]; + tensor key_11_cast_fp16 = conv(dilations = key_11_dilations_0, groups = key_11_groups_0, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = key_11_strides_0, weight = layers_2_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_11_cast_fp16")]; + tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("valid")]; + tensor value_11_strides_0 = const()[name = tensor("value_11_strides_0"), val = tensor([1, 1])]; + tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_11_dilations_0 = const()[name = tensor("value_11_dilations_0"), val = tensor([1, 1])]; + tensor value_11_groups_0 = const()[name = tensor("value_11_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186923200)))]; + tensor layers_2_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189020416)))]; + tensor value_11_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_bias_to_fp16, dilations = value_11_dilations_0, groups = value_11_groups_0, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = value_11_strides_0, weight = layers_2_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_11_cast_fp16")]; + tensor var_733 = const()[name = tensor("op_733"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_733, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_735_to_fp16 = const()[name = tensor("op_735_to_fp16"), val = tensor(0x1p-3)]; + tensor var_736_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_735_to_fp16)[name = tensor("op_736_cast_fp16")]; + tensor var_739 = const()[name = tensor("op_739"), val = tensor([1, 16, 64, 1500])]; + tensor var_740_cast_fp16 = reshape(shape = var_739, x = key_11_cast_fp16)[name = tensor("op_740_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_736_cast_fp16, y = var_740_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor obj_41_cast_fp16 = softmax(axis = var_582, x = mh_w_17_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor([1, 16, 64, 1500])]; + tensor var_745_cast_fp16 = reshape(shape = var_744, x = value_11_cast_fp16)[name = tensor("op_745_cast_fp16")]; + tensor 
attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_745_cast_fp16, y = obj_41_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_748 = const()[name = tensor("op_748"), val = tensor([1, 1024, 1, 1])]; + tensor input_23_cast_fp16 = reshape(shape = var_748, x = attn_11_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor obj_39_pad_type_0 = const()[name = tensor("obj_39_pad_type_0"), val = tensor("valid")]; + tensor obj_39_strides_0 = const()[name = tensor("obj_39_strides_0"), val = tensor([1, 1])]; + tensor obj_39_pad_0 = const()[name = tensor("obj_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_39_dilations_0 = const()[name = tensor("obj_39_dilations_0"), val = tensor([1, 1])]; + tensor obj_39_groups_0 = const()[name = tensor("obj_39_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189022528)))]; + tensor layers_2_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191119744)))]; + tensor obj_39_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_bias_to_fp16, dilations = obj_39_dilations_0, groups = obj_39_groups_0, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = obj_39_strides_0, weight = layers_2_encoder_attn_o_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_766_to_fp16 = const()[name = tensor("op_766_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_766_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor input_25_gamma_0_to_fp16 = const()[name = tensor("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191121856)))]; + tensor input_25_beta_0_to_fp16 = const()[name = tensor("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191123968)))]; + tensor input_25_epsilon_0_to_fp16 = const()[name = tensor("input_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor input_27_pad_type_0 = const()[name = tensor("input_27_pad_type_0"), val = tensor("valid")]; + tensor input_27_strides_0 = const()[name = tensor("input_27_strides_0"), val = tensor([1, 1])]; + tensor input_27_pad_0 = const()[name = tensor("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_27_dilations_0 = const()[name = tensor("input_27_dilations_0"), val = tensor([1, 1])]; + tensor input_27_groups_0 = const()[name = tensor("input_27_groups_0"), val = tensor(1)]; + tensor 
layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191126080)))]; + tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199514752)))]; + tensor input_27_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor input_29_mode_0 = const()[name = tensor("input_29_mode_0"), val = tensor("EXACT")]; + tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_7_strides_0 = const()[name = tensor("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = tensor("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_7_groups_0 = const()[name = tensor("hidden_states_7_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199523008)))]; + tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207911680)))]; + tensor hidden_states_7_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_801 = const()[name = tensor("op_801"), val = tensor(3)]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_826_to_fp16 = const()[name = tensor("op_826_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_826_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor obj_43_gamma_0_to_fp16 = const()[name = tensor("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207913792)))]; + tensor obj_43_beta_0_to_fp16 = const()[name = tensor("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207915904)))]; + tensor obj_43_epsilon_0_to_fp16 = const()[name = tensor("obj_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor query_13_pad_type_0 = const()[name = 
tensor("query_13_pad_type_0"), val = tensor("valid")]; + tensor query_13_strides_0 = const()[name = tensor("query_13_strides_0"), val = tensor([1, 1])]; + tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_13_dilations_0 = const()[name = tensor("query_13_dilations_0"), val = tensor([1, 1])]; + tensor query_13_groups_0 = const()[name = tensor("query_13_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207918016)))]; + tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210015232)))]; + tensor query_13_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = query_13_dilations_0, groups = query_13_groups_0, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = query_13_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor current_key_7_pad_type_0 = const()[name = tensor("current_key_7_pad_type_0"), val = tensor("valid")]; + tensor current_key_7_strides_0 = const()[name = tensor("current_key_7_strides_0"), val = tensor([1, 1])]; + tensor current_key_7_pad_0 = const()[name = tensor("current_key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_7_dilations_0 = const()[name = tensor("current_key_7_dilations_0"), val = tensor([1, 1])]; + tensor current_key_7_groups_0 = const()[name = tensor("current_key_7_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210017344)))]; + tensor current_key_7_cast_fp16 = conv(dilations = current_key_7_dilations_0, groups = current_key_7_groups_0, pad = current_key_7_pad_0, pad_type = current_key_7_pad_type_0, strides = current_key_7_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("current_key_7_cast_fp16")]; + tensor current_value_7_pad_type_0 = const()[name = tensor("current_value_7_pad_type_0"), val = tensor("valid")]; + tensor current_value_7_strides_0 = const()[name = tensor("current_value_7_strides_0"), val = tensor([1, 1])]; + tensor current_value_7_pad_0 = const()[name = tensor("current_value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_7_dilations_0 = const()[name = tensor("current_value_7_dilations_0"), val = tensor([1, 1])]; + tensor current_value_7_groups_0 = const()[name = tensor("current_value_7_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212114560)))]; + tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214211776)))]; + tensor current_value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = current_value_7_dilations_0, groups = current_value_7_groups_0, pad = current_value_7_pad_0, pad_type = current_value_7_pad_type_0, strides = 
current_value_7_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("current_value_7_cast_fp16")]; + tensor var_865_cast_fp16 = mul(x = var_87_cast_fp16_3, y = var_207_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_866_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_866_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_865_cast_fp16, y = var_866_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_869_cast_fp16 = mul(x = var_114_cast_fp16_3, y = var_207_cast_fp16)[name = tensor("op_869_cast_fp16")]; + tensor var_870_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_870_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_869_cast_fp16, y = var_870_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_874 = const()[name = tensor("op_874"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_874, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_876_to_fp16 = const()[name = tensor("op_876_to_fp16"), val = tensor(0x1p-3)]; + tensor var_877_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_876_to_fp16)[name = tensor("op_877_cast_fp16")]; + tensor var_880 = const()[name = tensor("op_880"), val = tensor([1, 16, 64, 448])]; + tensor var_881_cast_fp16 = reshape(shape = var_880, x = key_13_cast_fp16)[name = tensor("op_881_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_877_cast_fp16, y = var_881_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_889_cast_fp16 = softmax(axis = var_801, x = mh_w_21_cast_fp16)[name = tensor("op_889_cast_fp16")]; + tensor var_890 = const()[name = tensor("op_890"), val = tensor([1, 16, 64, 448])]; + tensor var_891_cast_fp16 = reshape(shape = var_890, x = value_13_cast_fp16)[name = tensor("op_891_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_891_cast_fp16, y = var_889_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_894 = const()[name = tensor("op_894"), val = tensor([1, 1024, 1, 1])]; + tensor input_31_cast_fp16 = reshape(shape = var_894, x = attn_13_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor obj_49_pad_type_0 = const()[name = tensor("obj_49_pad_type_0"), val = tensor("valid")]; + tensor obj_49_strides_0 = const()[name = tensor("obj_49_strides_0"), val = tensor([1, 1])]; + tensor obj_49_pad_0 = const()[name = tensor("obj_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_49_dilations_0 = const()[name = tensor("obj_49_dilations_0"), val = tensor([1, 1])]; + tensor obj_49_groups_0 = const()[name = tensor("obj_49_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(214213888)))]; + tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216311104)))]; + tensor obj_49_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = obj_49_dilations_0, groups = obj_49_groups_0, pad = obj_49_pad_0, pad_type = obj_49_pad_type_0, strides = obj_49_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_916_to_fp16 = const()[name = tensor("op_916_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_916_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_51_gamma_0_to_fp16 = const()[name = tensor("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216313216)))]; + tensor obj_51_beta_0_to_fp16 = const()[name = tensor("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216315328)))]; + tensor obj_51_epsilon_0_to_fp16 = const()[name = tensor("obj_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("valid")]; + tensor query_15_strides_0 = const()[name = tensor("query_15_strides_0"), val = tensor([1, 1])]; + tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_15_dilations_0 = const()[name = tensor("query_15_dilations_0"), val = tensor([1, 1])]; + tensor query_15_groups_0 = const()[name = tensor("query_15_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216317440)))]; + tensor layers_3_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218414656)))]; + tensor query_15_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_bias_to_fp16, dilations = query_15_dilations_0, groups = query_15_groups_0, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = query_15_strides_0, weight = layers_3_encoder_attn_q_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("valid")]; + tensor key_15_strides_0 = const()[name = tensor("key_15_strides_0"), val = tensor([1, 1])]; + tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_15_dilations_0 = const()[name = tensor("key_15_dilations_0"), val = tensor([1, 1])]; + tensor key_15_groups_0 = const()[name = tensor("key_15_groups_0"), val = tensor(1)]; + tensor 
layers_3_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218416768)))]; + tensor key_15_cast_fp16 = conv(dilations = key_15_dilations_0, groups = key_15_groups_0, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = key_15_strides_0, weight = layers_3_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_15_cast_fp16")]; + tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("valid")]; + tensor value_15_strides_0 = const()[name = tensor("value_15_strides_0"), val = tensor([1, 1])]; + tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_15_dilations_0 = const()[name = tensor("value_15_dilations_0"), val = tensor([1, 1])]; + tensor value_15_groups_0 = const()[name = tensor("value_15_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220513984)))]; + tensor layers_3_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222611200)))]; + tensor value_15_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_bias_to_fp16, dilations = value_15_dilations_0, groups = value_15_groups_0, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = value_15_strides_0, weight = layers_3_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_15_cast_fp16")]; + tensor var_952 = const()[name = tensor("op_952"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_952, x = query_15_cast_fp16)[name = tensor("mh_q_15_cast_fp16")]; + tensor var_954_to_fp16 = const()[name = tensor("op_954_to_fp16"), val = tensor(0x1p-3)]; + tensor var_955_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_954_to_fp16)[name = tensor("op_955_cast_fp16")]; + tensor var_958 = const()[name = tensor("op_958"), val = tensor([1, 16, 64, 1500])]; + tensor var_959_cast_fp16 = reshape(shape = var_958, x = key_15_cast_fp16)[name = tensor("op_959_cast_fp16")]; + tensor mh_w_23_transpose_x_0 = const()[name = tensor("mh_w_23_transpose_x_0"), val = tensor(true)]; + tensor mh_w_23_transpose_y_0 = const()[name = tensor("mh_w_23_transpose_y_0"), val = tensor(false)]; + tensor mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_955_cast_fp16, y = var_959_cast_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor obj_55_cast_fp16 = softmax(axis = var_801, x = mh_w_23_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor var_963 = const()[name = tensor("op_963"), val = tensor([1, 16, 64, 1500])]; + tensor var_964_cast_fp16 = reshape(shape = var_963, x = value_15_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_964_cast_fp16, y = obj_55_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_967 = const()[name = tensor("op_967"), val 
= tensor([1, 1024, 1, 1])]; + tensor input_33_cast_fp16 = reshape(shape = var_967, x = attn_15_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor obj_53_pad_type_0 = const()[name = tensor("obj_53_pad_type_0"), val = tensor("valid")]; + tensor obj_53_strides_0 = const()[name = tensor("obj_53_strides_0"), val = tensor([1, 1])]; + tensor obj_53_pad_0 = const()[name = tensor("obj_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_53_dilations_0 = const()[name = tensor("obj_53_dilations_0"), val = tensor([1, 1])]; + tensor obj_53_groups_0 = const()[name = tensor("obj_53_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222613312)))]; + tensor layers_3_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224710528)))]; + tensor obj_53_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_bias_to_fp16, dilations = obj_53_dilations_0, groups = obj_53_groups_0, pad = obj_53_pad_0, pad_type = obj_53_pad_type_0, strides = obj_53_strides_0, weight = layers_3_encoder_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_985_to_fp16 = const()[name = tensor("op_985_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_985_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224712640)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224714752)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("valid")]; + tensor input_37_strides_0 = const()[name = tensor("input_37_strides_0"), val = tensor([1, 1])]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_37_dilations_0 = const()[name = tensor("input_37_dilations_0"), val = tensor([1, 1])]; + tensor input_37_groups_0 = const()[name = tensor("input_37_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224716864)))]; + tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233105536)))]; + tensor input_37_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, 
dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_3_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_9_strides_0 = const()[name = tensor("hidden_states_9_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_9_dilations_0 = const()[name = tensor("hidden_states_9_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_9_groups_0 = const()[name = tensor("hidden_states_9_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233113792)))]; + tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241502464)))]; + tensor hidden_states_9_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_3_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_1020 = const()[name = tensor("op_1020"), val = tensor(3)]; + tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; + tensor var_1045_to_fp16 = const()[name = tensor("op_1045_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1045_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241504576)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241506688)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("valid")]; + tensor query_17_strides_0 = const()[name = tensor("query_17_strides_0"), val = tensor([1, 1])]; + tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_17_dilations_0 = const()[name = tensor("query_17_dilations_0"), val = tensor([1, 1])]; + tensor query_17_groups_0 = const()[name = tensor("query_17_groups_0"), val = tensor(1)]; + tensor 
layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241508800)))]; + tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(243606016)))]; + tensor query_17_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = query_17_dilations_0, groups = query_17_groups_0, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = query_17_strides_0, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor current_key_9_pad_type_0 = const()[name = tensor("current_key_9_pad_type_0"), val = tensor("valid")]; + tensor current_key_9_strides_0 = const()[name = tensor("current_key_9_strides_0"), val = tensor([1, 1])]; + tensor current_key_9_pad_0 = const()[name = tensor("current_key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_9_dilations_0 = const()[name = tensor("current_key_9_dilations_0"), val = tensor([1, 1])]; + tensor current_key_9_groups_0 = const()[name = tensor("current_key_9_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(243608128)))]; + tensor current_key_9_cast_fp16 = conv(dilations = current_key_9_dilations_0, groups = current_key_9_groups_0, pad = current_key_9_pad_0, pad_type = current_key_9_pad_type_0, strides = current_key_9_strides_0, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("current_key_9_cast_fp16")]; + tensor current_value_9_pad_type_0 = const()[name = tensor("current_value_9_pad_type_0"), val = tensor("valid")]; + tensor current_value_9_strides_0 = const()[name = tensor("current_value_9_strides_0"), val = tensor([1, 1])]; + tensor current_value_9_pad_0 = const()[name = tensor("current_value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_9_dilations_0 = const()[name = tensor("current_value_9_dilations_0"), val = tensor([1, 1])]; + tensor current_value_9_groups_0 = const()[name = tensor("current_value_9_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245705344)))]; + tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247802560)))]; + tensor current_value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = current_value_9_dilations_0, groups = current_value_9_groups_0, pad = current_value_9_pad_0, pad_type = current_value_9_pad_type_0, strides = current_value_9_strides_0, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("current_value_9_cast_fp16")]; + tensor var_1084_cast_fp16 = mul(x = var_87_cast_fp16_4, y = var_207_cast_fp16)[name = tensor("op_1084_cast_fp16")]; + tensor var_1085_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1085_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1084_cast_fp16, y = 
var_1085_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1088_cast_fp16 = mul(x = var_114_cast_fp16_4, y = var_207_cast_fp16)[name = tensor("op_1088_cast_fp16")]; + tensor var_1089_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1089_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1088_cast_fp16, y = var_1089_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1093 = const()[name = tensor("op_1093"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1093, x = query_17_cast_fp16)[name = tensor("mh_q_17_cast_fp16")]; + tensor var_1095_to_fp16 = const()[name = tensor("op_1095_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1096_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1095_to_fp16)[name = tensor("op_1096_cast_fp16")]; + tensor var_1099 = const()[name = tensor("op_1099"), val = tensor([1, 16, 64, 448])]; + tensor var_1100_cast_fp16 = reshape(shape = var_1099, x = key_17_cast_fp16)[name = tensor("op_1100_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1096_cast_fp16, y = var_1100_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor var_1108_cast_fp16 = softmax(axis = var_1020, x = mh_w_27_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor var_1109 = const()[name = tensor("op_1109"), val = tensor([1, 16, 64, 448])]; + tensor var_1110_cast_fp16 = reshape(shape = var_1109, x = value_17_cast_fp16)[name = tensor("op_1110_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1110_cast_fp16, y = var_1108_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1113 = const()[name = tensor("op_1113"), val = tensor([1, 1024, 1, 1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1113, x = attn_17_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor obj_63_pad_type_0 = const()[name = tensor("obj_63_pad_type_0"), val = tensor("valid")]; + tensor obj_63_strides_0 = const()[name = tensor("obj_63_strides_0"), val = tensor([1, 1])]; + tensor obj_63_pad_0 = const()[name = tensor("obj_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_63_dilations_0 = const()[name = tensor("obj_63_dilations_0"), val = tensor([1, 1])]; + tensor obj_63_groups_0 = const()[name = tensor("obj_63_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247804672)))]; + tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249901888)))]; + tensor obj_63_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = obj_63_dilations_0, groups = obj_63_groups_0, pad = obj_63_pad_0, pad_type = 
obj_63_pad_type_0, strides = obj_63_strides_0, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; + tensor var_1135_to_fp16 = const()[name = tensor("op_1135_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1135_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249904000)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249906112)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = tensor("valid")]; + tensor query_19_strides_0 = const()[name = tensor("query_19_strides_0"), val = tensor([1, 1])]; + tensor query_19_pad_0 = const()[name = tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_19_dilations_0 = const()[name = tensor("query_19_dilations_0"), val = tensor([1, 1])]; + tensor query_19_groups_0 = const()[name = tensor("query_19_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249908224)))]; + tensor layers_4_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(252005440)))]; + tensor query_19_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_bias_to_fp16, dilations = query_19_dilations_0, groups = query_19_groups_0, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = query_19_strides_0, weight = layers_4_encoder_attn_q_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("valid")]; + tensor key_19_strides_0 = const()[name = tensor("key_19_strides_0"), val = tensor([1, 1])]; + tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_19_dilations_0 = const()[name = tensor("key_19_dilations_0"), val = tensor([1, 1])]; + tensor key_19_groups_0 = const()[name = tensor("key_19_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(252007552)))]; + tensor key_19_cast_fp16 = conv(dilations = key_19_dilations_0, groups = key_19_groups_0, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = key_19_strides_0, weight = layers_4_encoder_attn_k_proj_weight_to_fp16, x 
= encoder_output_embeds)[name = tensor("key_19_cast_fp16")]; + tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("valid")]; + tensor value_19_strides_0 = const()[name = tensor("value_19_strides_0"), val = tensor([1, 1])]; + tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_19_dilations_0 = const()[name = tensor("value_19_dilations_0"), val = tensor([1, 1])]; + tensor value_19_groups_0 = const()[name = tensor("value_19_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254104768)))]; + tensor layers_4_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256201984)))]; + tensor value_19_cast_fp16 = conv(bias = layers_4_encoder_attn_v_proj_bias_to_fp16, dilations = value_19_dilations_0, groups = value_19_groups_0, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = value_19_strides_0, weight = layers_4_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_19_cast_fp16")]; + tensor var_1171 = const()[name = tensor("op_1171"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1171, x = query_19_cast_fp16)[name = tensor("mh_q_19_cast_fp16")]; + tensor var_1173_to_fp16 = const()[name = tensor("op_1173_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1174_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1173_to_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1177 = const()[name = tensor("op_1177"), val = tensor([1, 16, 64, 1500])]; + tensor var_1178_cast_fp16 = reshape(shape = var_1177, x = key_19_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor mh_w_29_transpose_x_0 = const()[name = tensor("mh_w_29_transpose_x_0"), val = tensor(true)]; + tensor mh_w_29_transpose_y_0 = const()[name = tensor("mh_w_29_transpose_y_0"), val = tensor(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1174_cast_fp16, y = var_1178_cast_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor obj_69_cast_fp16 = softmax(axis = var_1020, x = mh_w_29_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor var_1182 = const()[name = tensor("op_1182"), val = tensor([1, 16, 64, 1500])]; + tensor var_1183_cast_fp16 = reshape(shape = var_1182, x = value_19_cast_fp16)[name = tensor("op_1183_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1183_cast_fp16, y = obj_69_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1186 = const()[name = tensor("op_1186"), val = tensor([1, 1024, 1, 1])]; + tensor input_43_cast_fp16 = reshape(shape = var_1186, x = attn_19_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor obj_67_pad_type_0 = const()[name = tensor("obj_67_pad_type_0"), val = tensor("valid")]; + tensor obj_67_strides_0 = const()[name = tensor("obj_67_strides_0"), val = tensor([1, 1])]; + tensor obj_67_pad_0 = const()[name = tensor("obj_67_pad_0"), val = tensor([0, 0, 
0, 0])]; + tensor obj_67_dilations_0 = const()[name = tensor("obj_67_dilations_0"), val = tensor([1, 1])]; + tensor obj_67_groups_0 = const()[name = tensor("obj_67_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256204096)))]; + tensor layers_4_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258301312)))]; + tensor obj_67_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_bias_to_fp16, dilations = obj_67_dilations_0, groups = obj_67_groups_0, pad = obj_67_pad_0, pad_type = obj_67_pad_type_0, strides = obj_67_strides_0, weight = layers_4_encoder_attn_o_proj_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; + tensor var_1204_to_fp16 = const()[name = tensor("op_1204_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1204_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor input_45_gamma_0_to_fp16 = const()[name = tensor("input_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258303424)))]; + tensor input_45_beta_0_to_fp16 = const()[name = tensor("input_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258305536)))]; + tensor input_45_epsilon_0_to_fp16 = const()[name = tensor("input_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_47_pad_type_0 = const()[name = tensor("input_47_pad_type_0"), val = tensor("valid")]; + tensor input_47_strides_0 = const()[name = tensor("input_47_strides_0"), val = tensor([1, 1])]; + tensor input_47_pad_0 = const()[name = tensor("input_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_47_dilations_0 = const()[name = tensor("input_47_dilations_0"), val = tensor([1, 1])]; + tensor input_47_groups_0 = const()[name = tensor("input_47_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258307648)))]; + tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266696320)))]; + tensor input_47_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = input_47_dilations_0, groups = input_47_groups_0, pad = input_47_pad_0, pad_type = input_47_pad_type_0, strides = input_47_strides_0, weight = layers_4_fc1_weight_to_fp16, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor input_49_mode_0 = const()[name = tensor("input_49_mode_0"), val = tensor("EXACT")]; + tensor input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = 
input_47_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_11_strides_0 = const()[name = tensor("hidden_states_11_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_11_dilations_0 = const()[name = tensor("hidden_states_11_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_11_groups_0 = const()[name = tensor("hidden_states_11_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266704576)))]; + tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275093248)))]; + tensor hidden_states_11_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = layers_4_fc2_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor var_1239 = const()[name = tensor("op_1239"), val = tensor(3)]; + tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; + tensor var_1264_to_fp16 = const()[name = tensor("op_1264_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1264_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor obj_71_gamma_0_to_fp16 = const()[name = tensor("obj_71_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275095360)))]; + tensor obj_71_beta_0_to_fp16 = const()[name = tensor("obj_71_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275097472)))]; + tensor obj_71_epsilon_0_to_fp16 = const()[name = tensor("obj_71_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_71_cast_fp16 = batch_norm(beta = obj_71_beta_0_to_fp16, epsilon = obj_71_epsilon_0_to_fp16, gamma = obj_71_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("valid")]; + tensor query_21_strides_0 = const()[name = tensor("query_21_strides_0"), val = tensor([1, 1])]; + tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_21_dilations_0 = const()[name = tensor("query_21_dilations_0"), val = tensor([1, 1])]; + tensor query_21_groups_0 = const()[name = tensor("query_21_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275099584)))]; + tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(277196800)))]; + tensor query_21_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = query_21_dilations_0, groups = query_21_groups_0, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = query_21_strides_0, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor current_key_11_pad_type_0 = const()[name = tensor("current_key_11_pad_type_0"), val = tensor("valid")]; + tensor current_key_11_strides_0 = const()[name = tensor("current_key_11_strides_0"), val = tensor([1, 1])]; + tensor current_key_11_pad_0 = const()[name = tensor("current_key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_11_dilations_0 = const()[name = tensor("current_key_11_dilations_0"), val = tensor([1, 1])]; + tensor current_key_11_groups_0 = const()[name = tensor("current_key_11_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277198912)))]; + tensor current_key_11_cast_fp16 = conv(dilations = current_key_11_dilations_0, groups = current_key_11_groups_0, pad = current_key_11_pad_0, pad_type = current_key_11_pad_type_0, strides = current_key_11_strides_0, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("current_key_11_cast_fp16")]; + tensor current_value_11_pad_type_0 = const()[name = tensor("current_value_11_pad_type_0"), val = tensor("valid")]; + tensor current_value_11_strides_0 = const()[name = tensor("current_value_11_strides_0"), val = tensor([1, 1])]; + tensor current_value_11_pad_0 = const()[name = tensor("current_value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_11_dilations_0 = const()[name = tensor("current_value_11_dilations_0"), val = tensor([1, 1])]; + tensor current_value_11_groups_0 = const()[name = tensor("current_value_11_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279296128)))]; + tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281393344)))]; + tensor current_value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = current_value_11_dilations_0, groups = current_value_11_groups_0, pad = current_value_11_pad_0, pad_type = current_value_11_pad_type_0, strides = current_value_11_strides_0, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("current_value_11_cast_fp16")]; + tensor var_1303_cast_fp16 = mul(x = var_87_cast_fp16_5, y = var_207_cast_fp16)[name = tensor("op_1303_cast_fp16")]; + tensor var_1304_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1303_cast_fp16, y = var_1304_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_1307_cast_fp16 = mul(x = var_114_cast_fp16_5, y = var_207_cast_fp16)[name = tensor("op_1307_cast_fp16")]; + tensor var_1308_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_1307_cast_fp16, y = 
var_1308_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_1312 = const()[name = tensor("op_1312"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_1312, x = query_21_cast_fp16)[name = tensor("mh_q_21_cast_fp16")]; + tensor var_1314_to_fp16 = const()[name = tensor("op_1314_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1315_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1314_to_fp16)[name = tensor("op_1315_cast_fp16")]; + tensor var_1318 = const()[name = tensor("op_1318"), val = tensor([1, 16, 64, 448])]; + tensor var_1319_cast_fp16 = reshape(shape = var_1318, x = key_21_cast_fp16)[name = tensor("op_1319_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_1315_cast_fp16, y = var_1319_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor mh_w_33_cast_fp16 = add(x = mh_w_31_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_1239, x = mh_w_33_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328 = const()[name = tensor("op_1328"), val = tensor([1, 16, 64, 448])]; + tensor var_1329_cast_fp16 = reshape(shape = var_1328, x = value_21_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1329_cast_fp16, y = var_1327_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_1332 = const()[name = tensor("op_1332"), val = tensor([1, 1024, 1, 1])]; + tensor input_51_cast_fp16 = reshape(shape = var_1332, x = attn_21_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor obj_77_pad_type_0 = const()[name = tensor("obj_77_pad_type_0"), val = tensor("valid")]; + tensor obj_77_strides_0 = const()[name = tensor("obj_77_strides_0"), val = tensor([1, 1])]; + tensor obj_77_pad_0 = const()[name = tensor("obj_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_77_dilations_0 = const()[name = tensor("obj_77_dilations_0"), val = tensor([1, 1])]; + tensor obj_77_groups_0 = const()[name = tensor("obj_77_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281395456)))]; + tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283492672)))]; + tensor obj_77_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = obj_77_dilations_0, groups = obj_77_groups_0, pad = obj_77_pad_0, pad_type = obj_77_pad_type_0, strides = obj_77_strides_0, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_77_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; + 
tensor var_1354_to_fp16 = const()[name = tensor("op_1354_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1354_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_79_gamma_0_to_fp16 = const()[name = tensor("obj_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283494784)))]; + tensor obj_79_beta_0_to_fp16 = const()[name = tensor("obj_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283496896)))]; + tensor obj_79_epsilon_0_to_fp16 = const()[name = tensor("obj_79_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor query_23_pad_type_0 = const()[name = tensor("query_23_pad_type_0"), val = tensor("valid")]; + tensor query_23_strides_0 = const()[name = tensor("query_23_strides_0"), val = tensor([1, 1])]; + tensor query_23_pad_0 = const()[name = tensor("query_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_23_dilations_0 = const()[name = tensor("query_23_dilations_0"), val = tensor([1, 1])]; + tensor query_23_groups_0 = const()[name = tensor("query_23_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283499008)))]; + tensor layers_5_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285596224)))]; + tensor query_23_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_bias_to_fp16, dilations = query_23_dilations_0, groups = query_23_groups_0, pad = query_23_pad_0, pad_type = query_23_pad_type_0, strides = query_23_strides_0, weight = layers_5_encoder_attn_q_proj_weight_to_fp16, x = obj_79_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor key_23_pad_type_0 = const()[name = tensor("key_23_pad_type_0"), val = tensor("valid")]; + tensor key_23_strides_0 = const()[name = tensor("key_23_strides_0"), val = tensor([1, 1])]; + tensor key_23_pad_0 = const()[name = tensor("key_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_23_dilations_0 = const()[name = tensor("key_23_dilations_0"), val = tensor([1, 1])]; + tensor key_23_groups_0 = const()[name = tensor("key_23_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285598336)))]; + tensor key_23_cast_fp16 = conv(dilations = key_23_dilations_0, groups = key_23_groups_0, pad = key_23_pad_0, pad_type = key_23_pad_type_0, strides = key_23_strides_0, weight = layers_5_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_23_cast_fp16")]; + tensor value_23_pad_type_0 = const()[name = tensor("value_23_pad_type_0"), val = tensor("valid")]; + tensor value_23_strides_0 = const()[name = tensor("value_23_strides_0"), val = tensor([1, 1])]; + tensor value_23_pad_0 = const()[name = tensor("value_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
value_23_dilations_0 = const()[name = tensor("value_23_dilations_0"), val = tensor([1, 1])]; + tensor value_23_groups_0 = const()[name = tensor("value_23_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287695552)))]; + tensor layers_5_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289792768)))]; + tensor value_23_cast_fp16 = conv(bias = layers_5_encoder_attn_v_proj_bias_to_fp16, dilations = value_23_dilations_0, groups = value_23_groups_0, pad = value_23_pad_0, pad_type = value_23_pad_type_0, strides = value_23_strides_0, weight = layers_5_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_23_cast_fp16")]; + tensor var_1390 = const()[name = tensor("op_1390"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_23_cast_fp16 = reshape(shape = var_1390, x = query_23_cast_fp16)[name = tensor("mh_q_23_cast_fp16")]; + tensor var_1392_to_fp16 = const()[name = tensor("op_1392_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1393_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1392_to_fp16)[name = tensor("op_1393_cast_fp16")]; + tensor var_1396 = const()[name = tensor("op_1396"), val = tensor([1, 16, 64, 1500])]; + tensor var_1397_cast_fp16 = reshape(shape = var_1396, x = key_23_cast_fp16)[name = tensor("op_1397_cast_fp16")]; + tensor mh_w_35_transpose_x_0 = const()[name = tensor("mh_w_35_transpose_x_0"), val = tensor(true)]; + tensor mh_w_35_transpose_y_0 = const()[name = tensor("mh_w_35_transpose_y_0"), val = tensor(false)]; + tensor mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_1393_cast_fp16, y = var_1397_cast_fp16)[name = tensor("mh_w_35_cast_fp16")]; + tensor obj_83_cast_fp16 = softmax(axis = var_1239, x = mh_w_35_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor var_1401 = const()[name = tensor("op_1401"), val = tensor([1, 16, 64, 1500])]; + tensor var_1402_cast_fp16 = reshape(shape = var_1401, x = value_23_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor attn_23_transpose_x_0 = const()[name = tensor("attn_23_transpose_x_0"), val = tensor(false)]; + tensor attn_23_transpose_y_0 = const()[name = tensor("attn_23_transpose_y_0"), val = tensor(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1402_cast_fp16, y = obj_83_cast_fp16)[name = tensor("attn_23_cast_fp16")]; + tensor var_1405 = const()[name = tensor("op_1405"), val = tensor([1, 1024, 1, 1])]; + tensor input_53_cast_fp16 = reshape(shape = var_1405, x = attn_23_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor obj_81_pad_type_0 = const()[name = tensor("obj_81_pad_type_0"), val = tensor("valid")]; + tensor obj_81_strides_0 = const()[name = tensor("obj_81_strides_0"), val = tensor([1, 1])]; + tensor obj_81_pad_0 = const()[name = tensor("obj_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_81_dilations_0 = const()[name = tensor("obj_81_dilations_0"), val = tensor([1, 1])]; + tensor obj_81_groups_0 = const()[name = tensor("obj_81_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(289794880)))]; + tensor layers_5_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291892096)))]; + tensor obj_81_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_bias_to_fp16, dilations = obj_81_dilations_0, groups = obj_81_groups_0, pad = obj_81_pad_0, pad_type = obj_81_pad_type_0, strides = obj_81_strides_0, weight = layers_5_encoder_attn_o_proj_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_81_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; + tensor var_1423_to_fp16 = const()[name = tensor("op_1423_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1423_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_55_gamma_0_to_fp16 = const()[name = tensor("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291894208)))]; + tensor input_55_beta_0_to_fp16 = const()[name = tensor("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291896320)))]; + tensor input_55_epsilon_0_to_fp16 = const()[name = tensor("input_55_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor input_57_pad_type_0 = const()[name = tensor("input_57_pad_type_0"), val = tensor("valid")]; + tensor input_57_strides_0 = const()[name = tensor("input_57_strides_0"), val = tensor([1, 1])]; + tensor input_57_pad_0 = const()[name = tensor("input_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_57_dilations_0 = const()[name = tensor("input_57_dilations_0"), val = tensor([1, 1])]; + tensor input_57_groups_0 = const()[name = tensor("input_57_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291898432)))]; + tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300287104)))]; + tensor input_57_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = input_57_dilations_0, groups = input_57_groups_0, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = input_57_strides_0, weight = layers_5_fc1_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor input_59_mode_0 = const()[name = tensor("input_59_mode_0"), val = tensor("EXACT")]; + tensor input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_13_strides_0 = const()[name = tensor("hidden_states_13_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_13_pad_0 = const()[name = 
tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_13_dilations_0 = const()[name = tensor("hidden_states_13_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_13_groups_0 = const()[name = tensor("hidden_states_13_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300295360)))]; + tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308684032)))]; + tensor hidden_states_13_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = hidden_states_13_dilations_0, groups = hidden_states_13_groups_0, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = hidden_states_13_strides_0, weight = layers_5_fc2_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1458 = const()[name = tensor("op_1458"), val = tensor(3)]; + tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; + tensor var_1483_to_fp16 = const()[name = tensor("op_1483_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1483_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308686144)))]; + tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308688256)))]; + tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor query_25_pad_type_0 = const()[name = tensor("query_25_pad_type_0"), val = tensor("valid")]; + tensor query_25_strides_0 = const()[name = tensor("query_25_strides_0"), val = tensor([1, 1])]; + tensor query_25_pad_0 = const()[name = tensor("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_25_dilations_0 = const()[name = tensor("query_25_dilations_0"), val = tensor([1, 1])]; + tensor query_25_groups_0 = const()[name = tensor("query_25_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308690368)))]; + tensor layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310787584)))]; + tensor query_25_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = query_25_dilations_0, groups = query_25_groups_0, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = query_25_strides_0, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = 
tensor("query_25_cast_fp16")]; + tensor current_key_13_pad_type_0 = const()[name = tensor("current_key_13_pad_type_0"), val = tensor("valid")]; + tensor current_key_13_strides_0 = const()[name = tensor("current_key_13_strides_0"), val = tensor([1, 1])]; + tensor current_key_13_pad_0 = const()[name = tensor("current_key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_13_dilations_0 = const()[name = tensor("current_key_13_dilations_0"), val = tensor([1, 1])]; + tensor current_key_13_groups_0 = const()[name = tensor("current_key_13_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310789696)))]; + tensor current_key_13_cast_fp16 = conv(dilations = current_key_13_dilations_0, groups = current_key_13_groups_0, pad = current_key_13_pad_0, pad_type = current_key_13_pad_type_0, strides = current_key_13_strides_0, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("current_key_13_cast_fp16")]; + tensor current_value_13_pad_type_0 = const()[name = tensor("current_value_13_pad_type_0"), val = tensor("valid")]; + tensor current_value_13_strides_0 = const()[name = tensor("current_value_13_strides_0"), val = tensor([1, 1])]; + tensor current_value_13_pad_0 = const()[name = tensor("current_value_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_13_dilations_0 = const()[name = tensor("current_value_13_dilations_0"), val = tensor([1, 1])]; + tensor current_value_13_groups_0 = const()[name = tensor("current_value_13_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312886912)))]; + tensor layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314984128)))]; + tensor current_value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = current_value_13_dilations_0, groups = current_value_13_groups_0, pad = current_value_13_pad_0, pad_type = current_value_13_pad_type_0, strides = current_value_13_strides_0, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("current_value_13_cast_fp16")]; + tensor var_1522_cast_fp16 = mul(x = var_87_cast_fp16_6, y = var_207_cast_fp16)[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1523_cast_fp16")]; + tensor key_25_cast_fp16 = add(x = var_1522_cast_fp16, y = var_1523_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor var_1526_cast_fp16 = mul(x = var_114_cast_fp16_6, y = var_207_cast_fp16)[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1527_cast_fp16")]; + tensor value_25_cast_fp16 = add(x = var_1526_cast_fp16, y = var_1527_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_1531 = const()[name = tensor("op_1531"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_1531, x = query_25_cast_fp16)[name = tensor("mh_q_25_cast_fp16")]; + tensor var_1533_to_fp16 = const()[name = tensor("op_1533_to_fp16"), val = tensor(0x1p-3)]; + 
tensor var_1534_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_1533_to_fp16)[name = tensor("op_1534_cast_fp16")]; + tensor var_1537 = const()[name = tensor("op_1537"), val = tensor([1, 16, 64, 448])]; + tensor var_1538_cast_fp16 = reshape(shape = var_1537, x = key_25_cast_fp16)[name = tensor("op_1538_cast_fp16")]; + tensor mh_w_37_transpose_x_0 = const()[name = tensor("mh_w_37_transpose_x_0"), val = tensor(true)]; + tensor mh_w_37_transpose_y_0 = const()[name = tensor("mh_w_37_transpose_y_0"), val = tensor(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_1534_cast_fp16, y = var_1538_cast_fp16)[name = tensor("mh_w_37_cast_fp16")]; + tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_39_cast_fp16")]; + tensor var_1546_cast_fp16 = softmax(axis = var_1458, x = mh_w_39_cast_fp16)[name = tensor("op_1546_cast_fp16")]; + tensor var_1547 = const()[name = tensor("op_1547"), val = tensor([1, 16, 64, 448])]; + tensor var_1548_cast_fp16 = reshape(shape = var_1547, x = value_25_cast_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor attn_25_transpose_x_0 = const()[name = tensor("attn_25_transpose_x_0"), val = tensor(false)]; + tensor attn_25_transpose_y_0 = const()[name = tensor("attn_25_transpose_y_0"), val = tensor(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1548_cast_fp16, y = var_1546_cast_fp16)[name = tensor("attn_25_cast_fp16")]; + tensor var_1551 = const()[name = tensor("op_1551"), val = tensor([1, 1024, 1, 1])]; + tensor input_61_cast_fp16 = reshape(shape = var_1551, x = attn_25_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor obj_91_pad_type_0 = const()[name = tensor("obj_91_pad_type_0"), val = tensor("valid")]; + tensor obj_91_strides_0 = const()[name = tensor("obj_91_strides_0"), val = tensor([1, 1])]; + tensor obj_91_pad_0 = const()[name = tensor("obj_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_91_dilations_0 = const()[name = tensor("obj_91_dilations_0"), val = tensor([1, 1])]; + tensor obj_91_groups_0 = const()[name = tensor("obj_91_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314986240)))]; + tensor layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(317083456)))]; + tensor obj_91_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = obj_91_dilations_0, groups = obj_91_groups_0, pad = obj_91_pad_0, pad_type = obj_91_pad_type_0, strides = obj_91_strides_0, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_61_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; + tensor var_1573_to_fp16 = const()[name = tensor("op_1573_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1573_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor obj_93_gamma_0_to_fp16 = const()[name = tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(317085568)))]; + tensor obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(317087680)))]; + tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor query_27_pad_type_0 = const()[name = tensor("query_27_pad_type_0"), val = tensor("valid")]; + tensor query_27_strides_0 = const()[name = tensor("query_27_strides_0"), val = tensor([1, 1])]; + tensor query_27_pad_0 = const()[name = tensor("query_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_27_dilations_0 = const()[name = tensor("query_27_dilations_0"), val = tensor([1, 1])]; + tensor query_27_groups_0 = const()[name = tensor("query_27_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(317089792)))]; + tensor layers_6_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319187008)))]; + tensor query_27_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_bias_to_fp16, dilations = query_27_dilations_0, groups = query_27_groups_0, pad = query_27_pad_0, pad_type = query_27_pad_type_0, strides = query_27_strides_0, weight = layers_6_encoder_attn_q_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor key_27_pad_type_0 = const()[name = tensor("key_27_pad_type_0"), val = tensor("valid")]; + tensor key_27_strides_0 = const()[name = tensor("key_27_strides_0"), val = tensor([1, 1])]; + tensor key_27_pad_0 = const()[name = tensor("key_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_27_dilations_0 = const()[name = tensor("key_27_dilations_0"), val = tensor([1, 1])]; + tensor key_27_groups_0 = const()[name = tensor("key_27_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319189120)))]; + tensor key_27_cast_fp16 = conv(dilations = key_27_dilations_0, groups = key_27_groups_0, pad = key_27_pad_0, pad_type = key_27_pad_type_0, strides = key_27_strides_0, weight = layers_6_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_27_cast_fp16")]; + tensor value_27_pad_type_0 = const()[name = tensor("value_27_pad_type_0"), val = tensor("valid")]; + tensor value_27_strides_0 = const()[name = tensor("value_27_strides_0"), val = tensor([1, 1])]; + tensor value_27_pad_0 = const()[name = tensor("value_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_27_dilations_0 = const()[name = tensor("value_27_dilations_0"), val = tensor([1, 1])]; + tensor value_27_groups_0 = const()[name = tensor("value_27_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(321286336)))]; + tensor layers_6_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323383552)))]; + tensor value_27_cast_fp16 = conv(bias = layers_6_encoder_attn_v_proj_bias_to_fp16, dilations = value_27_dilations_0, groups = value_27_groups_0, pad = value_27_pad_0, pad_type = value_27_pad_type_0, strides = value_27_strides_0, weight = layers_6_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_27_cast_fp16")]; + tensor var_1609 = const()[name = tensor("op_1609"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_27_cast_fp16 = reshape(shape = var_1609, x = query_27_cast_fp16)[name = tensor("mh_q_27_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1612_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1615 = const()[name = tensor("op_1615"), val = tensor([1, 16, 64, 1500])]; + tensor var_1616_cast_fp16 = reshape(shape = var_1615, x = key_27_cast_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor mh_w_41_transpose_x_0 = const()[name = tensor("mh_w_41_transpose_x_0"), val = tensor(true)]; + tensor mh_w_41_transpose_y_0 = const()[name = tensor("mh_w_41_transpose_y_0"), val = tensor(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_1612_cast_fp16, y = var_1616_cast_fp16)[name = tensor("mh_w_41_cast_fp16")]; + tensor obj_97_cast_fp16 = softmax(axis = var_1458, x = mh_w_41_cast_fp16)[name = tensor("obj_97_cast_fp16")]; + tensor var_1620 = const()[name = tensor("op_1620"), val = tensor([1, 16, 64, 1500])]; + tensor var_1621_cast_fp16 = reshape(shape = var_1620, x = value_27_cast_fp16)[name = tensor("op_1621_cast_fp16")]; + tensor attn_27_transpose_x_0 = const()[name = tensor("attn_27_transpose_x_0"), val = tensor(false)]; + tensor attn_27_transpose_y_0 = const()[name = tensor("attn_27_transpose_y_0"), val = tensor(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1621_cast_fp16, y = obj_97_cast_fp16)[name = tensor("attn_27_cast_fp16")]; + tensor var_1624 = const()[name = tensor("op_1624"), val = tensor([1, 1024, 1, 1])]; + tensor input_63_cast_fp16 = reshape(shape = var_1624, x = attn_27_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor obj_95_pad_type_0 = const()[name = tensor("obj_95_pad_type_0"), val = tensor("valid")]; + tensor obj_95_strides_0 = const()[name = tensor("obj_95_strides_0"), val = tensor([1, 1])]; + tensor obj_95_pad_0 = const()[name = tensor("obj_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_95_dilations_0 = const()[name = tensor("obj_95_dilations_0"), val = tensor([1, 1])]; + tensor obj_95_groups_0 = const()[name = tensor("obj_95_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323385664)))]; + tensor layers_6_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325482880)))]; + tensor obj_95_cast_fp16 = conv(bias = 
layers_6_encoder_attn_o_proj_bias_to_fp16, dilations = obj_95_dilations_0, groups = obj_95_groups_0, pad = obj_95_pad_0, pad_type = obj_95_pad_type_0, strides = obj_95_strides_0, weight = layers_6_encoder_attn_o_proj_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("obj_95_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_95_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; + tensor var_1642_to_fp16 = const()[name = tensor("op_1642_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1642_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor input_65_gamma_0_to_fp16 = const()[name = tensor("input_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325484992)))]; + tensor input_65_beta_0_to_fp16 = const()[name = tensor("input_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325487104)))]; + tensor input_65_epsilon_0_to_fp16 = const()[name = tensor("input_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor input_67_pad_type_0 = const()[name = tensor("input_67_pad_type_0"), val = tensor("valid")]; + tensor input_67_strides_0 = const()[name = tensor("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = tensor("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = tensor("input_67_dilations_0"), val = tensor([1, 1])]; + tensor input_67_groups_0 = const()[name = tensor("input_67_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_weight_to_fp16 = const()[name = tensor("layers_6_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325489216)))]; + tensor layers_6_fc1_bias_to_fp16 = const()[name = tensor("layers_6_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333877888)))]; + tensor input_67_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = layers_6_fc1_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor input_69_mode_0 = const()[name = tensor("input_69_mode_0"), val = tensor("EXACT")]; + tensor input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor hidden_states_15_pad_type_0 = const()[name = tensor("hidden_states_15_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_15_strides_0 = const()[name = tensor("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = tensor("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = tensor("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_15_groups_0 = const()[name = tensor("hidden_states_15_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_weight_to_fp16 = const()[name = 
tensor("layers_6_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333886144)))]; + tensor layers_6_fc2_bias_to_fp16 = const()[name = tensor("layers_6_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342274816)))]; + tensor hidden_states_15_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = layers_6_fc2_weight_to_fp16, x = input_69_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor var_1677 = const()[name = tensor("op_1677"), val = tensor(3)]; + tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; + tensor var_1702_to_fp16 = const()[name = tensor("op_1702_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_1702_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor obj_99_gamma_0_to_fp16 = const()[name = tensor("obj_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342276928)))]; + tensor obj_99_beta_0_to_fp16 = const()[name = tensor("obj_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342279040)))]; + tensor obj_99_epsilon_0_to_fp16 = const()[name = tensor("obj_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_99_cast_fp16 = batch_norm(beta = obj_99_beta_0_to_fp16, epsilon = obj_99_epsilon_0_to_fp16, gamma = obj_99_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("obj_99_cast_fp16")]; + tensor query_29_pad_type_0 = const()[name = tensor("query_29_pad_type_0"), val = tensor("valid")]; + tensor query_29_strides_0 = const()[name = tensor("query_29_strides_0"), val = tensor([1, 1])]; + tensor query_29_pad_0 = const()[name = tensor("query_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_29_dilations_0 = const()[name = tensor("query_29_dilations_0"), val = tensor([1, 1])]; + tensor query_29_groups_0 = const()[name = tensor("query_29_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342281152)))]; + tensor layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344378368)))]; + tensor query_29_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = query_29_dilations_0, groups = query_29_groups_0, pad = query_29_pad_0, pad_type = query_29_pad_type_0, strides = query_29_strides_0, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor current_key_15_pad_type_0 = const()[name = tensor("current_key_15_pad_type_0"), val = tensor("valid")]; + tensor current_key_15_strides_0 = const()[name = tensor("current_key_15_strides_0"), val = tensor([1, 1])]; + tensor current_key_15_pad_0 = const()[name = tensor("current_key_15_pad_0"), val = 
tensor([0, 0, 0, 0])]; + tensor current_key_15_dilations_0 = const()[name = tensor("current_key_15_dilations_0"), val = tensor([1, 1])]; + tensor current_key_15_groups_0 = const()[name = tensor("current_key_15_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344380480)))]; + tensor current_key_15_cast_fp16 = conv(dilations = current_key_15_dilations_0, groups = current_key_15_groups_0, pad = current_key_15_pad_0, pad_type = current_key_15_pad_type_0, strides = current_key_15_strides_0, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor("current_key_15_cast_fp16")]; + tensor current_value_15_pad_type_0 = const()[name = tensor("current_value_15_pad_type_0"), val = tensor("valid")]; + tensor current_value_15_strides_0 = const()[name = tensor("current_value_15_strides_0"), val = tensor([1, 1])]; + tensor current_value_15_pad_0 = const()[name = tensor("current_value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_15_dilations_0 = const()[name = tensor("current_value_15_dilations_0"), val = tensor([1, 1])]; + tensor current_value_15_groups_0 = const()[name = tensor("current_value_15_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346477696)))]; + tensor layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348574912)))]; + tensor current_value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = current_value_15_dilations_0, groups = current_value_15_groups_0, pad = current_value_15_pad_0, pad_type = current_value_15_pad_type_0, strides = current_value_15_strides_0, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor("current_value_15_cast_fp16")]; + tensor var_1741_cast_fp16 = mul(x = var_87_cast_fp16_7, y = var_207_cast_fp16)[name = tensor("op_1741_cast_fp16")]; + tensor var_1742_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1742_cast_fp16")]; + tensor key_29_cast_fp16 = add(x = var_1741_cast_fp16, y = var_1742_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor var_1745_cast_fp16 = mul(x = var_114_cast_fp16_7, y = var_207_cast_fp16)[name = tensor("op_1745_cast_fp16")]; + tensor var_1746_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1746_cast_fp16")]; + tensor value_29_cast_fp16 = add(x = var_1745_cast_fp16, y = var_1746_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_1750 = const()[name = tensor("op_1750"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_29_cast_fp16 = reshape(shape = var_1750, x = query_29_cast_fp16)[name = tensor("mh_q_29_cast_fp16")]; + tensor var_1752_to_fp16 = const()[name = tensor("op_1752_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1753_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_1752_to_fp16)[name = tensor("op_1753_cast_fp16")]; + tensor var_1756 = const()[name = tensor("op_1756"), val = tensor([1, 16, 64, 448])]; + tensor var_1757_cast_fp16 = reshape(shape = var_1756, x = key_29_cast_fp16)[name = tensor("op_1757_cast_fp16")]; + tensor 
mh_w_43_transpose_x_0 = const()[name = tensor("mh_w_43_transpose_x_0"), val = tensor(true)]; + tensor mh_w_43_transpose_y_0 = const()[name = tensor("mh_w_43_transpose_y_0"), val = tensor(false)]; + tensor mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_1753_cast_fp16, y = var_1757_cast_fp16)[name = tensor("mh_w_43_cast_fp16")]; + tensor mh_w_45_cast_fp16 = add(x = mh_w_43_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_45_cast_fp16")]; + tensor var_1765_cast_fp16 = softmax(axis = var_1677, x = mh_w_45_cast_fp16)[name = tensor("op_1765_cast_fp16")]; + tensor var_1766 = const()[name = tensor("op_1766"), val = tensor([1, 16, 64, 448])]; + tensor var_1767_cast_fp16 = reshape(shape = var_1766, x = value_29_cast_fp16)[name = tensor("op_1767_cast_fp16")]; + tensor attn_29_transpose_x_0 = const()[name = tensor("attn_29_transpose_x_0"), val = tensor(false)]; + tensor attn_29_transpose_y_0 = const()[name = tensor("attn_29_transpose_y_0"), val = tensor(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_1767_cast_fp16, y = var_1765_cast_fp16)[name = tensor("attn_29_cast_fp16")]; + tensor var_1770 = const()[name = tensor("op_1770"), val = tensor([1, 1024, 1, 1])]; + tensor input_71_cast_fp16 = reshape(shape = var_1770, x = attn_29_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor obj_105_pad_type_0 = const()[name = tensor("obj_105_pad_type_0"), val = tensor("valid")]; + tensor obj_105_strides_0 = const()[name = tensor("obj_105_strides_0"), val = tensor([1, 1])]; + tensor obj_105_pad_0 = const()[name = tensor("obj_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_105_dilations_0 = const()[name = tensor("obj_105_dilations_0"), val = tensor([1, 1])]; + tensor obj_105_groups_0 = const()[name = tensor("obj_105_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348577024)))]; + tensor layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350674240)))]; + tensor obj_105_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = obj_105_dilations_0, groups = obj_105_groups_0, pad = obj_105_pad_0, pad_type = obj_105_pad_type_0, strides = obj_105_strides_0, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("obj_105_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_105_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; + tensor var_1792_to_fp16 = const()[name = tensor("op_1792_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_1792_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_107_gamma_0_to_fp16 = const()[name = tensor("obj_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350676352)))]; + tensor obj_107_beta_0_to_fp16 = const()[name = tensor("obj_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350678464)))]; + tensor obj_107_epsilon_0_to_fp16 = const()[name 
= tensor("obj_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_107_cast_fp16 = batch_norm(beta = obj_107_beta_0_to_fp16, epsilon = obj_107_epsilon_0_to_fp16, gamma = obj_107_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_107_cast_fp16")]; + tensor query_31_pad_type_0 = const()[name = tensor("query_31_pad_type_0"), val = tensor("valid")]; + tensor query_31_strides_0 = const()[name = tensor("query_31_strides_0"), val = tensor([1, 1])]; + tensor query_31_pad_0 = const()[name = tensor("query_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_31_dilations_0 = const()[name = tensor("query_31_dilations_0"), val = tensor([1, 1])]; + tensor query_31_groups_0 = const()[name = tensor("query_31_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350680576)))]; + tensor layers_7_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352777792)))]; + tensor query_31_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_bias_to_fp16, dilations = query_31_dilations_0, groups = query_31_groups_0, pad = query_31_pad_0, pad_type = query_31_pad_type_0, strides = query_31_strides_0, weight = layers_7_encoder_attn_q_proj_weight_to_fp16, x = obj_107_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor key_31_pad_type_0 = const()[name = tensor("key_31_pad_type_0"), val = tensor("valid")]; + tensor key_31_strides_0 = const()[name = tensor("key_31_strides_0"), val = tensor([1, 1])]; + tensor key_31_pad_0 = const()[name = tensor("key_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_31_dilations_0 = const()[name = tensor("key_31_dilations_0"), val = tensor([1, 1])]; + tensor key_31_groups_0 = const()[name = tensor("key_31_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352779904)))]; + tensor key_31_cast_fp16 = conv(dilations = key_31_dilations_0, groups = key_31_groups_0, pad = key_31_pad_0, pad_type = key_31_pad_type_0, strides = key_31_strides_0, weight = layers_7_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_31_cast_fp16")]; + tensor value_31_pad_type_0 = const()[name = tensor("value_31_pad_type_0"), val = tensor("valid")]; + tensor value_31_strides_0 = const()[name = tensor("value_31_strides_0"), val = tensor([1, 1])]; + tensor value_31_pad_0 = const()[name = tensor("value_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_31_dilations_0 = const()[name = tensor("value_31_dilations_0"), val = tensor([1, 1])]; + tensor value_31_groups_0 = const()[name = tensor("value_31_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354877120)))]; + tensor layers_7_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356974336)))]; + tensor 
value_31_cast_fp16 = conv(bias = layers_7_encoder_attn_v_proj_bias_to_fp16, dilations = value_31_dilations_0, groups = value_31_groups_0, pad = value_31_pad_0, pad_type = value_31_pad_type_0, strides = value_31_strides_0, weight = layers_7_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_31_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_31_cast_fp16 = reshape(shape = var_1828, x = query_31_cast_fp16)[name = tensor("mh_q_31_cast_fp16")]; + tensor var_1830_to_fp16 = const()[name = tensor("op_1830_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1831_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_1830_to_fp16)[name = tensor("op_1831_cast_fp16")]; + tensor var_1834 = const()[name = tensor("op_1834"), val = tensor([1, 16, 64, 1500])]; + tensor var_1835_cast_fp16 = reshape(shape = var_1834, x = key_31_cast_fp16)[name = tensor("op_1835_cast_fp16")]; + tensor mh_w_47_transpose_x_0 = const()[name = tensor("mh_w_47_transpose_x_0"), val = tensor(true)]; + tensor mh_w_47_transpose_y_0 = const()[name = tensor("mh_w_47_transpose_y_0"), val = tensor(false)]; + tensor mh_w_47_cast_fp16 = matmul(transpose_x = mh_w_47_transpose_x_0, transpose_y = mh_w_47_transpose_y_0, x = var_1831_cast_fp16, y = var_1835_cast_fp16)[name = tensor("mh_w_47_cast_fp16")]; + tensor obj_111_cast_fp16 = softmax(axis = var_1677, x = mh_w_47_cast_fp16)[name = tensor("obj_111_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor([1, 16, 64, 1500])]; + tensor var_1840_cast_fp16 = reshape(shape = var_1839, x = value_31_cast_fp16)[name = tensor("op_1840_cast_fp16")]; + tensor attn_31_transpose_x_0 = const()[name = tensor("attn_31_transpose_x_0"), val = tensor(false)]; + tensor attn_31_transpose_y_0 = const()[name = tensor("attn_31_transpose_y_0"), val = tensor(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_1840_cast_fp16, y = obj_111_cast_fp16)[name = tensor("attn_31_cast_fp16")]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor([1, 1024, 1, 1])]; + tensor input_73_cast_fp16 = reshape(shape = var_1843, x = attn_31_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor obj_109_pad_type_0 = const()[name = tensor("obj_109_pad_type_0"), val = tensor("valid")]; + tensor obj_109_strides_0 = const()[name = tensor("obj_109_strides_0"), val = tensor([1, 1])]; + tensor obj_109_pad_0 = const()[name = tensor("obj_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_109_dilations_0 = const()[name = tensor("obj_109_dilations_0"), val = tensor([1, 1])]; + tensor obj_109_groups_0 = const()[name = tensor("obj_109_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356976448)))]; + tensor layers_7_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359073664)))]; + tensor obj_109_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_bias_to_fp16, dilations = obj_109_dilations_0, groups = obj_109_groups_0, pad = obj_109_pad_0, pad_type = obj_109_pad_type_0, strides = obj_109_strides_0, weight = layers_7_encoder_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("obj_109_cast_fp16")]; + tensor 
inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_109_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; + tensor var_1861_to_fp16 = const()[name = tensor("op_1861_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_1861_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359075776)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359077888)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor input_77_pad_type_0 = const()[name = tensor("input_77_pad_type_0"), val = tensor("valid")]; + tensor input_77_strides_0 = const()[name = tensor("input_77_strides_0"), val = tensor([1, 1])]; + tensor input_77_pad_0 = const()[name = tensor("input_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_77_dilations_0 = const()[name = tensor("input_77_dilations_0"), val = tensor([1, 1])]; + tensor input_77_groups_0 = const()[name = tensor("input_77_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_weight_to_fp16 = const()[name = tensor("layers_7_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359080000)))]; + tensor layers_7_fc1_bias_to_fp16 = const()[name = tensor("layers_7_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(367468672)))]; + tensor input_77_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = input_77_dilations_0, groups = input_77_groups_0, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = input_77_strides_0, weight = layers_7_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor hidden_states_17_pad_type_0 = const()[name = tensor("hidden_states_17_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_17_strides_0 = const()[name = tensor("hidden_states_17_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_0 = const()[name = tensor("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_17_dilations_0 = const()[name = tensor("hidden_states_17_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_17_groups_0 = const()[name = tensor("hidden_states_17_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_weight_to_fp16 = const()[name = tensor("layers_7_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(367476928)))]; + tensor layers_7_fc2_bias_to_fp16 = const()[name = tensor("layers_7_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(375865600)))]; + tensor hidden_states_17_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = layers_7_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_1896 = const()[name = tensor("op_1896"), val = tensor(3)]; + tensor out_49_axes_0 = const()[name = tensor("out_49_axes_0"), val = tensor([1])]; + tensor var_1921_to_fp16 = const()[name = tensor("op_1921_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_1921_to_fp16, x = inputs_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; + tensor obj_113_gamma_0_to_fp16 = const()[name = tensor("obj_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375867712)))]; + tensor obj_113_beta_0_to_fp16 = const()[name = tensor("obj_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375869824)))]; + tensor obj_113_epsilon_0_to_fp16 = const()[name = tensor("obj_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_113_cast_fp16")]; + tensor query_33_pad_type_0 = const()[name = tensor("query_33_pad_type_0"), val = tensor("valid")]; + tensor query_33_strides_0 = const()[name = tensor("query_33_strides_0"), val = tensor([1, 1])]; + tensor query_33_pad_0 = const()[name = tensor("query_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_33_dilations_0 = const()[name = tensor("query_33_dilations_0"), val = tensor([1, 1])]; + tensor query_33_groups_0 = const()[name = tensor("query_33_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375871936)))]; + tensor layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377969152)))]; + tensor query_33_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = query_33_dilations_0, groups = query_33_groups_0, pad = query_33_pad_0, pad_type = query_33_pad_type_0, strides = query_33_strides_0, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor current_key_17_pad_type_0 = const()[name = tensor("current_key_17_pad_type_0"), val = tensor("valid")]; + tensor current_key_17_strides_0 = const()[name = tensor("current_key_17_strides_0"), val = tensor([1, 1])]; + tensor current_key_17_pad_0 = const()[name = tensor("current_key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_17_dilations_0 = const()[name = tensor("current_key_17_dilations_0"), val = tensor([1, 1])]; + tensor current_key_17_groups_0 = const()[name = tensor("current_key_17_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_weight_to_fp16 = 
const()[name = tensor("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377971264)))]; + tensor current_key_17_cast_fp16 = conv(dilations = current_key_17_dilations_0, groups = current_key_17_groups_0, pad = current_key_17_pad_0, pad_type = current_key_17_pad_type_0, strides = current_key_17_strides_0, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("current_key_17_cast_fp16")]; + tensor current_value_17_pad_type_0 = const()[name = tensor("current_value_17_pad_type_0"), val = tensor("valid")]; + tensor current_value_17_strides_0 = const()[name = tensor("current_value_17_strides_0"), val = tensor([1, 1])]; + tensor current_value_17_pad_0 = const()[name = tensor("current_value_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_17_dilations_0 = const()[name = tensor("current_value_17_dilations_0"), val = tensor([1, 1])]; + tensor current_value_17_groups_0 = const()[name = tensor("current_value_17_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(380068480)))]; + tensor layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382165696)))]; + tensor current_value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = current_value_17_dilations_0, groups = current_value_17_groups_0, pad = current_value_17_pad_0, pad_type = current_value_17_pad_type_0, strides = current_value_17_strides_0, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("current_value_17_cast_fp16")]; + tensor var_1960_cast_fp16 = mul(x = var_87_cast_fp16_8, y = var_207_cast_fp16)[name = tensor("op_1960_cast_fp16")]; + tensor var_1961_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor key_33_cast_fp16 = add(x = var_1960_cast_fp16, y = var_1961_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor var_1964_cast_fp16 = mul(x = var_114_cast_fp16_8, y = var_207_cast_fp16)[name = tensor("op_1964_cast_fp16")]; + tensor var_1965_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_1965_cast_fp16")]; + tensor value_33_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1965_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_1969 = const()[name = tensor("op_1969"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_33_cast_fp16 = reshape(shape = var_1969, x = query_33_cast_fp16)[name = tensor("mh_q_33_cast_fp16")]; + tensor var_1971_to_fp16 = const()[name = tensor("op_1971_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1972_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_1971_to_fp16)[name = tensor("op_1972_cast_fp16")]; + tensor var_1975 = const()[name = tensor("op_1975"), val = tensor([1, 16, 64, 448])]; + tensor var_1976_cast_fp16 = reshape(shape = var_1975, x = key_33_cast_fp16)[name = tensor("op_1976_cast_fp16")]; + tensor mh_w_49_transpose_x_0 = const()[name = tensor("mh_w_49_transpose_x_0"), val = tensor(true)]; + tensor mh_w_49_transpose_y_0 = const()[name = tensor("mh_w_49_transpose_y_0"), val = tensor(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = 
mh_w_49_transpose_y_0, x = var_1972_cast_fp16, y = var_1976_cast_fp16)[name = tensor("mh_w_49_cast_fp16")]; + tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_51_cast_fp16")]; + tensor var_1984_cast_fp16 = softmax(axis = var_1896, x = mh_w_51_cast_fp16)[name = tensor("op_1984_cast_fp16")]; + tensor var_1985 = const()[name = tensor("op_1985"), val = tensor([1, 16, 64, 448])]; + tensor var_1986_cast_fp16 = reshape(shape = var_1985, x = value_33_cast_fp16)[name = tensor("op_1986_cast_fp16")]; + tensor attn_33_transpose_x_0 = const()[name = tensor("attn_33_transpose_x_0"), val = tensor(false)]; + tensor attn_33_transpose_y_0 = const()[name = tensor("attn_33_transpose_y_0"), val = tensor(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_1986_cast_fp16, y = var_1984_cast_fp16)[name = tensor("attn_33_cast_fp16")]; + tensor var_1989 = const()[name = tensor("op_1989"), val = tensor([1, 1024, 1, 1])]; + tensor input_81_cast_fp16 = reshape(shape = var_1989, x = attn_33_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor obj_119_pad_type_0 = const()[name = tensor("obj_119_pad_type_0"), val = tensor("valid")]; + tensor obj_119_strides_0 = const()[name = tensor("obj_119_strides_0"), val = tensor([1, 1])]; + tensor obj_119_pad_0 = const()[name = tensor("obj_119_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_119_dilations_0 = const()[name = tensor("obj_119_dilations_0"), val = tensor([1, 1])]; + tensor obj_119_groups_0 = const()[name = tensor("obj_119_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382167808)))]; + tensor layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384265024)))]; + tensor obj_119_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = obj_119_dilations_0, groups = obj_119_groups_0, pad = obj_119_pad_0, pad_type = obj_119_pad_type_0, strides = obj_119_strides_0, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("obj_119_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_119_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor out_51_axes_0 = const()[name = tensor("out_51_axes_0"), val = tensor([1])]; + tensor var_2011_to_fp16 = const()[name = tensor("op_2011_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2011_to_fp16, x = inputs_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; + tensor obj_121_gamma_0_to_fp16 = const()[name = tensor("obj_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384267136)))]; + tensor obj_121_beta_0_to_fp16 = const()[name = tensor("obj_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384269248)))]; + tensor obj_121_epsilon_0_to_fp16 = const()[name = tensor("obj_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = 
out_51_cast_fp16)[name = tensor("obj_121_cast_fp16")]; + tensor query_35_pad_type_0 = const()[name = tensor("query_35_pad_type_0"), val = tensor("valid")]; + tensor query_35_strides_0 = const()[name = tensor("query_35_strides_0"), val = tensor([1, 1])]; + tensor query_35_pad_0 = const()[name = tensor("query_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_35_dilations_0 = const()[name = tensor("query_35_dilations_0"), val = tensor([1, 1])]; + tensor query_35_groups_0 = const()[name = tensor("query_35_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384271360)))]; + tensor layers_8_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386368576)))]; + tensor query_35_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_bias_to_fp16, dilations = query_35_dilations_0, groups = query_35_groups_0, pad = query_35_pad_0, pad_type = query_35_pad_type_0, strides = query_35_strides_0, weight = layers_8_encoder_attn_q_proj_weight_to_fp16, x = obj_121_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor key_35_pad_type_0 = const()[name = tensor("key_35_pad_type_0"), val = tensor("valid")]; + tensor key_35_strides_0 = const()[name = tensor("key_35_strides_0"), val = tensor([1, 1])]; + tensor key_35_pad_0 = const()[name = tensor("key_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_35_dilations_0 = const()[name = tensor("key_35_dilations_0"), val = tensor([1, 1])]; + tensor key_35_groups_0 = const()[name = tensor("key_35_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386370688)))]; + tensor key_35_cast_fp16 = conv(dilations = key_35_dilations_0, groups = key_35_groups_0, pad = key_35_pad_0, pad_type = key_35_pad_type_0, strides = key_35_strides_0, weight = layers_8_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_35_cast_fp16")]; + tensor value_35_pad_type_0 = const()[name = tensor("value_35_pad_type_0"), val = tensor("valid")]; + tensor value_35_strides_0 = const()[name = tensor("value_35_strides_0"), val = tensor([1, 1])]; + tensor value_35_pad_0 = const()[name = tensor("value_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_35_dilations_0 = const()[name = tensor("value_35_dilations_0"), val = tensor([1, 1])]; + tensor value_35_groups_0 = const()[name = tensor("value_35_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388467904)))]; + tensor layers_8_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(390565120)))]; + tensor value_35_cast_fp16 = conv(bias = layers_8_encoder_attn_v_proj_bias_to_fp16, dilations = value_35_dilations_0, groups = value_35_groups_0, pad = value_35_pad_0, pad_type = value_35_pad_type_0, strides = value_35_strides_0, weight = layers_8_encoder_attn_v_proj_weight_to_fp16, x = 
encoder_output_embeds)[name = tensor("value_35_cast_fp16")]; + tensor var_2047 = const()[name = tensor("op_2047"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_35_cast_fp16 = reshape(shape = var_2047, x = query_35_cast_fp16)[name = tensor("mh_q_35_cast_fp16")]; + tensor var_2049_to_fp16 = const()[name = tensor("op_2049_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2050_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_2049_to_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2053 = const()[name = tensor("op_2053"), val = tensor([1, 16, 64, 1500])]; + tensor var_2054_cast_fp16 = reshape(shape = var_2053, x = key_35_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor mh_w_53_transpose_x_0 = const()[name = tensor("mh_w_53_transpose_x_0"), val = tensor(true)]; + tensor mh_w_53_transpose_y_0 = const()[name = tensor("mh_w_53_transpose_y_0"), val = tensor(false)]; + tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_2050_cast_fp16, y = var_2054_cast_fp16)[name = tensor("mh_w_53_cast_fp16")]; + tensor obj_125_cast_fp16 = softmax(axis = var_1896, x = mh_w_53_cast_fp16)[name = tensor("obj_125_cast_fp16")]; + tensor var_2058 = const()[name = tensor("op_2058"), val = tensor([1, 16, 64, 1500])]; + tensor var_2059_cast_fp16 = reshape(shape = var_2058, x = value_35_cast_fp16)[name = tensor("op_2059_cast_fp16")]; + tensor attn_35_transpose_x_0 = const()[name = tensor("attn_35_transpose_x_0"), val = tensor(false)]; + tensor attn_35_transpose_y_0 = const()[name = tensor("attn_35_transpose_y_0"), val = tensor(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2059_cast_fp16, y = obj_125_cast_fp16)[name = tensor("attn_35_cast_fp16")]; + tensor var_2062 = const()[name = tensor("op_2062"), val = tensor([1, 1024, 1, 1])]; + tensor input_83_cast_fp16 = reshape(shape = var_2062, x = attn_35_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor obj_123_pad_type_0 = const()[name = tensor("obj_123_pad_type_0"), val = tensor("valid")]; + tensor obj_123_strides_0 = const()[name = tensor("obj_123_strides_0"), val = tensor([1, 1])]; + tensor obj_123_pad_0 = const()[name = tensor("obj_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_123_dilations_0 = const()[name = tensor("obj_123_dilations_0"), val = tensor([1, 1])]; + tensor obj_123_groups_0 = const()[name = tensor("obj_123_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(390567232)))]; + tensor layers_8_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(392664448)))]; + tensor obj_123_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_bias_to_fp16, dilations = obj_123_dilations_0, groups = obj_123_groups_0, pad = obj_123_pad_0, pad_type = obj_123_pad_type_0, strides = obj_123_strides_0, weight = layers_8_encoder_attn_o_proj_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("obj_123_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_123_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor out_53_axes_0 = const()[name = tensor("out_53_axes_0"), val = tensor([1])]; + tensor var_2080_to_fp16 = const()[name = tensor("op_2080_to_fp16"), val = 
tensor(0x1.5p-17)]; + tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2080_to_fp16, x = inputs_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; + tensor input_85_gamma_0_to_fp16 = const()[name = tensor("input_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(392666560)))]; + tensor input_85_beta_0_to_fp16 = const()[name = tensor("input_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(392668672)))]; + tensor input_85_epsilon_0_to_fp16 = const()[name = tensor("input_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor input_87_pad_type_0 = const()[name = tensor("input_87_pad_type_0"), val = tensor("valid")]; + tensor input_87_strides_0 = const()[name = tensor("input_87_strides_0"), val = tensor([1, 1])]; + tensor input_87_pad_0 = const()[name = tensor("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_87_dilations_0 = const()[name = tensor("input_87_dilations_0"), val = tensor([1, 1])]; + tensor input_87_groups_0 = const()[name = tensor("input_87_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_weight_to_fp16 = const()[name = tensor("layers_8_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(392670784)))]; + tensor layers_8_fc1_bias_to_fp16 = const()[name = tensor("layers_8_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401059456)))]; + tensor input_87_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = layers_8_fc1_weight_to_fp16, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor input_89_mode_0 = const()[name = tensor("input_89_mode_0"), val = tensor("EXACT")]; + tensor input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor hidden_states_19_pad_type_0 = const()[name = tensor("hidden_states_19_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_19_strides_0 = const()[name = tensor("hidden_states_19_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_0 = const()[name = tensor("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_19_dilations_0 = const()[name = tensor("hidden_states_19_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_19_groups_0 = const()[name = tensor("hidden_states_19_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_weight_to_fp16 = const()[name = tensor("layers_8_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401067712)))]; + tensor layers_8_fc2_bias_to_fp16 = const()[name = tensor("layers_8_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409456384)))]; + tensor hidden_states_19_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = hidden_states_19_dilations_0, groups = hidden_states_19_groups_0, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = hidden_states_19_strides_0, 
weight = layers_8_fc2_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor var_2115 = const()[name = tensor("op_2115"), val = tensor(3)]; + tensor out_55_axes_0 = const()[name = tensor("out_55_axes_0"), val = tensor([1])]; + tensor var_2140_to_fp16 = const()[name = tensor("op_2140_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2140_to_fp16, x = inputs_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; + tensor obj_127_gamma_0_to_fp16 = const()[name = tensor("obj_127_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409458496)))]; + tensor obj_127_beta_0_to_fp16 = const()[name = tensor("obj_127_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409460608)))]; + tensor obj_127_epsilon_0_to_fp16 = const()[name = tensor("obj_127_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_127_cast_fp16 = batch_norm(beta = obj_127_beta_0_to_fp16, epsilon = obj_127_epsilon_0_to_fp16, gamma = obj_127_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("obj_127_cast_fp16")]; + tensor query_37_pad_type_0 = const()[name = tensor("query_37_pad_type_0"), val = tensor("valid")]; + tensor query_37_strides_0 = const()[name = tensor("query_37_strides_0"), val = tensor([1, 1])]; + tensor query_37_pad_0 = const()[name = tensor("query_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_37_dilations_0 = const()[name = tensor("query_37_dilations_0"), val = tensor([1, 1])]; + tensor query_37_groups_0 = const()[name = tensor("query_37_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409462720)))]; + tensor layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411559936)))]; + tensor query_37_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = query_37_dilations_0, groups = query_37_groups_0, pad = query_37_pad_0, pad_type = query_37_pad_type_0, strides = query_37_strides_0, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor current_key_19_pad_type_0 = const()[name = tensor("current_key_19_pad_type_0"), val = tensor("valid")]; + tensor current_key_19_strides_0 = const()[name = tensor("current_key_19_strides_0"), val = tensor([1, 1])]; + tensor current_key_19_pad_0 = const()[name = tensor("current_key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_19_dilations_0 = const()[name = tensor("current_key_19_dilations_0"), val = tensor([1, 1])]; + tensor current_key_19_groups_0 = const()[name = tensor("current_key_19_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411562048)))]; + tensor current_key_19_cast_fp16 = conv(dilations = current_key_19_dilations_0, groups = current_key_19_groups_0, pad = 
current_key_19_pad_0, pad_type = current_key_19_pad_type_0, strides = current_key_19_strides_0, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor("current_key_19_cast_fp16")]; + tensor current_value_19_pad_type_0 = const()[name = tensor("current_value_19_pad_type_0"), val = tensor("valid")]; + tensor current_value_19_strides_0 = const()[name = tensor("current_value_19_strides_0"), val = tensor([1, 1])]; + tensor current_value_19_pad_0 = const()[name = tensor("current_value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_19_dilations_0 = const()[name = tensor("current_value_19_dilations_0"), val = tensor([1, 1])]; + tensor current_value_19_groups_0 = const()[name = tensor("current_value_19_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413659264)))]; + tensor layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415756480)))]; + tensor current_value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = current_value_19_dilations_0, groups = current_value_19_groups_0, pad = current_value_19_pad_0, pad_type = current_value_19_pad_type_0, strides = current_value_19_strides_0, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor("current_value_19_cast_fp16")]; + tensor var_2179_cast_fp16 = mul(x = var_87_cast_fp16_9, y = var_207_cast_fp16)[name = tensor("op_2179_cast_fp16")]; + tensor var_2180_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor key_37_cast_fp16 = add(x = var_2179_cast_fp16, y = var_2180_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor var_2183_cast_fp16 = mul(x = var_114_cast_fp16_9, y = var_207_cast_fp16)[name = tensor("op_2183_cast_fp16")]; + tensor var_2184_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor value_37_cast_fp16 = add(x = var_2183_cast_fp16, y = var_2184_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_2188 = const()[name = tensor("op_2188"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_37_cast_fp16 = reshape(shape = var_2188, x = query_37_cast_fp16)[name = tensor("mh_q_37_cast_fp16")]; + tensor var_2190_to_fp16 = const()[name = tensor("op_2190_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2191_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2190_to_fp16)[name = tensor("op_2191_cast_fp16")]; + tensor var_2194 = const()[name = tensor("op_2194"), val = tensor([1, 16, 64, 448])]; + tensor var_2195_cast_fp16 = reshape(shape = var_2194, x = key_37_cast_fp16)[name = tensor("op_2195_cast_fp16")]; + tensor mh_w_55_transpose_x_0 = const()[name = tensor("mh_w_55_transpose_x_0"), val = tensor(true)]; + tensor mh_w_55_transpose_y_0 = const()[name = tensor("mh_w_55_transpose_y_0"), val = tensor(false)]; + tensor mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_2191_cast_fp16, y = var_2195_cast_fp16)[name = tensor("mh_w_55_cast_fp16")]; + tensor mh_w_57_cast_fp16 = add(x = mh_w_55_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_57_cast_fp16")]; + tensor var_2203_cast_fp16 = softmax(axis = var_2115, x = mh_w_57_cast_fp16)[name = 
tensor("op_2203_cast_fp16")]; + tensor var_2204 = const()[name = tensor("op_2204"), val = tensor([1, 16, 64, 448])]; + tensor var_2205_cast_fp16 = reshape(shape = var_2204, x = value_37_cast_fp16)[name = tensor("op_2205_cast_fp16")]; + tensor attn_37_transpose_x_0 = const()[name = tensor("attn_37_transpose_x_0"), val = tensor(false)]; + tensor attn_37_transpose_y_0 = const()[name = tensor("attn_37_transpose_y_0"), val = tensor(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2205_cast_fp16, y = var_2203_cast_fp16)[name = tensor("attn_37_cast_fp16")]; + tensor var_2208 = const()[name = tensor("op_2208"), val = tensor([1, 1024, 1, 1])]; + tensor input_91_cast_fp16 = reshape(shape = var_2208, x = attn_37_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor obj_133_pad_type_0 = const()[name = tensor("obj_133_pad_type_0"), val = tensor("valid")]; + tensor obj_133_strides_0 = const()[name = tensor("obj_133_strides_0"), val = tensor([1, 1])]; + tensor obj_133_pad_0 = const()[name = tensor("obj_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_133_dilations_0 = const()[name = tensor("obj_133_dilations_0"), val = tensor([1, 1])]; + tensor obj_133_groups_0 = const()[name = tensor("obj_133_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415758592)))]; + tensor layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417855808)))]; + tensor obj_133_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = obj_133_dilations_0, groups = obj_133_groups_0, pad = obj_133_pad_0, pad_type = obj_133_pad_type_0, strides = obj_133_strides_0, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("obj_133_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_133_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor out_57_axes_0 = const()[name = tensor("out_57_axes_0"), val = tensor([1])]; + tensor var_2230_to_fp16 = const()[name = tensor("op_2230_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_2230_to_fp16, x = inputs_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; + tensor obj_135_gamma_0_to_fp16 = const()[name = tensor("obj_135_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417857920)))]; + tensor obj_135_beta_0_to_fp16 = const()[name = tensor("obj_135_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417860032)))]; + tensor obj_135_epsilon_0_to_fp16 = const()[name = tensor("obj_135_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_135_cast_fp16 = batch_norm(beta = obj_135_beta_0_to_fp16, epsilon = obj_135_epsilon_0_to_fp16, gamma = obj_135_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_135_cast_fp16")]; + tensor query_39_pad_type_0 = const()[name = tensor("query_39_pad_type_0"), val = tensor("valid")]; + tensor query_39_strides_0 = const()[name = tensor("query_39_strides_0"), val = tensor([1, 1])]; + tensor query_39_pad_0 = const()[name = 
tensor("query_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_39_dilations_0 = const()[name = tensor("query_39_dilations_0"), val = tensor([1, 1])]; + tensor query_39_groups_0 = const()[name = tensor("query_39_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417862144)))]; + tensor layers_9_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419959360)))]; + tensor query_39_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_bias_to_fp16, dilations = query_39_dilations_0, groups = query_39_groups_0, pad = query_39_pad_0, pad_type = query_39_pad_type_0, strides = query_39_strides_0, weight = layers_9_encoder_attn_q_proj_weight_to_fp16, x = obj_135_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor key_39_pad_type_0 = const()[name = tensor("key_39_pad_type_0"), val = tensor("valid")]; + tensor key_39_strides_0 = const()[name = tensor("key_39_strides_0"), val = tensor([1, 1])]; + tensor key_39_pad_0 = const()[name = tensor("key_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_39_dilations_0 = const()[name = tensor("key_39_dilations_0"), val = tensor([1, 1])]; + tensor key_39_groups_0 = const()[name = tensor("key_39_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419961472)))]; + tensor key_39_cast_fp16 = conv(dilations = key_39_dilations_0, groups = key_39_groups_0, pad = key_39_pad_0, pad_type = key_39_pad_type_0, strides = key_39_strides_0, weight = layers_9_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_39_cast_fp16")]; + tensor value_39_pad_type_0 = const()[name = tensor("value_39_pad_type_0"), val = tensor("valid")]; + tensor value_39_strides_0 = const()[name = tensor("value_39_strides_0"), val = tensor([1, 1])]; + tensor value_39_pad_0 = const()[name = tensor("value_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_39_dilations_0 = const()[name = tensor("value_39_dilations_0"), val = tensor([1, 1])]; + tensor value_39_groups_0 = const()[name = tensor("value_39_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422058688)))]; + tensor layers_9_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(424155904)))]; + tensor value_39_cast_fp16 = conv(bias = layers_9_encoder_attn_v_proj_bias_to_fp16, dilations = value_39_dilations_0, groups = value_39_groups_0, pad = value_39_pad_0, pad_type = value_39_pad_type_0, strides = value_39_strides_0, weight = layers_9_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_39_cast_fp16")]; + tensor var_2266 = const()[name = tensor("op_2266"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_39_cast_fp16 = reshape(shape = var_2266, x = query_39_cast_fp16)[name = tensor("mh_q_39_cast_fp16")]; + tensor var_2268_to_fp16 = const()[name 
= tensor("op_2268_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2269_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_2268_to_fp16)[name = tensor("op_2269_cast_fp16")]; + tensor var_2272 = const()[name = tensor("op_2272"), val = tensor([1, 16, 64, 1500])]; + tensor var_2273_cast_fp16 = reshape(shape = var_2272, x = key_39_cast_fp16)[name = tensor("op_2273_cast_fp16")]; + tensor mh_w_59_transpose_x_0 = const()[name = tensor("mh_w_59_transpose_x_0"), val = tensor(true)]; + tensor mh_w_59_transpose_y_0 = const()[name = tensor("mh_w_59_transpose_y_0"), val = tensor(false)]; + tensor mh_w_59_cast_fp16 = matmul(transpose_x = mh_w_59_transpose_x_0, transpose_y = mh_w_59_transpose_y_0, x = var_2269_cast_fp16, y = var_2273_cast_fp16)[name = tensor("mh_w_59_cast_fp16")]; + tensor obj_139_cast_fp16 = softmax(axis = var_2115, x = mh_w_59_cast_fp16)[name = tensor("obj_139_cast_fp16")]; + tensor var_2277 = const()[name = tensor("op_2277"), val = tensor([1, 16, 64, 1500])]; + tensor var_2278_cast_fp16 = reshape(shape = var_2277, x = value_39_cast_fp16)[name = tensor("op_2278_cast_fp16")]; + tensor attn_39_transpose_x_0 = const()[name = tensor("attn_39_transpose_x_0"), val = tensor(false)]; + tensor attn_39_transpose_y_0 = const()[name = tensor("attn_39_transpose_y_0"), val = tensor(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2278_cast_fp16, y = obj_139_cast_fp16)[name = tensor("attn_39_cast_fp16")]; + tensor var_2281 = const()[name = tensor("op_2281"), val = tensor([1, 1024, 1, 1])]; + tensor input_93_cast_fp16 = reshape(shape = var_2281, x = attn_39_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor obj_137_pad_type_0 = const()[name = tensor("obj_137_pad_type_0"), val = tensor("valid")]; + tensor obj_137_strides_0 = const()[name = tensor("obj_137_strides_0"), val = tensor([1, 1])]; + tensor obj_137_pad_0 = const()[name = tensor("obj_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_137_dilations_0 = const()[name = tensor("obj_137_dilations_0"), val = tensor([1, 1])]; + tensor obj_137_groups_0 = const()[name = tensor("obj_137_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(424158016)))]; + tensor layers_9_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(426255232)))]; + tensor obj_137_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_bias_to_fp16, dilations = obj_137_dilations_0, groups = obj_137_groups_0, pad = obj_137_pad_0, pad_type = obj_137_pad_type_0, strides = obj_137_strides_0, weight = layers_9_encoder_attn_o_proj_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("obj_137_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_137_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor out_59_axes_0 = const()[name = tensor("out_59_axes_0"), val = tensor([1])]; + tensor var_2299_to_fp16 = const()[name = tensor("op_2299_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_2299_to_fp16, x = inputs_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; + tensor input_95_gamma_0_to_fp16 = const()[name = tensor("input_95_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(426257344)))]; + tensor input_95_beta_0_to_fp16 = const()[name = tensor("input_95_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(426259456)))]; + tensor input_95_epsilon_0_to_fp16 = const()[name = tensor("input_95_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor input_97_pad_type_0 = const()[name = tensor("input_97_pad_type_0"), val = tensor("valid")]; + tensor input_97_strides_0 = const()[name = tensor("input_97_strides_0"), val = tensor([1, 1])]; + tensor input_97_pad_0 = const()[name = tensor("input_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_97_dilations_0 = const()[name = tensor("input_97_dilations_0"), val = tensor([1, 1])]; + tensor input_97_groups_0 = const()[name = tensor("input_97_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_weight_to_fp16 = const()[name = tensor("layers_9_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(426261568)))]; + tensor layers_9_fc1_bias_to_fp16 = const()[name = tensor("layers_9_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434650240)))]; + tensor input_97_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = input_97_dilations_0, groups = input_97_groups_0, pad = input_97_pad_0, pad_type = input_97_pad_type_0, strides = input_97_strides_0, weight = layers_9_fc1_weight_to_fp16, x = input_95_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor input_99_mode_0 = const()[name = tensor("input_99_mode_0"), val = tensor("EXACT")]; + tensor input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor hidden_states_21_pad_type_0 = const()[name = tensor("hidden_states_21_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_21_strides_0 = const()[name = tensor("hidden_states_21_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_0 = const()[name = tensor("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_21_dilations_0 = const()[name = tensor("hidden_states_21_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_21_groups_0 = const()[name = tensor("hidden_states_21_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_weight_to_fp16 = const()[name = tensor("layers_9_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434658496)))]; + tensor layers_9_fc2_bias_to_fp16 = const()[name = tensor("layers_9_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443047168)))]; + tensor hidden_states_21_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = layers_9_fc2_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor var_2334 = const()[name = 
tensor("op_2334"), val = tensor(3)]; + tensor out_61_axes_0 = const()[name = tensor("out_61_axes_0"), val = tensor([1])]; + tensor var_2359_to_fp16 = const()[name = tensor("op_2359_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_2359_to_fp16, x = inputs_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; + tensor obj_141_gamma_0_to_fp16 = const()[name = tensor("obj_141_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443049280)))]; + tensor obj_141_beta_0_to_fp16 = const()[name = tensor("obj_141_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443051392)))]; + tensor obj_141_epsilon_0_to_fp16 = const()[name = tensor("obj_141_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_141_cast_fp16")]; + tensor query_41_pad_type_0 = const()[name = tensor("query_41_pad_type_0"), val = tensor("valid")]; + tensor query_41_strides_0 = const()[name = tensor("query_41_strides_0"), val = tensor([1, 1])]; + tensor query_41_pad_0 = const()[name = tensor("query_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_41_dilations_0 = const()[name = tensor("query_41_dilations_0"), val = tensor([1, 1])]; + tensor query_41_groups_0 = const()[name = tensor("query_41_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443053504)))]; + tensor layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(445150720)))]; + tensor query_41_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = query_41_dilations_0, groups = query_41_groups_0, pad = query_41_pad_0, pad_type = query_41_pad_type_0, strides = query_41_strides_0, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor current_key_21_pad_type_0 = const()[name = tensor("current_key_21_pad_type_0"), val = tensor("valid")]; + tensor current_key_21_strides_0 = const()[name = tensor("current_key_21_strides_0"), val = tensor([1, 1])]; + tensor current_key_21_pad_0 = const()[name = tensor("current_key_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_21_dilations_0 = const()[name = tensor("current_key_21_dilations_0"), val = tensor([1, 1])]; + tensor current_key_21_groups_0 = const()[name = tensor("current_key_21_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(445152832)))]; + tensor current_key_21_cast_fp16 = conv(dilations = current_key_21_dilations_0, groups = current_key_21_groups_0, pad = current_key_21_pad_0, pad_type = current_key_21_pad_type_0, strides = current_key_21_strides_0, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor("current_key_21_cast_fp16")]; + tensor current_value_21_pad_type_0 = const()[name = 
tensor("current_value_21_pad_type_0"), val = tensor("valid")]; + tensor current_value_21_strides_0 = const()[name = tensor("current_value_21_strides_0"), val = tensor([1, 1])]; + tensor current_value_21_pad_0 = const()[name = tensor("current_value_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_21_dilations_0 = const()[name = tensor("current_value_21_dilations_0"), val = tensor([1, 1])]; + tensor current_value_21_groups_0 = const()[name = tensor("current_value_21_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447250048)))]; + tensor layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(449347264)))]; + tensor current_value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = current_value_21_dilations_0, groups = current_value_21_groups_0, pad = current_value_21_pad_0, pad_type = current_value_21_pad_type_0, strides = current_value_21_strides_0, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor("current_value_21_cast_fp16")]; + tensor var_2398_cast_fp16 = mul(x = var_87_cast_fp16_10, y = var_207_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2399_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2399_cast_fp16")]; + tensor key_41_cast_fp16 = add(x = var_2398_cast_fp16, y = var_2399_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor var_2402_cast_fp16 = mul(x = var_114_cast_fp16_10, y = var_207_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2403_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2403_cast_fp16")]; + tensor value_41_cast_fp16 = add(x = var_2402_cast_fp16, y = var_2403_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_2407 = const()[name = tensor("op_2407"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_41_cast_fp16 = reshape(shape = var_2407, x = query_41_cast_fp16)[name = tensor("mh_q_41_cast_fp16")]; + tensor var_2409_to_fp16 = const()[name = tensor("op_2409_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2410_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_2409_to_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2413 = const()[name = tensor("op_2413"), val = tensor([1, 16, 64, 448])]; + tensor var_2414_cast_fp16 = reshape(shape = var_2413, x = key_41_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor mh_w_61_transpose_x_0 = const()[name = tensor("mh_w_61_transpose_x_0"), val = tensor(true)]; + tensor mh_w_61_transpose_y_0 = const()[name = tensor("mh_w_61_transpose_y_0"), val = tensor(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_2410_cast_fp16, y = var_2414_cast_fp16)[name = tensor("mh_w_61_cast_fp16")]; + tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_63_cast_fp16")]; + tensor var_2422_cast_fp16 = softmax(axis = var_2334, x = mh_w_63_cast_fp16)[name = tensor("op_2422_cast_fp16")]; + tensor var_2423 = const()[name = tensor("op_2423"), val = tensor([1, 16, 64, 448])]; + tensor var_2424_cast_fp16 = reshape(shape = var_2423, x = value_41_cast_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor 
attn_41_transpose_x_0 = const()[name = tensor("attn_41_transpose_x_0"), val = tensor(false)]; + tensor attn_41_transpose_y_0 = const()[name = tensor("attn_41_transpose_y_0"), val = tensor(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_2424_cast_fp16, y = var_2422_cast_fp16)[name = tensor("attn_41_cast_fp16")]; + tensor var_2427 = const()[name = tensor("op_2427"), val = tensor([1, 1024, 1, 1])]; + tensor input_101_cast_fp16 = reshape(shape = var_2427, x = attn_41_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor obj_147_pad_type_0 = const()[name = tensor("obj_147_pad_type_0"), val = tensor("valid")]; + tensor obj_147_strides_0 = const()[name = tensor("obj_147_strides_0"), val = tensor([1, 1])]; + tensor obj_147_pad_0 = const()[name = tensor("obj_147_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_147_dilations_0 = const()[name = tensor("obj_147_dilations_0"), val = tensor([1, 1])]; + tensor obj_147_groups_0 = const()[name = tensor("obj_147_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(449349376)))]; + tensor layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451446592)))]; + tensor obj_147_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = obj_147_dilations_0, groups = obj_147_groups_0, pad = obj_147_pad_0, pad_type = obj_147_pad_type_0, strides = obj_147_strides_0, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_101_cast_fp16)[name = tensor("obj_147_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_147_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor out_63_axes_0 = const()[name = tensor("out_63_axes_0"), val = tensor([1])]; + tensor var_2449_to_fp16 = const()[name = tensor("op_2449_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_2449_to_fp16, x = inputs_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; + tensor obj_149_gamma_0_to_fp16 = const()[name = tensor("obj_149_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451448704)))]; + tensor obj_149_beta_0_to_fp16 = const()[name = tensor("obj_149_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451450816)))]; + tensor obj_149_epsilon_0_to_fp16 = const()[name = tensor("obj_149_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_149_cast_fp16 = batch_norm(beta = obj_149_beta_0_to_fp16, epsilon = obj_149_epsilon_0_to_fp16, gamma = obj_149_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("obj_149_cast_fp16")]; + tensor query_43_pad_type_0 = const()[name = tensor("query_43_pad_type_0"), val = tensor("valid")]; + tensor query_43_strides_0 = const()[name = tensor("query_43_strides_0"), val = tensor([1, 1])]; + tensor query_43_pad_0 = const()[name = tensor("query_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_43_dilations_0 = const()[name = tensor("query_43_dilations_0"), val = tensor([1, 1])]; + tensor query_43_groups_0 = const()[name = tensor("query_43_groups_0"), val = tensor(1)]; + 
tensor layers_10_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451452928)))]; + tensor layers_10_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453550144)))]; + tensor query_43_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_bias_to_fp16, dilations = query_43_dilations_0, groups = query_43_groups_0, pad = query_43_pad_0, pad_type = query_43_pad_type_0, strides = query_43_strides_0, weight = layers_10_encoder_attn_q_proj_weight_to_fp16, x = obj_149_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor key_43_pad_type_0 = const()[name = tensor("key_43_pad_type_0"), val = tensor("valid")]; + tensor key_43_strides_0 = const()[name = tensor("key_43_strides_0"), val = tensor([1, 1])]; + tensor key_43_pad_0 = const()[name = tensor("key_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_43_dilations_0 = const()[name = tensor("key_43_dilations_0"), val = tensor([1, 1])]; + tensor key_43_groups_0 = const()[name = tensor("key_43_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453552256)))]; + tensor key_43_cast_fp16 = conv(dilations = key_43_dilations_0, groups = key_43_groups_0, pad = key_43_pad_0, pad_type = key_43_pad_type_0, strides = key_43_strides_0, weight = layers_10_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_43_cast_fp16")]; + tensor value_43_pad_type_0 = const()[name = tensor("value_43_pad_type_0"), val = tensor("valid")]; + tensor value_43_strides_0 = const()[name = tensor("value_43_strides_0"), val = tensor([1, 1])]; + tensor value_43_pad_0 = const()[name = tensor("value_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_43_dilations_0 = const()[name = tensor("value_43_dilations_0"), val = tensor([1, 1])]; + tensor value_43_groups_0 = const()[name = tensor("value_43_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455649472)))]; + tensor layers_10_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457746688)))]; + tensor value_43_cast_fp16 = conv(bias = layers_10_encoder_attn_v_proj_bias_to_fp16, dilations = value_43_dilations_0, groups = value_43_groups_0, pad = value_43_pad_0, pad_type = value_43_pad_type_0, strides = value_43_strides_0, weight = layers_10_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_43_cast_fp16")]; + tensor var_2485 = const()[name = tensor("op_2485"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_43_cast_fp16 = reshape(shape = var_2485, x = query_43_cast_fp16)[name = tensor("mh_q_43_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2488_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2491 = const()[name = tensor("op_2491"), val = 
tensor([1, 16, 64, 1500])]; + tensor var_2492_cast_fp16 = reshape(shape = var_2491, x = key_43_cast_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor mh_w_65_transpose_x_0 = const()[name = tensor("mh_w_65_transpose_x_0"), val = tensor(true)]; + tensor mh_w_65_transpose_y_0 = const()[name = tensor("mh_w_65_transpose_y_0"), val = tensor(false)]; + tensor mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_2488_cast_fp16, y = var_2492_cast_fp16)[name = tensor("mh_w_65_cast_fp16")]; + tensor obj_153_cast_fp16 = softmax(axis = var_2334, x = mh_w_65_cast_fp16)[name = tensor("obj_153_cast_fp16")]; + tensor var_2496 = const()[name = tensor("op_2496"), val = tensor([1, 16, 64, 1500])]; + tensor var_2497_cast_fp16 = reshape(shape = var_2496, x = value_43_cast_fp16)[name = tensor("op_2497_cast_fp16")]; + tensor attn_43_transpose_x_0 = const()[name = tensor("attn_43_transpose_x_0"), val = tensor(false)]; + tensor attn_43_transpose_y_0 = const()[name = tensor("attn_43_transpose_y_0"), val = tensor(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_2497_cast_fp16, y = obj_153_cast_fp16)[name = tensor("attn_43_cast_fp16")]; + tensor var_2500 = const()[name = tensor("op_2500"), val = tensor([1, 1024, 1, 1])]; + tensor input_103_cast_fp16 = reshape(shape = var_2500, x = attn_43_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor obj_151_pad_type_0 = const()[name = tensor("obj_151_pad_type_0"), val = tensor("valid")]; + tensor obj_151_strides_0 = const()[name = tensor("obj_151_strides_0"), val = tensor([1, 1])]; + tensor obj_151_pad_0 = const()[name = tensor("obj_151_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_151_dilations_0 = const()[name = tensor("obj_151_dilations_0"), val = tensor([1, 1])]; + tensor obj_151_groups_0 = const()[name = tensor("obj_151_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457748800)))]; + tensor layers_10_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459846016)))]; + tensor obj_151_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_bias_to_fp16, dilations = obj_151_dilations_0, groups = obj_151_groups_0, pad = obj_151_pad_0, pad_type = obj_151_pad_type_0, strides = obj_151_strides_0, weight = layers_10_encoder_attn_o_proj_weight_to_fp16, x = input_103_cast_fp16)[name = tensor("obj_151_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_151_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor out_65_axes_0 = const()[name = tensor("out_65_axes_0"), val = tensor([1])]; + tensor var_2518_to_fp16 = const()[name = tensor("op_2518_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_2518_to_fp16, x = inputs_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; + tensor input_105_gamma_0_to_fp16 = const()[name = tensor("input_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459848128)))]; + tensor input_105_beta_0_to_fp16 = const()[name = tensor("input_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(459850240)))]; + tensor input_105_epsilon_0_to_fp16 = const()[name = tensor("input_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor input_107_pad_type_0 = const()[name = tensor("input_107_pad_type_0"), val = tensor("valid")]; + tensor input_107_strides_0 = const()[name = tensor("input_107_strides_0"), val = tensor([1, 1])]; + tensor input_107_pad_0 = const()[name = tensor("input_107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_107_dilations_0 = const()[name = tensor("input_107_dilations_0"), val = tensor([1, 1])]; + tensor input_107_groups_0 = const()[name = tensor("input_107_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_weight_to_fp16 = const()[name = tensor("layers_10_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459852352)))]; + tensor layers_10_fc1_bias_to_fp16 = const()[name = tensor("layers_10_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468241024)))]; + tensor input_107_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = input_107_dilations_0, groups = input_107_groups_0, pad = input_107_pad_0, pad_type = input_107_pad_type_0, strides = input_107_strides_0, weight = layers_10_fc1_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor input_109_mode_0 = const()[name = tensor("input_109_mode_0"), val = tensor("EXACT")]; + tensor input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor hidden_states_23_pad_type_0 = const()[name = tensor("hidden_states_23_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_23_strides_0 = const()[name = tensor("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = tensor("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = tensor("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_23_groups_0 = const()[name = tensor("hidden_states_23_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_weight_to_fp16 = const()[name = tensor("layers_10_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468249280)))]; + tensor layers_10_fc2_bias_to_fp16 = const()[name = tensor("layers_10_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476637952)))]; + tensor hidden_states_23_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = layers_10_fc2_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor var_2553 = const()[name = tensor("op_2553"), val = tensor(3)]; + tensor out_67_axes_0 = const()[name = tensor("out_67_axes_0"), val = tensor([1])]; + tensor var_2578_to_fp16 = const()[name = tensor("op_2578_to_fp16"), val = 
tensor(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_2578_to_fp16, x = inputs_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; + tensor obj_155_gamma_0_to_fp16 = const()[name = tensor("obj_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476640064)))]; + tensor obj_155_beta_0_to_fp16 = const()[name = tensor("obj_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476642176)))]; + tensor obj_155_epsilon_0_to_fp16 = const()[name = tensor("obj_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_155_cast_fp16 = batch_norm(beta = obj_155_beta_0_to_fp16, epsilon = obj_155_epsilon_0_to_fp16, gamma = obj_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("obj_155_cast_fp16")]; + tensor query_45_pad_type_0 = const()[name = tensor("query_45_pad_type_0"), val = tensor("valid")]; + tensor query_45_strides_0 = const()[name = tensor("query_45_strides_0"), val = tensor([1, 1])]; + tensor query_45_pad_0 = const()[name = tensor("query_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_45_dilations_0 = const()[name = tensor("query_45_dilations_0"), val = tensor([1, 1])]; + tensor query_45_groups_0 = const()[name = tensor("query_45_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476644288)))]; + tensor layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478741504)))]; + tensor query_45_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = query_45_dilations_0, groups = query_45_groups_0, pad = query_45_pad_0, pad_type = query_45_pad_type_0, strides = query_45_strides_0, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor current_key_23_pad_type_0 = const()[name = tensor("current_key_23_pad_type_0"), val = tensor("valid")]; + tensor current_key_23_strides_0 = const()[name = tensor("current_key_23_strides_0"), val = tensor([1, 1])]; + tensor current_key_23_pad_0 = const()[name = tensor("current_key_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_23_dilations_0 = const()[name = tensor("current_key_23_dilations_0"), val = tensor([1, 1])]; + tensor current_key_23_groups_0 = const()[name = tensor("current_key_23_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478743616)))]; + tensor current_key_23_cast_fp16 = conv(dilations = current_key_23_dilations_0, groups = current_key_23_groups_0, pad = current_key_23_pad_0, pad_type = current_key_23_pad_type_0, strides = current_key_23_strides_0, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor("current_key_23_cast_fp16")]; + tensor current_value_23_pad_type_0 = const()[name = tensor("current_value_23_pad_type_0"), val = tensor("valid")]; + tensor current_value_23_strides_0 = const()[name = tensor("current_value_23_strides_0"), val = tensor([1, 1])]; + tensor 
current_value_23_pad_0 = const()[name = tensor("current_value_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_23_dilations_0 = const()[name = tensor("current_value_23_dilations_0"), val = tensor([1, 1])]; + tensor current_value_23_groups_0 = const()[name = tensor("current_value_23_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480840832)))]; + tensor layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482938048)))]; + tensor current_value_23_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = current_value_23_dilations_0, groups = current_value_23_groups_0, pad = current_value_23_pad_0, pad_type = current_value_23_pad_type_0, strides = current_value_23_strides_0, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor("current_value_23_cast_fp16")]; + tensor var_2617_cast_fp16 = mul(x = var_87_cast_fp16_11, y = var_207_cast_fp16)[name = tensor("op_2617_cast_fp16")]; + tensor var_2618_cast_fp16 = mul(x = current_key_23_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2618_cast_fp16")]; + tensor key_45_cast_fp16 = add(x = var_2617_cast_fp16, y = var_2618_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor var_2621_cast_fp16 = mul(x = var_114_cast_fp16_11, y = var_207_cast_fp16)[name = tensor("op_2621_cast_fp16")]; + tensor var_2622_cast_fp16 = mul(x = current_value_23_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2622_cast_fp16")]; + tensor value_45_cast_fp16 = add(x = var_2621_cast_fp16, y = var_2622_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_2626 = const()[name = tensor("op_2626"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_45_cast_fp16 = reshape(shape = var_2626, x = query_45_cast_fp16)[name = tensor("mh_q_45_cast_fp16")]; + tensor var_2628_to_fp16 = const()[name = tensor("op_2628_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2629_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_2628_to_fp16)[name = tensor("op_2629_cast_fp16")]; + tensor var_2632 = const()[name = tensor("op_2632"), val = tensor([1, 16, 64, 448])]; + tensor var_2633_cast_fp16 = reshape(shape = var_2632, x = key_45_cast_fp16)[name = tensor("op_2633_cast_fp16")]; + tensor mh_w_67_transpose_x_0 = const()[name = tensor("mh_w_67_transpose_x_0"), val = tensor(true)]; + tensor mh_w_67_transpose_y_0 = const()[name = tensor("mh_w_67_transpose_y_0"), val = tensor(false)]; + tensor mh_w_67_cast_fp16 = matmul(transpose_x = mh_w_67_transpose_x_0, transpose_y = mh_w_67_transpose_y_0, x = var_2629_cast_fp16, y = var_2633_cast_fp16)[name = tensor("mh_w_67_cast_fp16")]; + tensor mh_w_69_cast_fp16 = add(x = mh_w_67_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_69_cast_fp16")]; + tensor var_2641_cast_fp16 = softmax(axis = var_2553, x = mh_w_69_cast_fp16)[name = tensor("op_2641_cast_fp16")]; + tensor var_2642 = const()[name = tensor("op_2642"), val = tensor([1, 16, 64, 448])]; + tensor var_2643_cast_fp16 = reshape(shape = var_2642, x = value_45_cast_fp16)[name = tensor("op_2643_cast_fp16")]; + tensor attn_45_transpose_x_0 = const()[name = tensor("attn_45_transpose_x_0"), val = tensor(false)]; + tensor attn_45_transpose_y_0 = const()[name = tensor("attn_45_transpose_y_0"), val = tensor(true)]; + tensor 
attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_2643_cast_fp16, y = var_2641_cast_fp16)[name = tensor("attn_45_cast_fp16")]; + tensor var_2646 = const()[name = tensor("op_2646"), val = tensor([1, 1024, 1, 1])]; + tensor input_111_cast_fp16 = reshape(shape = var_2646, x = attn_45_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor obj_161_pad_type_0 = const()[name = tensor("obj_161_pad_type_0"), val = tensor("valid")]; + tensor obj_161_strides_0 = const()[name = tensor("obj_161_strides_0"), val = tensor([1, 1])]; + tensor obj_161_pad_0 = const()[name = tensor("obj_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_161_dilations_0 = const()[name = tensor("obj_161_dilations_0"), val = tensor([1, 1])]; + tensor obj_161_groups_0 = const()[name = tensor("obj_161_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482940160)))]; + tensor layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485037376)))]; + tensor obj_161_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = obj_161_dilations_0, groups = obj_161_groups_0, pad = obj_161_pad_0, pad_type = obj_161_pad_type_0, strides = obj_161_strides_0, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_111_cast_fp16)[name = tensor("obj_161_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_161_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor out_69_axes_0 = const()[name = tensor("out_69_axes_0"), val = tensor([1])]; + tensor var_2668_to_fp16 = const()[name = tensor("op_2668_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_2668_to_fp16, x = inputs_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; + tensor obj_163_gamma_0_to_fp16 = const()[name = tensor("obj_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485039488)))]; + tensor obj_163_beta_0_to_fp16 = const()[name = tensor("obj_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485041600)))]; + tensor obj_163_epsilon_0_to_fp16 = const()[name = tensor("obj_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_163_cast_fp16 = batch_norm(beta = obj_163_beta_0_to_fp16, epsilon = obj_163_epsilon_0_to_fp16, gamma = obj_163_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_163_cast_fp16")]; + tensor query_47_pad_type_0 = const()[name = tensor("query_47_pad_type_0"), val = tensor("valid")]; + tensor query_47_strides_0 = const()[name = tensor("query_47_strides_0"), val = tensor([1, 1])]; + tensor query_47_pad_0 = const()[name = tensor("query_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_47_dilations_0 = const()[name = tensor("query_47_dilations_0"), val = tensor([1, 1])]; + tensor query_47_groups_0 = const()[name = tensor("query_47_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(485043712)))]; + tensor layers_11_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487140928)))]; + tensor query_47_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_bias_to_fp16, dilations = query_47_dilations_0, groups = query_47_groups_0, pad = query_47_pad_0, pad_type = query_47_pad_type_0, strides = query_47_strides_0, weight = layers_11_encoder_attn_q_proj_weight_to_fp16, x = obj_163_cast_fp16)[name = tensor("query_47_cast_fp16")]; + tensor key_47_pad_type_0 = const()[name = tensor("key_47_pad_type_0"), val = tensor("valid")]; + tensor key_47_strides_0 = const()[name = tensor("key_47_strides_0"), val = tensor([1, 1])]; + tensor key_47_pad_0 = const()[name = tensor("key_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_47_dilations_0 = const()[name = tensor("key_47_dilations_0"), val = tensor([1, 1])]; + tensor key_47_groups_0 = const()[name = tensor("key_47_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487143040)))]; + tensor key_47_cast_fp16 = conv(dilations = key_47_dilations_0, groups = key_47_groups_0, pad = key_47_pad_0, pad_type = key_47_pad_type_0, strides = key_47_strides_0, weight = layers_11_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_47_cast_fp16")]; + tensor value_47_pad_type_0 = const()[name = tensor("value_47_pad_type_0"), val = tensor("valid")]; + tensor value_47_strides_0 = const()[name = tensor("value_47_strides_0"), val = tensor([1, 1])]; + tensor value_47_pad_0 = const()[name = tensor("value_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_47_dilations_0 = const()[name = tensor("value_47_dilations_0"), val = tensor([1, 1])]; + tensor value_47_groups_0 = const()[name = tensor("value_47_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489240256)))]; + tensor layers_11_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491337472)))]; + tensor value_47_cast_fp16 = conv(bias = layers_11_encoder_attn_v_proj_bias_to_fp16, dilations = value_47_dilations_0, groups = value_47_groups_0, pad = value_47_pad_0, pad_type = value_47_pad_type_0, strides = value_47_strides_0, weight = layers_11_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_47_cast_fp16")]; + tensor var_2704 = const()[name = tensor("op_2704"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_47_cast_fp16 = reshape(shape = var_2704, x = query_47_cast_fp16)[name = tensor("mh_q_47_cast_fp16")]; + tensor var_2706_to_fp16 = const()[name = tensor("op_2706_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2707_cast_fp16 = mul(x = mh_q_47_cast_fp16, y = var_2706_to_fp16)[name = tensor("op_2707_cast_fp16")]; + tensor var_2710 = const()[name = tensor("op_2710"), val = tensor([1, 16, 64, 1500])]; + tensor var_2711_cast_fp16 = reshape(shape = var_2710, x = key_47_cast_fp16)[name = tensor("op_2711_cast_fp16")]; + tensor mh_w_71_transpose_x_0 = const()[name = 
tensor("mh_w_71_transpose_x_0"), val = tensor(true)]; + tensor mh_w_71_transpose_y_0 = const()[name = tensor("mh_w_71_transpose_y_0"), val = tensor(false)]; + tensor mh_w_71_cast_fp16 = matmul(transpose_x = mh_w_71_transpose_x_0, transpose_y = mh_w_71_transpose_y_0, x = var_2707_cast_fp16, y = var_2711_cast_fp16)[name = tensor("mh_w_71_cast_fp16")]; + tensor obj_167_cast_fp16 = softmax(axis = var_2553, x = mh_w_71_cast_fp16)[name = tensor("obj_167_cast_fp16")]; + tensor var_2715 = const()[name = tensor("op_2715"), val = tensor([1, 16, 64, 1500])]; + tensor var_2716_cast_fp16 = reshape(shape = var_2715, x = value_47_cast_fp16)[name = tensor("op_2716_cast_fp16")]; + tensor attn_47_transpose_x_0 = const()[name = tensor("attn_47_transpose_x_0"), val = tensor(false)]; + tensor attn_47_transpose_y_0 = const()[name = tensor("attn_47_transpose_y_0"), val = tensor(true)]; + tensor attn_47_cast_fp16 = matmul(transpose_x = attn_47_transpose_x_0, transpose_y = attn_47_transpose_y_0, x = var_2716_cast_fp16, y = obj_167_cast_fp16)[name = tensor("attn_47_cast_fp16")]; + tensor var_2719 = const()[name = tensor("op_2719"), val = tensor([1, 1024, 1, 1])]; + tensor input_113_cast_fp16 = reshape(shape = var_2719, x = attn_47_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor obj_165_pad_type_0 = const()[name = tensor("obj_165_pad_type_0"), val = tensor("valid")]; + tensor obj_165_strides_0 = const()[name = tensor("obj_165_strides_0"), val = tensor([1, 1])]; + tensor obj_165_pad_0 = const()[name = tensor("obj_165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_165_dilations_0 = const()[name = tensor("obj_165_dilations_0"), val = tensor([1, 1])]; + tensor obj_165_groups_0 = const()[name = tensor("obj_165_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491339584)))]; + tensor layers_11_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493436800)))]; + tensor obj_165_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_bias_to_fp16, dilations = obj_165_dilations_0, groups = obj_165_groups_0, pad = obj_165_pad_0, pad_type = obj_165_pad_type_0, strides = obj_165_strides_0, weight = layers_11_encoder_attn_o_proj_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("obj_165_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_165_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor out_71_axes_0 = const()[name = tensor("out_71_axes_0"), val = tensor([1])]; + tensor var_2737_to_fp16 = const()[name = tensor("op_2737_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_2737_to_fp16, x = inputs_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493438912)))]; + tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493441024)))]; + tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = 
input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor input_117_pad_type_0 = const()[name = tensor("input_117_pad_type_0"), val = tensor("valid")]; + tensor input_117_strides_0 = const()[name = tensor("input_117_strides_0"), val = tensor([1, 1])]; + tensor input_117_pad_0 = const()[name = tensor("input_117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_117_dilations_0 = const()[name = tensor("input_117_dilations_0"), val = tensor([1, 1])]; + tensor input_117_groups_0 = const()[name = tensor("input_117_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_weight_to_fp16 = const()[name = tensor("layers_11_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493443136)))]; + tensor layers_11_fc1_bias_to_fp16 = const()[name = tensor("layers_11_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501831808)))]; + tensor input_117_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = input_117_dilations_0, groups = input_117_groups_0, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = input_117_strides_0, weight = layers_11_fc1_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor input_119_mode_0 = const()[name = tensor("input_119_mode_0"), val = tensor("EXACT")]; + tensor input_119_cast_fp16 = gelu(mode = input_119_mode_0, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; + tensor hidden_states_25_pad_type_0 = const()[name = tensor("hidden_states_25_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_25_strides_0 = const()[name = tensor("hidden_states_25_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_25_pad_0 = const()[name = tensor("hidden_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_25_dilations_0 = const()[name = tensor("hidden_states_25_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_25_groups_0 = const()[name = tensor("hidden_states_25_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_weight_to_fp16 = const()[name = tensor("layers_11_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501840064)))]; + tensor layers_11_fc2_bias_to_fp16 = const()[name = tensor("layers_11_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510228736)))]; + tensor hidden_states_25_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = hidden_states_25_dilations_0, groups = hidden_states_25_groups_0, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = hidden_states_25_strides_0, weight = layers_11_fc2_weight_to_fp16, x = input_119_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_73_cast_fp16")]; + tensor var_2772 = const()[name = tensor("op_2772"), val = tensor(3)]; + tensor out_73_axes_0 = const()[name = tensor("out_73_axes_0"), val = tensor([1])]; + tensor var_2797_to_fp16 = const()[name = tensor("op_2797_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_2797_to_fp16, x = inputs_73_cast_fp16)[name = tensor("out_73_cast_fp16")]; + tensor 
obj_169_gamma_0_to_fp16 = const()[name = tensor("obj_169_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510230848)))]; + tensor obj_169_beta_0_to_fp16 = const()[name = tensor("obj_169_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510232960)))]; + tensor obj_169_epsilon_0_to_fp16 = const()[name = tensor("obj_169_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_169_cast_fp16 = batch_norm(beta = obj_169_beta_0_to_fp16, epsilon = obj_169_epsilon_0_to_fp16, gamma = obj_169_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_73_cast_fp16)[name = tensor("obj_169_cast_fp16")]; + tensor query_49_pad_type_0 = const()[name = tensor("query_49_pad_type_0"), val = tensor("valid")]; + tensor query_49_strides_0 = const()[name = tensor("query_49_strides_0"), val = tensor([1, 1])]; + tensor query_49_pad_0 = const()[name = tensor("query_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_49_dilations_0 = const()[name = tensor("query_49_dilations_0"), val = tensor([1, 1])]; + tensor query_49_groups_0 = const()[name = tensor("query_49_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510235072)))]; + tensor layers_12_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512332288)))]; + tensor query_49_cast_fp16 = conv(bias = layers_12_self_attn_q_proj_bias_to_fp16, dilations = query_49_dilations_0, groups = query_49_groups_0, pad = query_49_pad_0, pad_type = query_49_pad_type_0, strides = query_49_strides_0, weight = layers_12_self_attn_q_proj_weight_to_fp16, x = obj_169_cast_fp16)[name = tensor("query_49_cast_fp16")]; + tensor current_key_25_pad_type_0 = const()[name = tensor("current_key_25_pad_type_0"), val = tensor("valid")]; + tensor current_key_25_strides_0 = const()[name = tensor("current_key_25_strides_0"), val = tensor([1, 1])]; + tensor current_key_25_pad_0 = const()[name = tensor("current_key_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_25_dilations_0 = const()[name = tensor("current_key_25_dilations_0"), val = tensor([1, 1])]; + tensor current_key_25_groups_0 = const()[name = tensor("current_key_25_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512334400)))]; + tensor current_key_25_cast_fp16 = conv(dilations = current_key_25_dilations_0, groups = current_key_25_groups_0, pad = current_key_25_pad_0, pad_type = current_key_25_pad_type_0, strides = current_key_25_strides_0, weight = layers_12_self_attn_k_proj_weight_to_fp16, x = obj_169_cast_fp16)[name = tensor("current_key_25_cast_fp16")]; + tensor current_value_25_pad_type_0 = const()[name = tensor("current_value_25_pad_type_0"), val = tensor("valid")]; + tensor current_value_25_strides_0 = const()[name = tensor("current_value_25_strides_0"), val = tensor([1, 1])]; + tensor current_value_25_pad_0 = const()[name = tensor("current_value_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_25_dilations_0 = const()[name = 
tensor("current_value_25_dilations_0"), val = tensor([1, 1])]; + tensor current_value_25_groups_0 = const()[name = tensor("current_value_25_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(514431616)))]; + tensor layers_12_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(516528832)))]; + tensor current_value_25_cast_fp16 = conv(bias = layers_12_self_attn_v_proj_bias_to_fp16, dilations = current_value_25_dilations_0, groups = current_value_25_groups_0, pad = current_value_25_pad_0, pad_type = current_value_25_pad_type_0, strides = current_value_25_strides_0, weight = layers_12_self_attn_v_proj_weight_to_fp16, x = obj_169_cast_fp16)[name = tensor("current_value_25_cast_fp16")]; + tensor var_2836_cast_fp16 = mul(x = var_87_cast_fp16_12, y = var_207_cast_fp16)[name = tensor("op_2836_cast_fp16")]; + tensor var_2837_cast_fp16 = mul(x = current_key_25_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2837_cast_fp16")]; + tensor key_49_cast_fp16 = add(x = var_2836_cast_fp16, y = var_2837_cast_fp16)[name = tensor("key_49_cast_fp16")]; + tensor var_2840_cast_fp16 = mul(x = var_114_cast_fp16_12, y = var_207_cast_fp16)[name = tensor("op_2840_cast_fp16")]; + tensor var_2841_cast_fp16 = mul(x = current_value_25_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_2841_cast_fp16")]; + tensor value_49_cast_fp16 = add(x = var_2840_cast_fp16, y = var_2841_cast_fp16)[name = tensor("value_49_cast_fp16")]; + tensor var_2845 = const()[name = tensor("op_2845"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_49_cast_fp16 = reshape(shape = var_2845, x = query_49_cast_fp16)[name = tensor("mh_q_49_cast_fp16")]; + tensor var_2847_to_fp16 = const()[name = tensor("op_2847_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2848_cast_fp16 = mul(x = mh_q_49_cast_fp16, y = var_2847_to_fp16)[name = tensor("op_2848_cast_fp16")]; + tensor var_2851 = const()[name = tensor("op_2851"), val = tensor([1, 16, 64, 448])]; + tensor var_2852_cast_fp16 = reshape(shape = var_2851, x = key_49_cast_fp16)[name = tensor("op_2852_cast_fp16")]; + tensor mh_w_73_transpose_x_0 = const()[name = tensor("mh_w_73_transpose_x_0"), val = tensor(true)]; + tensor mh_w_73_transpose_y_0 = const()[name = tensor("mh_w_73_transpose_y_0"), val = tensor(false)]; + tensor mh_w_73_cast_fp16 = matmul(transpose_x = mh_w_73_transpose_x_0, transpose_y = mh_w_73_transpose_y_0, x = var_2848_cast_fp16, y = var_2852_cast_fp16)[name = tensor("mh_w_73_cast_fp16")]; + tensor mh_w_75_cast_fp16 = add(x = mh_w_73_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_75_cast_fp16")]; + tensor var_2860_cast_fp16 = softmax(axis = var_2772, x = mh_w_75_cast_fp16)[name = tensor("op_2860_cast_fp16")]; + tensor var_2861 = const()[name = tensor("op_2861"), val = tensor([1, 16, 64, 448])]; + tensor var_2862_cast_fp16 = reshape(shape = var_2861, x = value_49_cast_fp16)[name = tensor("op_2862_cast_fp16")]; + tensor attn_49_transpose_x_0 = const()[name = tensor("attn_49_transpose_x_0"), val = tensor(false)]; + tensor attn_49_transpose_y_0 = const()[name = tensor("attn_49_transpose_y_0"), val = tensor(true)]; + tensor attn_49_cast_fp16 = matmul(transpose_x = attn_49_transpose_x_0, transpose_y = attn_49_transpose_y_0, x = var_2862_cast_fp16, y = var_2860_cast_fp16)[name = 
tensor("attn_49_cast_fp16")]; + tensor var_2865 = const()[name = tensor("op_2865"), val = tensor([1, 1024, 1, 1])]; + tensor input_121_cast_fp16 = reshape(shape = var_2865, x = attn_49_cast_fp16)[name = tensor("input_121_cast_fp16")]; + tensor obj_175_pad_type_0 = const()[name = tensor("obj_175_pad_type_0"), val = tensor("valid")]; + tensor obj_175_strides_0 = const()[name = tensor("obj_175_strides_0"), val = tensor([1, 1])]; + tensor obj_175_pad_0 = const()[name = tensor("obj_175_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_175_dilations_0 = const()[name = tensor("obj_175_dilations_0"), val = tensor([1, 1])]; + tensor obj_175_groups_0 = const()[name = tensor("obj_175_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(516530944)))]; + tensor layers_12_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(518628160)))]; + tensor obj_175_cast_fp16 = conv(bias = layers_12_self_attn_o_proj_bias_to_fp16, dilations = obj_175_dilations_0, groups = obj_175_groups_0, pad = obj_175_pad_0, pad_type = obj_175_pad_type_0, strides = obj_175_strides_0, weight = layers_12_self_attn_o_proj_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("obj_175_cast_fp16")]; + tensor inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = obj_175_cast_fp16)[name = tensor("inputs_75_cast_fp16")]; + tensor out_75_axes_0 = const()[name = tensor("out_75_axes_0"), val = tensor([1])]; + tensor var_2887_to_fp16 = const()[name = tensor("op_2887_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_75_cast_fp16 = layer_norm(axes = out_75_axes_0, epsilon = var_2887_to_fp16, x = inputs_75_cast_fp16)[name = tensor("out_75_cast_fp16")]; + tensor obj_177_gamma_0_to_fp16 = const()[name = tensor("obj_177_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(518630272)))]; + tensor obj_177_beta_0_to_fp16 = const()[name = tensor("obj_177_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(518632384)))]; + tensor obj_177_epsilon_0_to_fp16 = const()[name = tensor("obj_177_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_177_cast_fp16 = batch_norm(beta = obj_177_beta_0_to_fp16, epsilon = obj_177_epsilon_0_to_fp16, gamma = obj_177_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_75_cast_fp16)[name = tensor("obj_177_cast_fp16")]; + tensor query_51_pad_type_0 = const()[name = tensor("query_51_pad_type_0"), val = tensor("valid")]; + tensor query_51_strides_0 = const()[name = tensor("query_51_strides_0"), val = tensor([1, 1])]; + tensor query_51_pad_0 = const()[name = tensor("query_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_51_dilations_0 = const()[name = tensor("query_51_dilations_0"), val = tensor([1, 1])]; + tensor query_51_groups_0 = const()[name = tensor("query_51_groups_0"), val = tensor(1)]; + tensor layers_12_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_12_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(518634496)))]; + tensor layers_12_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_12_encoder_attn_q_proj_bias_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520731712)))]; + tensor query_51_cast_fp16 = conv(bias = layers_12_encoder_attn_q_proj_bias_to_fp16, dilations = query_51_dilations_0, groups = query_51_groups_0, pad = query_51_pad_0, pad_type = query_51_pad_type_0, strides = query_51_strides_0, weight = layers_12_encoder_attn_q_proj_weight_to_fp16, x = obj_177_cast_fp16)[name = tensor("query_51_cast_fp16")]; + tensor key_51_pad_type_0 = const()[name = tensor("key_51_pad_type_0"), val = tensor("valid")]; + tensor key_51_strides_0 = const()[name = tensor("key_51_strides_0"), val = tensor([1, 1])]; + tensor key_51_pad_0 = const()[name = tensor("key_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_51_dilations_0 = const()[name = tensor("key_51_dilations_0"), val = tensor([1, 1])]; + tensor key_51_groups_0 = const()[name = tensor("key_51_groups_0"), val = tensor(1)]; + tensor layers_12_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_12_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520733824)))]; + tensor key_51_cast_fp16 = conv(dilations = key_51_dilations_0, groups = key_51_groups_0, pad = key_51_pad_0, pad_type = key_51_pad_type_0, strides = key_51_strides_0, weight = layers_12_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_51_cast_fp16")]; + tensor value_51_pad_type_0 = const()[name = tensor("value_51_pad_type_0"), val = tensor("valid")]; + tensor value_51_strides_0 = const()[name = tensor("value_51_strides_0"), val = tensor([1, 1])]; + tensor value_51_pad_0 = const()[name = tensor("value_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_51_dilations_0 = const()[name = tensor("value_51_dilations_0"), val = tensor([1, 1])]; + tensor value_51_groups_0 = const()[name = tensor("value_51_groups_0"), val = tensor(1)]; + tensor layers_12_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_12_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(522831040)))]; + tensor layers_12_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_12_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(524928256)))]; + tensor value_51_cast_fp16 = conv(bias = layers_12_encoder_attn_v_proj_bias_to_fp16, dilations = value_51_dilations_0, groups = value_51_groups_0, pad = value_51_pad_0, pad_type = value_51_pad_type_0, strides = value_51_strides_0, weight = layers_12_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_51_cast_fp16")]; + tensor var_2923 = const()[name = tensor("op_2923"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_51_cast_fp16 = reshape(shape = var_2923, x = query_51_cast_fp16)[name = tensor("mh_q_51_cast_fp16")]; + tensor var_2925_to_fp16 = const()[name = tensor("op_2925_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2926_cast_fp16 = mul(x = mh_q_51_cast_fp16, y = var_2925_to_fp16)[name = tensor("op_2926_cast_fp16")]; + tensor var_2929 = const()[name = tensor("op_2929"), val = tensor([1, 16, 64, 1500])]; + tensor var_2930_cast_fp16 = reshape(shape = var_2929, x = key_51_cast_fp16)[name = tensor("op_2930_cast_fp16")]; + tensor mh_w_77_transpose_x_0 = const()[name = tensor("mh_w_77_transpose_x_0"), val = tensor(true)]; + tensor mh_w_77_transpose_y_0 = const()[name = tensor("mh_w_77_transpose_y_0"), val = tensor(false)]; + tensor 
mh_w_77_cast_fp16 = matmul(transpose_x = mh_w_77_transpose_x_0, transpose_y = mh_w_77_transpose_y_0, x = var_2926_cast_fp16, y = var_2930_cast_fp16)[name = tensor("mh_w_77_cast_fp16")]; + tensor obj_181_cast_fp16 = softmax(axis = var_2772, x = mh_w_77_cast_fp16)[name = tensor("obj_181_cast_fp16")]; + tensor var_2934 = const()[name = tensor("op_2934"), val = tensor([1, 16, 64, 1500])]; + tensor var_2935_cast_fp16 = reshape(shape = var_2934, x = value_51_cast_fp16)[name = tensor("op_2935_cast_fp16")]; + tensor attn_51_transpose_x_0 = const()[name = tensor("attn_51_transpose_x_0"), val = tensor(false)]; + tensor attn_51_transpose_y_0 = const()[name = tensor("attn_51_transpose_y_0"), val = tensor(true)]; + tensor attn_51_cast_fp16 = matmul(transpose_x = attn_51_transpose_x_0, transpose_y = attn_51_transpose_y_0, x = var_2935_cast_fp16, y = obj_181_cast_fp16)[name = tensor("attn_51_cast_fp16")]; + tensor var_2938 = const()[name = tensor("op_2938"), val = tensor([1, 1024, 1, 1])]; + tensor input_123_cast_fp16 = reshape(shape = var_2938, x = attn_51_cast_fp16)[name = tensor("input_123_cast_fp16")]; + tensor obj_179_pad_type_0 = const()[name = tensor("obj_179_pad_type_0"), val = tensor("valid")]; + tensor obj_179_strides_0 = const()[name = tensor("obj_179_strides_0"), val = tensor([1, 1])]; + tensor obj_179_pad_0 = const()[name = tensor("obj_179_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_179_dilations_0 = const()[name = tensor("obj_179_dilations_0"), val = tensor([1, 1])]; + tensor obj_179_groups_0 = const()[name = tensor("obj_179_groups_0"), val = tensor(1)]; + tensor layers_12_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_12_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(524930368)))]; + tensor layers_12_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_12_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527027584)))]; + tensor obj_179_cast_fp16 = conv(bias = layers_12_encoder_attn_o_proj_bias_to_fp16, dilations = obj_179_dilations_0, groups = obj_179_groups_0, pad = obj_179_pad_0, pad_type = obj_179_pad_type_0, strides = obj_179_strides_0, weight = layers_12_encoder_attn_o_proj_weight_to_fp16, x = input_123_cast_fp16)[name = tensor("obj_179_cast_fp16")]; + tensor inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = obj_179_cast_fp16)[name = tensor("inputs_77_cast_fp16")]; + tensor out_77_axes_0 = const()[name = tensor("out_77_axes_0"), val = tensor([1])]; + tensor var_2956_to_fp16 = const()[name = tensor("op_2956_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_77_cast_fp16 = layer_norm(axes = out_77_axes_0, epsilon = var_2956_to_fp16, x = inputs_77_cast_fp16)[name = tensor("out_77_cast_fp16")]; + tensor input_125_gamma_0_to_fp16 = const()[name = tensor("input_125_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527029696)))]; + tensor input_125_beta_0_to_fp16 = const()[name = tensor("input_125_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527031808)))]; + tensor input_125_epsilon_0_to_fp16 = const()[name = tensor("input_125_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_125_cast_fp16 = batch_norm(beta = input_125_beta_0_to_fp16, epsilon = input_125_epsilon_0_to_fp16, gamma = input_125_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = 
out_77_cast_fp16)[name = tensor("input_125_cast_fp16")]; + tensor input_127_pad_type_0 = const()[name = tensor("input_127_pad_type_0"), val = tensor("valid")]; + tensor input_127_strides_0 = const()[name = tensor("input_127_strides_0"), val = tensor([1, 1])]; + tensor input_127_pad_0 = const()[name = tensor("input_127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_127_dilations_0 = const()[name = tensor("input_127_dilations_0"), val = tensor([1, 1])]; + tensor input_127_groups_0 = const()[name = tensor("input_127_groups_0"), val = tensor(1)]; + tensor layers_12_fc1_weight_to_fp16 = const()[name = tensor("layers_12_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527033920)))]; + tensor layers_12_fc1_bias_to_fp16 = const()[name = tensor("layers_12_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535422592)))]; + tensor input_127_cast_fp16 = conv(bias = layers_12_fc1_bias_to_fp16, dilations = input_127_dilations_0, groups = input_127_groups_0, pad = input_127_pad_0, pad_type = input_127_pad_type_0, strides = input_127_strides_0, weight = layers_12_fc1_weight_to_fp16, x = input_125_cast_fp16)[name = tensor("input_127_cast_fp16")]; + tensor input_129_mode_0 = const()[name = tensor("input_129_mode_0"), val = tensor("EXACT")]; + tensor input_129_cast_fp16 = gelu(mode = input_129_mode_0, x = input_127_cast_fp16)[name = tensor("input_129_cast_fp16")]; + tensor hidden_states_27_pad_type_0 = const()[name = tensor("hidden_states_27_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_27_strides_0 = const()[name = tensor("hidden_states_27_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_0 = const()[name = tensor("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_27_dilations_0 = const()[name = tensor("hidden_states_27_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_27_groups_0 = const()[name = tensor("hidden_states_27_groups_0"), val = tensor(1)]; + tensor layers_12_fc2_weight_to_fp16 = const()[name = tensor("layers_12_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535430848)))]; + tensor layers_12_fc2_bias_to_fp16 = const()[name = tensor("layers_12_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(543819520)))]; + tensor hidden_states_27_cast_fp16 = conv(bias = layers_12_fc2_bias_to_fp16, dilations = hidden_states_27_dilations_0, groups = hidden_states_27_groups_0, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = hidden_states_27_strides_0, weight = layers_12_fc2_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("hidden_states_27_cast_fp16")]; + tensor inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = hidden_states_27_cast_fp16)[name = tensor("inputs_79_cast_fp16")]; + tensor var_2991 = const()[name = tensor("op_2991"), val = tensor(3)]; + tensor out_79_axes_0 = const()[name = tensor("out_79_axes_0"), val = tensor([1])]; + tensor var_3016_to_fp16 = const()[name = tensor("op_3016_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_3016_to_fp16, x = inputs_79_cast_fp16)[name = tensor("out_79_cast_fp16")]; + tensor obj_183_gamma_0_to_fp16 = const()[name = tensor("obj_183_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(543821632)))]; + tensor 
obj_183_beta_0_to_fp16 = const()[name = tensor("obj_183_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(543823744)))]; + tensor obj_183_epsilon_0_to_fp16 = const()[name = tensor("obj_183_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_183_cast_fp16 = batch_norm(beta = obj_183_beta_0_to_fp16, epsilon = obj_183_epsilon_0_to_fp16, gamma = obj_183_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_79_cast_fp16)[name = tensor("obj_183_cast_fp16")]; + tensor query_53_pad_type_0 = const()[name = tensor("query_53_pad_type_0"), val = tensor("valid")]; + tensor query_53_strides_0 = const()[name = tensor("query_53_strides_0"), val = tensor([1, 1])]; + tensor query_53_pad_0 = const()[name = tensor("query_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_53_dilations_0 = const()[name = tensor("query_53_dilations_0"), val = tensor([1, 1])]; + tensor query_53_groups_0 = const()[name = tensor("query_53_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(543825856)))]; + tensor layers_13_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545923072)))]; + tensor query_53_cast_fp16 = conv(bias = layers_13_self_attn_q_proj_bias_to_fp16, dilations = query_53_dilations_0, groups = query_53_groups_0, pad = query_53_pad_0, pad_type = query_53_pad_type_0, strides = query_53_strides_0, weight = layers_13_self_attn_q_proj_weight_to_fp16, x = obj_183_cast_fp16)[name = tensor("query_53_cast_fp16")]; + tensor current_key_27_pad_type_0 = const()[name = tensor("current_key_27_pad_type_0"), val = tensor("valid")]; + tensor current_key_27_strides_0 = const()[name = tensor("current_key_27_strides_0"), val = tensor([1, 1])]; + tensor current_key_27_pad_0 = const()[name = tensor("current_key_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_27_dilations_0 = const()[name = tensor("current_key_27_dilations_0"), val = tensor([1, 1])]; + tensor current_key_27_groups_0 = const()[name = tensor("current_key_27_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545925184)))]; + tensor current_key_27_cast_fp16 = conv(dilations = current_key_27_dilations_0, groups = current_key_27_groups_0, pad = current_key_27_pad_0, pad_type = current_key_27_pad_type_0, strides = current_key_27_strides_0, weight = layers_13_self_attn_k_proj_weight_to_fp16, x = obj_183_cast_fp16)[name = tensor("current_key_27_cast_fp16")]; + tensor current_value_27_pad_type_0 = const()[name = tensor("current_value_27_pad_type_0"), val = tensor("valid")]; + tensor current_value_27_strides_0 = const()[name = tensor("current_value_27_strides_0"), val = tensor([1, 1])]; + tensor current_value_27_pad_0 = const()[name = tensor("current_value_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_27_dilations_0 = const()[name = tensor("current_value_27_dilations_0"), val = tensor([1, 1])]; + tensor current_value_27_groups_0 = const()[name = tensor("current_value_27_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_v_proj_weight_to_fp16 
= const()[name = tensor("layers_13_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(548022400)))]; + tensor layers_13_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550119616)))]; + tensor current_value_27_cast_fp16 = conv(bias = layers_13_self_attn_v_proj_bias_to_fp16, dilations = current_value_27_dilations_0, groups = current_value_27_groups_0, pad = current_value_27_pad_0, pad_type = current_value_27_pad_type_0, strides = current_value_27_strides_0, weight = layers_13_self_attn_v_proj_weight_to_fp16, x = obj_183_cast_fp16)[name = tensor("current_value_27_cast_fp16")]; + tensor var_3055_cast_fp16 = mul(x = var_87_cast_fp16_13, y = var_207_cast_fp16)[name = tensor("op_3055_cast_fp16")]; + tensor var_3056_cast_fp16 = mul(x = current_key_27_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3056_cast_fp16")]; + tensor key_53_cast_fp16 = add(x = var_3055_cast_fp16, y = var_3056_cast_fp16)[name = tensor("key_53_cast_fp16")]; + tensor var_3059_cast_fp16 = mul(x = var_114_cast_fp16_13, y = var_207_cast_fp16)[name = tensor("op_3059_cast_fp16")]; + tensor var_3060_cast_fp16 = mul(x = current_value_27_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3060_cast_fp16")]; + tensor value_53_cast_fp16 = add(x = var_3059_cast_fp16, y = var_3060_cast_fp16)[name = tensor("value_53_cast_fp16")]; + tensor var_3064 = const()[name = tensor("op_3064"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_53_cast_fp16 = reshape(shape = var_3064, x = query_53_cast_fp16)[name = tensor("mh_q_53_cast_fp16")]; + tensor var_3066_to_fp16 = const()[name = tensor("op_3066_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3067_cast_fp16 = mul(x = mh_q_53_cast_fp16, y = var_3066_to_fp16)[name = tensor("op_3067_cast_fp16")]; + tensor var_3070 = const()[name = tensor("op_3070"), val = tensor([1, 16, 64, 448])]; + tensor var_3071_cast_fp16 = reshape(shape = var_3070, x = key_53_cast_fp16)[name = tensor("op_3071_cast_fp16")]; + tensor mh_w_79_transpose_x_0 = const()[name = tensor("mh_w_79_transpose_x_0"), val = tensor(true)]; + tensor mh_w_79_transpose_y_0 = const()[name = tensor("mh_w_79_transpose_y_0"), val = tensor(false)]; + tensor mh_w_79_cast_fp16 = matmul(transpose_x = mh_w_79_transpose_x_0, transpose_y = mh_w_79_transpose_y_0, x = var_3067_cast_fp16, y = var_3071_cast_fp16)[name = tensor("mh_w_79_cast_fp16")]; + tensor mh_w_81_cast_fp16 = add(x = mh_w_79_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_81_cast_fp16")]; + tensor var_3079_cast_fp16 = softmax(axis = var_2991, x = mh_w_81_cast_fp16)[name = tensor("op_3079_cast_fp16")]; + tensor var_3080 = const()[name = tensor("op_3080"), val = tensor([1, 16, 64, 448])]; + tensor var_3081_cast_fp16 = reshape(shape = var_3080, x = value_53_cast_fp16)[name = tensor("op_3081_cast_fp16")]; + tensor attn_53_transpose_x_0 = const()[name = tensor("attn_53_transpose_x_0"), val = tensor(false)]; + tensor attn_53_transpose_y_0 = const()[name = tensor("attn_53_transpose_y_0"), val = tensor(true)]; + tensor attn_53_cast_fp16 = matmul(transpose_x = attn_53_transpose_x_0, transpose_y = attn_53_transpose_y_0, x = var_3081_cast_fp16, y = var_3079_cast_fp16)[name = tensor("attn_53_cast_fp16")]; + tensor var_3084 = const()[name = tensor("op_3084"), val = tensor([1, 1024, 1, 1])]; + tensor input_131_cast_fp16 = reshape(shape = var_3084, x = attn_53_cast_fp16)[name = 
tensor("input_131_cast_fp16")]; + tensor obj_189_pad_type_0 = const()[name = tensor("obj_189_pad_type_0"), val = tensor("valid")]; + tensor obj_189_strides_0 = const()[name = tensor("obj_189_strides_0"), val = tensor([1, 1])]; + tensor obj_189_pad_0 = const()[name = tensor("obj_189_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_189_dilations_0 = const()[name = tensor("obj_189_dilations_0"), val = tensor([1, 1])]; + tensor obj_189_groups_0 = const()[name = tensor("obj_189_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550121728)))]; + tensor layers_13_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552218944)))]; + tensor obj_189_cast_fp16 = conv(bias = layers_13_self_attn_o_proj_bias_to_fp16, dilations = obj_189_dilations_0, groups = obj_189_groups_0, pad = obj_189_pad_0, pad_type = obj_189_pad_type_0, strides = obj_189_strides_0, weight = layers_13_self_attn_o_proj_weight_to_fp16, x = input_131_cast_fp16)[name = tensor("obj_189_cast_fp16")]; + tensor inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = obj_189_cast_fp16)[name = tensor("inputs_81_cast_fp16")]; + tensor out_81_axes_0 = const()[name = tensor("out_81_axes_0"), val = tensor([1])]; + tensor var_3106_to_fp16 = const()[name = tensor("op_3106_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_81_cast_fp16 = layer_norm(axes = out_81_axes_0, epsilon = var_3106_to_fp16, x = inputs_81_cast_fp16)[name = tensor("out_81_cast_fp16")]; + tensor obj_191_gamma_0_to_fp16 = const()[name = tensor("obj_191_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552221056)))]; + tensor obj_191_beta_0_to_fp16 = const()[name = tensor("obj_191_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552223168)))]; + tensor obj_191_epsilon_0_to_fp16 = const()[name = tensor("obj_191_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_191_cast_fp16 = batch_norm(beta = obj_191_beta_0_to_fp16, epsilon = obj_191_epsilon_0_to_fp16, gamma = obj_191_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_81_cast_fp16)[name = tensor("obj_191_cast_fp16")]; + tensor query_55_pad_type_0 = const()[name = tensor("query_55_pad_type_0"), val = tensor("valid")]; + tensor query_55_strides_0 = const()[name = tensor("query_55_strides_0"), val = tensor([1, 1])]; + tensor query_55_pad_0 = const()[name = tensor("query_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_55_dilations_0 = const()[name = tensor("query_55_dilations_0"), val = tensor([1, 1])]; + tensor query_55_groups_0 = const()[name = tensor("query_55_groups_0"), val = tensor(1)]; + tensor layers_13_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_13_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552225280)))]; + tensor layers_13_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_13_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554322496)))]; + tensor query_55_cast_fp16 = conv(bias = layers_13_encoder_attn_q_proj_bias_to_fp16, dilations = 
query_55_dilations_0, groups = query_55_groups_0, pad = query_55_pad_0, pad_type = query_55_pad_type_0, strides = query_55_strides_0, weight = layers_13_encoder_attn_q_proj_weight_to_fp16, x = obj_191_cast_fp16)[name = tensor("query_55_cast_fp16")]; + tensor key_55_pad_type_0 = const()[name = tensor("key_55_pad_type_0"), val = tensor("valid")]; + tensor key_55_strides_0 = const()[name = tensor("key_55_strides_0"), val = tensor([1, 1])]; + tensor key_55_pad_0 = const()[name = tensor("key_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_55_dilations_0 = const()[name = tensor("key_55_dilations_0"), val = tensor([1, 1])]; + tensor key_55_groups_0 = const()[name = tensor("key_55_groups_0"), val = tensor(1)]; + tensor layers_13_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_13_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554324608)))]; + tensor key_55_cast_fp16 = conv(dilations = key_55_dilations_0, groups = key_55_groups_0, pad = key_55_pad_0, pad_type = key_55_pad_type_0, strides = key_55_strides_0, weight = layers_13_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_55_cast_fp16")]; + tensor value_55_pad_type_0 = const()[name = tensor("value_55_pad_type_0"), val = tensor("valid")]; + tensor value_55_strides_0 = const()[name = tensor("value_55_strides_0"), val = tensor([1, 1])]; + tensor value_55_pad_0 = const()[name = tensor("value_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_55_dilations_0 = const()[name = tensor("value_55_dilations_0"), val = tensor([1, 1])]; + tensor value_55_groups_0 = const()[name = tensor("value_55_groups_0"), val = tensor(1)]; + tensor layers_13_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_13_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556421824)))]; + tensor layers_13_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_13_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558519040)))]; + tensor value_55_cast_fp16 = conv(bias = layers_13_encoder_attn_v_proj_bias_to_fp16, dilations = value_55_dilations_0, groups = value_55_groups_0, pad = value_55_pad_0, pad_type = value_55_pad_type_0, strides = value_55_strides_0, weight = layers_13_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_55_cast_fp16")]; + tensor var_3142 = const()[name = tensor("op_3142"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_55_cast_fp16 = reshape(shape = var_3142, x = query_55_cast_fp16)[name = tensor("mh_q_55_cast_fp16")]; + tensor var_3144_to_fp16 = const()[name = tensor("op_3144_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3145_cast_fp16 = mul(x = mh_q_55_cast_fp16, y = var_3144_to_fp16)[name = tensor("op_3145_cast_fp16")]; + tensor var_3148 = const()[name = tensor("op_3148"), val = tensor([1, 16, 64, 1500])]; + tensor var_3149_cast_fp16 = reshape(shape = var_3148, x = key_55_cast_fp16)[name = tensor("op_3149_cast_fp16")]; + tensor mh_w_83_transpose_x_0 = const()[name = tensor("mh_w_83_transpose_x_0"), val = tensor(true)]; + tensor mh_w_83_transpose_y_0 = const()[name = tensor("mh_w_83_transpose_y_0"), val = tensor(false)]; + tensor mh_w_83_cast_fp16 = matmul(transpose_x = mh_w_83_transpose_x_0, transpose_y = mh_w_83_transpose_y_0, x = var_3145_cast_fp16, y = var_3149_cast_fp16)[name = tensor("mh_w_83_cast_fp16")]; + tensor 
obj_195_cast_fp16 = softmax(axis = var_2991, x = mh_w_83_cast_fp16)[name = tensor("obj_195_cast_fp16")]; + tensor var_3153 = const()[name = tensor("op_3153"), val = tensor([1, 16, 64, 1500])]; + tensor var_3154_cast_fp16 = reshape(shape = var_3153, x = value_55_cast_fp16)[name = tensor("op_3154_cast_fp16")]; + tensor attn_55_transpose_x_0 = const()[name = tensor("attn_55_transpose_x_0"), val = tensor(false)]; + tensor attn_55_transpose_y_0 = const()[name = tensor("attn_55_transpose_y_0"), val = tensor(true)]; + tensor attn_55_cast_fp16 = matmul(transpose_x = attn_55_transpose_x_0, transpose_y = attn_55_transpose_y_0, x = var_3154_cast_fp16, y = obj_195_cast_fp16)[name = tensor("attn_55_cast_fp16")]; + tensor var_3157 = const()[name = tensor("op_3157"), val = tensor([1, 1024, 1, 1])]; + tensor input_133_cast_fp16 = reshape(shape = var_3157, x = attn_55_cast_fp16)[name = tensor("input_133_cast_fp16")]; + tensor obj_193_pad_type_0 = const()[name = tensor("obj_193_pad_type_0"), val = tensor("valid")]; + tensor obj_193_strides_0 = const()[name = tensor("obj_193_strides_0"), val = tensor([1, 1])]; + tensor obj_193_pad_0 = const()[name = tensor("obj_193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_193_dilations_0 = const()[name = tensor("obj_193_dilations_0"), val = tensor([1, 1])]; + tensor obj_193_groups_0 = const()[name = tensor("obj_193_groups_0"), val = tensor(1)]; + tensor layers_13_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_13_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558521152)))]; + tensor layers_13_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_13_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560618368)))]; + tensor obj_193_cast_fp16 = conv(bias = layers_13_encoder_attn_o_proj_bias_to_fp16, dilations = obj_193_dilations_0, groups = obj_193_groups_0, pad = obj_193_pad_0, pad_type = obj_193_pad_type_0, strides = obj_193_strides_0, weight = layers_13_encoder_attn_o_proj_weight_to_fp16, x = input_133_cast_fp16)[name = tensor("obj_193_cast_fp16")]; + tensor inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = obj_193_cast_fp16)[name = tensor("inputs_83_cast_fp16")]; + tensor out_83_axes_0 = const()[name = tensor("out_83_axes_0"), val = tensor([1])]; + tensor var_3178_to_fp16 = const()[name = tensor("op_3178_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_83_cast_fp16 = layer_norm(axes = out_83_axes_0, epsilon = var_3178_to_fp16, x = inputs_83_cast_fp16)[name = tensor("out_83_cast_fp16")]; + tensor input_135_gamma_0_to_fp16 = const()[name = tensor("input_135_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560620480)))]; + tensor input_135_beta_0_to_fp16 = const()[name = tensor("input_135_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560622592)))]; + tensor input_135_epsilon_0_to_fp16 = const()[name = tensor("input_135_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_135_cast_fp16 = batch_norm(beta = input_135_beta_0_to_fp16, epsilon = input_135_epsilon_0_to_fp16, gamma = input_135_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_83_cast_fp16)[name = tensor("input_135_cast_fp16")]; + tensor input_137_pad_type_0 = const()[name = tensor("input_137_pad_type_0"), val = tensor("valid")]; + tensor input_137_strides_0 = 
const()[name = tensor("input_137_strides_0"), val = tensor([1, 1])]; + tensor input_137_pad_0 = const()[name = tensor("input_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_137_dilations_0 = const()[name = tensor("input_137_dilations_0"), val = tensor([1, 1])]; + tensor input_137_groups_0 = const()[name = tensor("input_137_groups_0"), val = tensor(1)]; + tensor layers_13_fc1_weight_to_fp16 = const()[name = tensor("layers_13_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560624704)))]; + tensor layers_13_fc1_bias_to_fp16 = const()[name = tensor("layers_13_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569013376)))]; + tensor input_137_cast_fp16 = conv(bias = layers_13_fc1_bias_to_fp16, dilations = input_137_dilations_0, groups = input_137_groups_0, pad = input_137_pad_0, pad_type = input_137_pad_type_0, strides = input_137_strides_0, weight = layers_13_fc1_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("input_137_cast_fp16")]; + tensor input_139_mode_0 = const()[name = tensor("input_139_mode_0"), val = tensor("EXACT")]; + tensor input_139_cast_fp16 = gelu(mode = input_139_mode_0, x = input_137_cast_fp16)[name = tensor("input_139_cast_fp16")]; + tensor hidden_states_29_pad_type_0 = const()[name = tensor("hidden_states_29_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_29_strides_0 = const()[name = tensor("hidden_states_29_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_0 = const()[name = tensor("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_29_dilations_0 = const()[name = tensor("hidden_states_29_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_29_groups_0 = const()[name = tensor("hidden_states_29_groups_0"), val = tensor(1)]; + tensor layers_13_fc2_weight_to_fp16 = const()[name = tensor("layers_13_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569021632)))]; + tensor layers_13_fc2_bias_to_fp16 = const()[name = tensor("layers_13_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577410304)))]; + tensor hidden_states_29_cast_fp16 = conv(bias = layers_13_fc2_bias_to_fp16, dilations = hidden_states_29_dilations_0, groups = hidden_states_29_groups_0, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = hidden_states_29_strides_0, weight = layers_13_fc2_weight_to_fp16, x = input_139_cast_fp16)[name = tensor("hidden_states_29_cast_fp16")]; + tensor inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = hidden_states_29_cast_fp16)[name = tensor("inputs_85_cast_fp16")]; + tensor var_3214 = const()[name = tensor("op_3214"), val = tensor(3)]; + tensor out_85_axes_0 = const()[name = tensor("out_85_axes_0"), val = tensor([1])]; + tensor var_3239_to_fp16 = const()[name = tensor("op_3239_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_3239_to_fp16, x = inputs_85_cast_fp16)[name = tensor("out_85_cast_fp16")]; + tensor obj_197_gamma_0_to_fp16 = const()[name = tensor("obj_197_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577412416)))]; + tensor obj_197_beta_0_to_fp16 = const()[name = tensor("obj_197_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577414528)))]; + tensor 
obj_197_epsilon_0_to_fp16 = const()[name = tensor("obj_197_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_197_cast_fp16 = batch_norm(beta = obj_197_beta_0_to_fp16, epsilon = obj_197_epsilon_0_to_fp16, gamma = obj_197_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_85_cast_fp16)[name = tensor("obj_197_cast_fp16")]; + tensor query_57_pad_type_0 = const()[name = tensor("query_57_pad_type_0"), val = tensor("valid")]; + tensor query_57_strides_0 = const()[name = tensor("query_57_strides_0"), val = tensor([1, 1])]; + tensor query_57_pad_0 = const()[name = tensor("query_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_57_dilations_0 = const()[name = tensor("query_57_dilations_0"), val = tensor([1, 1])]; + tensor query_57_groups_0 = const()[name = tensor("query_57_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577416640)))]; + tensor layers_14_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579513856)))]; + tensor query_57_cast_fp16 = conv(bias = layers_14_self_attn_q_proj_bias_to_fp16, dilations = query_57_dilations_0, groups = query_57_groups_0, pad = query_57_pad_0, pad_type = query_57_pad_type_0, strides = query_57_strides_0, weight = layers_14_self_attn_q_proj_weight_to_fp16, x = obj_197_cast_fp16)[name = tensor("query_57_cast_fp16")]; + tensor current_key_29_pad_type_0 = const()[name = tensor("current_key_29_pad_type_0"), val = tensor("valid")]; + tensor current_key_29_strides_0 = const()[name = tensor("current_key_29_strides_0"), val = tensor([1, 1])]; + tensor current_key_29_pad_0 = const()[name = tensor("current_key_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_29_dilations_0 = const()[name = tensor("current_key_29_dilations_0"), val = tensor([1, 1])]; + tensor current_key_29_groups_0 = const()[name = tensor("current_key_29_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579515968)))]; + tensor current_key_29_cast_fp16 = conv(dilations = current_key_29_dilations_0, groups = current_key_29_groups_0, pad = current_key_29_pad_0, pad_type = current_key_29_pad_type_0, strides = current_key_29_strides_0, weight = layers_14_self_attn_k_proj_weight_to_fp16, x = obj_197_cast_fp16)[name = tensor("current_key_29_cast_fp16")]; + tensor current_value_29_pad_type_0 = const()[name = tensor("current_value_29_pad_type_0"), val = tensor("valid")]; + tensor current_value_29_strides_0 = const()[name = tensor("current_value_29_strides_0"), val = tensor([1, 1])]; + tensor current_value_29_pad_0 = const()[name = tensor("current_value_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_29_dilations_0 = const()[name = tensor("current_value_29_dilations_0"), val = tensor([1, 1])]; + tensor current_value_29_groups_0 = const()[name = tensor("current_value_29_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581613184)))]; + tensor 
layers_14_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583710400)))]; + tensor current_value_29_cast_fp16 = conv(bias = layers_14_self_attn_v_proj_bias_to_fp16, dilations = current_value_29_dilations_0, groups = current_value_29_groups_0, pad = current_value_29_pad_0, pad_type = current_value_29_pad_type_0, strides = current_value_29_strides_0, weight = layers_14_self_attn_v_proj_weight_to_fp16, x = obj_197_cast_fp16)[name = tensor("current_value_29_cast_fp16")]; + tensor var_3278_cast_fp16 = mul(x = var_87_cast_fp16_14, y = var_207_cast_fp16)[name = tensor("op_3278_cast_fp16")]; + tensor var_3279_cast_fp16 = mul(x = current_key_29_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3279_cast_fp16")]; + tensor key_57_cast_fp16 = add(x = var_3278_cast_fp16, y = var_3279_cast_fp16)[name = tensor("key_57_cast_fp16")]; + tensor var_3282_cast_fp16 = mul(x = var_114_cast_fp16_14, y = var_207_cast_fp16)[name = tensor("op_3282_cast_fp16")]; + tensor var_3283_cast_fp16 = mul(x = current_value_29_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3283_cast_fp16")]; + tensor value_57_cast_fp16 = add(x = var_3282_cast_fp16, y = var_3283_cast_fp16)[name = tensor("value_57_cast_fp16")]; + tensor var_3287 = const()[name = tensor("op_3287"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_57_cast_fp16 = reshape(shape = var_3287, x = query_57_cast_fp16)[name = tensor("mh_q_57_cast_fp16")]; + tensor var_3289_to_fp16 = const()[name = tensor("op_3289_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3290_cast_fp16 = mul(x = mh_q_57_cast_fp16, y = var_3289_to_fp16)[name = tensor("op_3290_cast_fp16")]; + tensor var_3293 = const()[name = tensor("op_3293"), val = tensor([1, 16, 64, 448])]; + tensor var_3294_cast_fp16 = reshape(shape = var_3293, x = key_57_cast_fp16)[name = tensor("op_3294_cast_fp16")]; + tensor mh_w_85_transpose_x_0 = const()[name = tensor("mh_w_85_transpose_x_0"), val = tensor(true)]; + tensor mh_w_85_transpose_y_0 = const()[name = tensor("mh_w_85_transpose_y_0"), val = tensor(false)]; + tensor mh_w_85_cast_fp16 = matmul(transpose_x = mh_w_85_transpose_x_0, transpose_y = mh_w_85_transpose_y_0, x = var_3290_cast_fp16, y = var_3294_cast_fp16)[name = tensor("mh_w_85_cast_fp16")]; + tensor mh_w_87_cast_fp16 = add(x = mh_w_85_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_87_cast_fp16")]; + tensor var_3302_cast_fp16 = softmax(axis = var_3214, x = mh_w_87_cast_fp16)[name = tensor("op_3302_cast_fp16")]; + tensor var_3303 = const()[name = tensor("op_3303"), val = tensor([1, 16, 64, 448])]; + tensor var_3304_cast_fp16 = reshape(shape = var_3303, x = value_57_cast_fp16)[name = tensor("op_3304_cast_fp16")]; + tensor attn_57_transpose_x_0 = const()[name = tensor("attn_57_transpose_x_0"), val = tensor(false)]; + tensor attn_57_transpose_y_0 = const()[name = tensor("attn_57_transpose_y_0"), val = tensor(true)]; + tensor attn_57_cast_fp16 = matmul(transpose_x = attn_57_transpose_x_0, transpose_y = attn_57_transpose_y_0, x = var_3304_cast_fp16, y = var_3302_cast_fp16)[name = tensor("attn_57_cast_fp16")]; + tensor var_3307 = const()[name = tensor("op_3307"), val = tensor([1, 1024, 1, 1])]; + tensor input_141_cast_fp16 = reshape(shape = var_3307, x = attn_57_cast_fp16)[name = tensor("input_141_cast_fp16")]; + tensor obj_203_pad_type_0 = const()[name = tensor("obj_203_pad_type_0"), val = tensor("valid")]; + tensor obj_203_strides_0 = const()[name = 
tensor("obj_203_strides_0"), val = tensor([1, 1])]; + tensor obj_203_pad_0 = const()[name = tensor("obj_203_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_203_dilations_0 = const()[name = tensor("obj_203_dilations_0"), val = tensor([1, 1])]; + tensor obj_203_groups_0 = const()[name = tensor("obj_203_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583712512)))]; + tensor layers_14_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585809728)))]; + tensor obj_203_cast_fp16 = conv(bias = layers_14_self_attn_o_proj_bias_to_fp16, dilations = obj_203_dilations_0, groups = obj_203_groups_0, pad = obj_203_pad_0, pad_type = obj_203_pad_type_0, strides = obj_203_strides_0, weight = layers_14_self_attn_o_proj_weight_to_fp16, x = input_141_cast_fp16)[name = tensor("obj_203_cast_fp16")]; + tensor inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = obj_203_cast_fp16)[name = tensor("inputs_87_cast_fp16")]; + tensor out_87_axes_0 = const()[name = tensor("out_87_axes_0"), val = tensor([1])]; + tensor var_3329_to_fp16 = const()[name = tensor("op_3329_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_87_cast_fp16 = layer_norm(axes = out_87_axes_0, epsilon = var_3329_to_fp16, x = inputs_87_cast_fp16)[name = tensor("out_87_cast_fp16")]; + tensor obj_205_gamma_0_to_fp16 = const()[name = tensor("obj_205_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585811840)))]; + tensor obj_205_beta_0_to_fp16 = const()[name = tensor("obj_205_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585813952)))]; + tensor obj_205_epsilon_0_to_fp16 = const()[name = tensor("obj_205_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_205_cast_fp16 = batch_norm(beta = obj_205_beta_0_to_fp16, epsilon = obj_205_epsilon_0_to_fp16, gamma = obj_205_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_87_cast_fp16)[name = tensor("obj_205_cast_fp16")]; + tensor query_59_pad_type_0 = const()[name = tensor("query_59_pad_type_0"), val = tensor("valid")]; + tensor query_59_strides_0 = const()[name = tensor("query_59_strides_0"), val = tensor([1, 1])]; + tensor query_59_pad_0 = const()[name = tensor("query_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_59_dilations_0 = const()[name = tensor("query_59_dilations_0"), val = tensor([1, 1])]; + tensor query_59_groups_0 = const()[name = tensor("query_59_groups_0"), val = tensor(1)]; + tensor layers_14_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_14_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585816064)))]; + tensor layers_14_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_14_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(587913280)))]; + tensor query_59_cast_fp16 = conv(bias = layers_14_encoder_attn_q_proj_bias_to_fp16, dilations = query_59_dilations_0, groups = query_59_groups_0, pad = query_59_pad_0, pad_type = query_59_pad_type_0, strides = query_59_strides_0, weight = layers_14_encoder_attn_q_proj_weight_to_fp16, x = 
obj_205_cast_fp16)[name = tensor("query_59_cast_fp16")]; + tensor key_59_pad_type_0 = const()[name = tensor("key_59_pad_type_0"), val = tensor("valid")]; + tensor key_59_strides_0 = const()[name = tensor("key_59_strides_0"), val = tensor([1, 1])]; + tensor key_59_pad_0 = const()[name = tensor("key_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_59_dilations_0 = const()[name = tensor("key_59_dilations_0"), val = tensor([1, 1])]; + tensor key_59_groups_0 = const()[name = tensor("key_59_groups_0"), val = tensor(1)]; + tensor layers_14_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_14_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(587915392)))]; + tensor key_59_cast_fp16 = conv(dilations = key_59_dilations_0, groups = key_59_groups_0, pad = key_59_pad_0, pad_type = key_59_pad_type_0, strides = key_59_strides_0, weight = layers_14_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_59_cast_fp16")]; + tensor value_59_pad_type_0 = const()[name = tensor("value_59_pad_type_0"), val = tensor("valid")]; + tensor value_59_strides_0 = const()[name = tensor("value_59_strides_0"), val = tensor([1, 1])]; + tensor value_59_pad_0 = const()[name = tensor("value_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_59_dilations_0 = const()[name = tensor("value_59_dilations_0"), val = tensor([1, 1])]; + tensor value_59_groups_0 = const()[name = tensor("value_59_groups_0"), val = tensor(1)]; + tensor layers_14_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_14_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(590012608)))]; + tensor layers_14_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_14_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(592109824)))]; + tensor value_59_cast_fp16 = conv(bias = layers_14_encoder_attn_v_proj_bias_to_fp16, dilations = value_59_dilations_0, groups = value_59_groups_0, pad = value_59_pad_0, pad_type = value_59_pad_type_0, strides = value_59_strides_0, weight = layers_14_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_59_cast_fp16")]; + tensor var_3365 = const()[name = tensor("op_3365"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_59_cast_fp16 = reshape(shape = var_3365, x = query_59_cast_fp16)[name = tensor("mh_q_59_cast_fp16")]; + tensor var_3367_to_fp16 = const()[name = tensor("op_3367_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3368_cast_fp16 = mul(x = mh_q_59_cast_fp16, y = var_3367_to_fp16)[name = tensor("op_3368_cast_fp16")]; + tensor var_3371 = const()[name = tensor("op_3371"), val = tensor([1, 16, 64, 1500])]; + tensor var_3372_cast_fp16 = reshape(shape = var_3371, x = key_59_cast_fp16)[name = tensor("op_3372_cast_fp16")]; + tensor mh_w_89_transpose_x_0 = const()[name = tensor("mh_w_89_transpose_x_0"), val = tensor(true)]; + tensor mh_w_89_transpose_y_0 = const()[name = tensor("mh_w_89_transpose_y_0"), val = tensor(false)]; + tensor mh_w_89_cast_fp16 = matmul(transpose_x = mh_w_89_transpose_x_0, transpose_y = mh_w_89_transpose_y_0, x = var_3368_cast_fp16, y = var_3372_cast_fp16)[name = tensor("mh_w_89_cast_fp16")]; + tensor obj_209_cast_fp16 = softmax(axis = var_3214, x = mh_w_89_cast_fp16)[name = tensor("obj_209_cast_fp16")]; + tensor var_3376 = const()[name = tensor("op_3376"), val = tensor([1, 16, 64, 1500])]; + 
tensor var_3377_cast_fp16 = reshape(shape = var_3376, x = value_59_cast_fp16)[name = tensor("op_3377_cast_fp16")]; + tensor attn_59_transpose_x_0 = const()[name = tensor("attn_59_transpose_x_0"), val = tensor(false)]; + tensor attn_59_transpose_y_0 = const()[name = tensor("attn_59_transpose_y_0"), val = tensor(true)]; + tensor attn_59_cast_fp16 = matmul(transpose_x = attn_59_transpose_x_0, transpose_y = attn_59_transpose_y_0, x = var_3377_cast_fp16, y = obj_209_cast_fp16)[name = tensor("attn_59_cast_fp16")]; + tensor var_3380 = const()[name = tensor("op_3380"), val = tensor([1, 1024, 1, 1])]; + tensor input_143_cast_fp16 = reshape(shape = var_3380, x = attn_59_cast_fp16)[name = tensor("input_143_cast_fp16")]; + tensor obj_207_pad_type_0 = const()[name = tensor("obj_207_pad_type_0"), val = tensor("valid")]; + tensor obj_207_strides_0 = const()[name = tensor("obj_207_strides_0"), val = tensor([1, 1])]; + tensor obj_207_pad_0 = const()[name = tensor("obj_207_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_207_dilations_0 = const()[name = tensor("obj_207_dilations_0"), val = tensor([1, 1])]; + tensor obj_207_groups_0 = const()[name = tensor("obj_207_groups_0"), val = tensor(1)]; + tensor layers_14_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_14_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(592111936)))]; + tensor layers_14_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_14_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(594209152)))]; + tensor obj_207_cast_fp16 = conv(bias = layers_14_encoder_attn_o_proj_bias_to_fp16, dilations = obj_207_dilations_0, groups = obj_207_groups_0, pad = obj_207_pad_0, pad_type = obj_207_pad_type_0, strides = obj_207_strides_0, weight = layers_14_encoder_attn_o_proj_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("obj_207_cast_fp16")]; + tensor inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = obj_207_cast_fp16)[name = tensor("inputs_89_cast_fp16")]; + tensor out_89_axes_0 = const()[name = tensor("out_89_axes_0"), val = tensor([1])]; + tensor var_3398_to_fp16 = const()[name = tensor("op_3398_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_89_cast_fp16 = layer_norm(axes = out_89_axes_0, epsilon = var_3398_to_fp16, x = inputs_89_cast_fp16)[name = tensor("out_89_cast_fp16")]; + tensor input_145_gamma_0_to_fp16 = const()[name = tensor("input_145_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(594211264)))]; + tensor input_145_beta_0_to_fp16 = const()[name = tensor("input_145_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(594213376)))]; + tensor input_145_epsilon_0_to_fp16 = const()[name = tensor("input_145_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_145_cast_fp16 = batch_norm(beta = input_145_beta_0_to_fp16, epsilon = input_145_epsilon_0_to_fp16, gamma = input_145_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_89_cast_fp16)[name = tensor("input_145_cast_fp16")]; + tensor input_147_pad_type_0 = const()[name = tensor("input_147_pad_type_0"), val = tensor("valid")]; + tensor input_147_strides_0 = const()[name = tensor("input_147_strides_0"), val = tensor([1, 1])]; + tensor input_147_pad_0 = const()[name = tensor("input_147_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_147_dilations_0 
= const()[name = tensor("input_147_dilations_0"), val = tensor([1, 1])]; + tensor input_147_groups_0 = const()[name = tensor("input_147_groups_0"), val = tensor(1)]; + tensor layers_14_fc1_weight_to_fp16 = const()[name = tensor("layers_14_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(594215488)))]; + tensor layers_14_fc1_bias_to_fp16 = const()[name = tensor("layers_14_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(602604160)))]; + tensor input_147_cast_fp16 = conv(bias = layers_14_fc1_bias_to_fp16, dilations = input_147_dilations_0, groups = input_147_groups_0, pad = input_147_pad_0, pad_type = input_147_pad_type_0, strides = input_147_strides_0, weight = layers_14_fc1_weight_to_fp16, x = input_145_cast_fp16)[name = tensor("input_147_cast_fp16")]; + tensor input_149_mode_0 = const()[name = tensor("input_149_mode_0"), val = tensor("EXACT")]; + tensor input_149_cast_fp16 = gelu(mode = input_149_mode_0, x = input_147_cast_fp16)[name = tensor("input_149_cast_fp16")]; + tensor hidden_states_31_pad_type_0 = const()[name = tensor("hidden_states_31_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_31_strides_0 = const()[name = tensor("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = tensor("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = tensor("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_31_groups_0 = const()[name = tensor("hidden_states_31_groups_0"), val = tensor(1)]; + tensor layers_14_fc2_weight_to_fp16 = const()[name = tensor("layers_14_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(602612416)))]; + tensor layers_14_fc2_bias_to_fp16 = const()[name = tensor("layers_14_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(611001088)))]; + tensor hidden_states_31_cast_fp16 = conv(bias = layers_14_fc2_bias_to_fp16, dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = layers_14_fc2_weight_to_fp16, x = input_149_cast_fp16)[name = tensor("hidden_states_31_cast_fp16")]; + tensor inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = hidden_states_31_cast_fp16)[name = tensor("inputs_91_cast_fp16")]; + tensor var_3433 = const()[name = tensor("op_3433"), val = tensor(3)]; + tensor out_91_axes_0 = const()[name = tensor("out_91_axes_0"), val = tensor([1])]; + tensor var_3458_to_fp16 = const()[name = tensor("op_3458_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_3458_to_fp16, x = inputs_91_cast_fp16)[name = tensor("out_91_cast_fp16")]; + tensor obj_211_gamma_0_to_fp16 = const()[name = tensor("obj_211_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(611003200)))]; + tensor obj_211_beta_0_to_fp16 = const()[name = tensor("obj_211_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(611005312)))]; + tensor obj_211_epsilon_0_to_fp16 = const()[name = tensor("obj_211_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_211_cast_fp16 = batch_norm(beta = obj_211_beta_0_to_fp16, epsilon = 
obj_211_epsilon_0_to_fp16, gamma = obj_211_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_91_cast_fp16)[name = tensor("obj_211_cast_fp16")]; + tensor query_61_pad_type_0 = const()[name = tensor("query_61_pad_type_0"), val = tensor("valid")]; + tensor query_61_strides_0 = const()[name = tensor("query_61_strides_0"), val = tensor([1, 1])]; + tensor query_61_pad_0 = const()[name = tensor("query_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_61_dilations_0 = const()[name = tensor("query_61_dilations_0"), val = tensor([1, 1])]; + tensor query_61_groups_0 = const()[name = tensor("query_61_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(611007424)))]; + tensor layers_15_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(613104640)))]; + tensor query_61_cast_fp16 = conv(bias = layers_15_self_attn_q_proj_bias_to_fp16, dilations = query_61_dilations_0, groups = query_61_groups_0, pad = query_61_pad_0, pad_type = query_61_pad_type_0, strides = query_61_strides_0, weight = layers_15_self_attn_q_proj_weight_to_fp16, x = obj_211_cast_fp16)[name = tensor("query_61_cast_fp16")]; + tensor current_key_31_pad_type_0 = const()[name = tensor("current_key_31_pad_type_0"), val = tensor("valid")]; + tensor current_key_31_strides_0 = const()[name = tensor("current_key_31_strides_0"), val = tensor([1, 1])]; + tensor current_key_31_pad_0 = const()[name = tensor("current_key_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_31_dilations_0 = const()[name = tensor("current_key_31_dilations_0"), val = tensor([1, 1])]; + tensor current_key_31_groups_0 = const()[name = tensor("current_key_31_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(613106752)))]; + tensor current_key_31_cast_fp16 = conv(dilations = current_key_31_dilations_0, groups = current_key_31_groups_0, pad = current_key_31_pad_0, pad_type = current_key_31_pad_type_0, strides = current_key_31_strides_0, weight = layers_15_self_attn_k_proj_weight_to_fp16, x = obj_211_cast_fp16)[name = tensor("current_key_31_cast_fp16")]; + tensor current_value_31_pad_type_0 = const()[name = tensor("current_value_31_pad_type_0"), val = tensor("valid")]; + tensor current_value_31_strides_0 = const()[name = tensor("current_value_31_strides_0"), val = tensor([1, 1])]; + tensor current_value_31_pad_0 = const()[name = tensor("current_value_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_31_dilations_0 = const()[name = tensor("current_value_31_dilations_0"), val = tensor([1, 1])]; + tensor current_value_31_groups_0 = const()[name = tensor("current_value_31_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(615203968)))]; + tensor layers_15_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(617301184)))]; + tensor current_value_31_cast_fp16 = conv(bias = layers_15_self_attn_v_proj_bias_to_fp16, dilations = current_value_31_dilations_0, groups = current_value_31_groups_0, pad = current_value_31_pad_0, pad_type = current_value_31_pad_type_0, strides = current_value_31_strides_0, weight = layers_15_self_attn_v_proj_weight_to_fp16, x = obj_211_cast_fp16)[name = tensor("current_value_31_cast_fp16")]; + tensor var_3497_cast_fp16 = mul(x = var_87_cast_fp16_15, y = var_207_cast_fp16)[name = tensor("op_3497_cast_fp16")]; + tensor var_3498_cast_fp16 = mul(x = current_key_31_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3498_cast_fp16")]; + tensor key_61_cast_fp16 = add(x = var_3497_cast_fp16, y = var_3498_cast_fp16)[name = tensor("key_61_cast_fp16")]; + tensor var_3501_cast_fp16 = mul(x = var_114_cast_fp16_15, y = var_207_cast_fp16)[name = tensor("op_3501_cast_fp16")]; + tensor var_3502_cast_fp16 = mul(x = current_value_31_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3502_cast_fp16")]; + tensor value_61_cast_fp16 = add(x = var_3501_cast_fp16, y = var_3502_cast_fp16)[name = tensor("value_61_cast_fp16")]; + tensor var_3506 = const()[name = tensor("op_3506"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_61_cast_fp16 = reshape(shape = var_3506, x = query_61_cast_fp16)[name = tensor("mh_q_61_cast_fp16")]; + tensor var_3508_to_fp16 = const()[name = tensor("op_3508_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3509_cast_fp16 = mul(x = mh_q_61_cast_fp16, y = var_3508_to_fp16)[name = tensor("op_3509_cast_fp16")]; + tensor var_3512 = const()[name = tensor("op_3512"), val = tensor([1, 16, 64, 448])]; + tensor var_3513_cast_fp16 = reshape(shape = var_3512, x = key_61_cast_fp16)[name = tensor("op_3513_cast_fp16")]; + tensor mh_w_91_transpose_x_0 = const()[name = tensor("mh_w_91_transpose_x_0"), val = tensor(true)]; + tensor mh_w_91_transpose_y_0 = const()[name = tensor("mh_w_91_transpose_y_0"), val = tensor(false)]; + tensor mh_w_91_cast_fp16 = matmul(transpose_x = mh_w_91_transpose_x_0, transpose_y = mh_w_91_transpose_y_0, x = var_3509_cast_fp16, y = var_3513_cast_fp16)[name = tensor("mh_w_91_cast_fp16")]; + tensor mh_w_93_cast_fp16 = add(x = mh_w_91_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_93_cast_fp16")]; + tensor var_3521_cast_fp16 = softmax(axis = var_3433, x = mh_w_93_cast_fp16)[name = tensor("op_3521_cast_fp16")]; + tensor var_3522 = const()[name = tensor("op_3522"), val = tensor([1, 16, 64, 448])]; + tensor var_3523_cast_fp16 = reshape(shape = var_3522, x = value_61_cast_fp16)[name = tensor("op_3523_cast_fp16")]; + tensor attn_61_transpose_x_0 = const()[name = tensor("attn_61_transpose_x_0"), val = tensor(false)]; + tensor attn_61_transpose_y_0 = const()[name = tensor("attn_61_transpose_y_0"), val = tensor(true)]; + tensor attn_61_cast_fp16 = matmul(transpose_x = attn_61_transpose_x_0, transpose_y = attn_61_transpose_y_0, x = var_3523_cast_fp16, y = var_3521_cast_fp16)[name = tensor("attn_61_cast_fp16")]; + tensor var_3526 = const()[name = tensor("op_3526"), val = tensor([1, 1024, 1, 1])]; + tensor input_151_cast_fp16 = reshape(shape = var_3526, x = attn_61_cast_fp16)[name = tensor("input_151_cast_fp16")]; + tensor obj_217_pad_type_0 = const()[name = tensor("obj_217_pad_type_0"), val = tensor("valid")]; + tensor obj_217_strides_0 = const()[name = tensor("obj_217_strides_0"), val = tensor([1, 1])]; + tensor obj_217_pad_0 = const()[name = tensor("obj_217_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_217_dilations_0 = const()[name = 
tensor("obj_217_dilations_0"), val = tensor([1, 1])]; + tensor obj_217_groups_0 = const()[name = tensor("obj_217_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(617303296)))]; + tensor layers_15_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(619400512)))]; + tensor obj_217_cast_fp16 = conv(bias = layers_15_self_attn_o_proj_bias_to_fp16, dilations = obj_217_dilations_0, groups = obj_217_groups_0, pad = obj_217_pad_0, pad_type = obj_217_pad_type_0, strides = obj_217_strides_0, weight = layers_15_self_attn_o_proj_weight_to_fp16, x = input_151_cast_fp16)[name = tensor("obj_217_cast_fp16")]; + tensor inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = obj_217_cast_fp16)[name = tensor("inputs_93_cast_fp16")]; + tensor out_93_axes_0 = const()[name = tensor("out_93_axes_0"), val = tensor([1])]; + tensor var_3548_to_fp16 = const()[name = tensor("op_3548_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_93_cast_fp16 = layer_norm(axes = out_93_axes_0, epsilon = var_3548_to_fp16, x = inputs_93_cast_fp16)[name = tensor("out_93_cast_fp16")]; + tensor obj_219_gamma_0_to_fp16 = const()[name = tensor("obj_219_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(619402624)))]; + tensor obj_219_beta_0_to_fp16 = const()[name = tensor("obj_219_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(619404736)))]; + tensor obj_219_epsilon_0_to_fp16 = const()[name = tensor("obj_219_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_219_cast_fp16 = batch_norm(beta = obj_219_beta_0_to_fp16, epsilon = obj_219_epsilon_0_to_fp16, gamma = obj_219_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_93_cast_fp16)[name = tensor("obj_219_cast_fp16")]; + tensor query_63_pad_type_0 = const()[name = tensor("query_63_pad_type_0"), val = tensor("valid")]; + tensor query_63_strides_0 = const()[name = tensor("query_63_strides_0"), val = tensor([1, 1])]; + tensor query_63_pad_0 = const()[name = tensor("query_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_63_dilations_0 = const()[name = tensor("query_63_dilations_0"), val = tensor([1, 1])]; + tensor query_63_groups_0 = const()[name = tensor("query_63_groups_0"), val = tensor(1)]; + tensor layers_15_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_15_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(619406848)))]; + tensor layers_15_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_15_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(621504064)))]; + tensor query_63_cast_fp16 = conv(bias = layers_15_encoder_attn_q_proj_bias_to_fp16, dilations = query_63_dilations_0, groups = query_63_groups_0, pad = query_63_pad_0, pad_type = query_63_pad_type_0, strides = query_63_strides_0, weight = layers_15_encoder_attn_q_proj_weight_to_fp16, x = obj_219_cast_fp16)[name = tensor("query_63_cast_fp16")]; + tensor key_63_pad_type_0 = const()[name = tensor("key_63_pad_type_0"), val = tensor("valid")]; + tensor key_63_strides_0 = 
const()[name = tensor("key_63_strides_0"), val = tensor([1, 1])]; + tensor key_63_pad_0 = const()[name = tensor("key_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_63_dilations_0 = const()[name = tensor("key_63_dilations_0"), val = tensor([1, 1])]; + tensor key_63_groups_0 = const()[name = tensor("key_63_groups_0"), val = tensor(1)]; + tensor layers_15_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_15_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(621506176)))]; + tensor key_63_cast_fp16 = conv(dilations = key_63_dilations_0, groups = key_63_groups_0, pad = key_63_pad_0, pad_type = key_63_pad_type_0, strides = key_63_strides_0, weight = layers_15_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_63_cast_fp16")]; + tensor value_63_pad_type_0 = const()[name = tensor("value_63_pad_type_0"), val = tensor("valid")]; + tensor value_63_strides_0 = const()[name = tensor("value_63_strides_0"), val = tensor([1, 1])]; + tensor value_63_pad_0 = const()[name = tensor("value_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_63_dilations_0 = const()[name = tensor("value_63_dilations_0"), val = tensor([1, 1])]; + tensor value_63_groups_0 = const()[name = tensor("value_63_groups_0"), val = tensor(1)]; + tensor layers_15_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_15_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(623603392)))]; + tensor layers_15_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_15_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(625700608)))]; + tensor value_63_cast_fp16 = conv(bias = layers_15_encoder_attn_v_proj_bias_to_fp16, dilations = value_63_dilations_0, groups = value_63_groups_0, pad = value_63_pad_0, pad_type = value_63_pad_type_0, strides = value_63_strides_0, weight = layers_15_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_63_cast_fp16")]; + tensor var_3584 = const()[name = tensor("op_3584"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_63_cast_fp16 = reshape(shape = var_3584, x = query_63_cast_fp16)[name = tensor("mh_q_63_cast_fp16")]; + tensor var_3586_to_fp16 = const()[name = tensor("op_3586_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3587_cast_fp16 = mul(x = mh_q_63_cast_fp16, y = var_3586_to_fp16)[name = tensor("op_3587_cast_fp16")]; + tensor var_3590 = const()[name = tensor("op_3590"), val = tensor([1, 16, 64, 1500])]; + tensor var_3591_cast_fp16 = reshape(shape = var_3590, x = key_63_cast_fp16)[name = tensor("op_3591_cast_fp16")]; + tensor mh_w_95_transpose_x_0 = const()[name = tensor("mh_w_95_transpose_x_0"), val = tensor(true)]; + tensor mh_w_95_transpose_y_0 = const()[name = tensor("mh_w_95_transpose_y_0"), val = tensor(false)]; + tensor mh_w_95_cast_fp16 = matmul(transpose_x = mh_w_95_transpose_x_0, transpose_y = mh_w_95_transpose_y_0, x = var_3587_cast_fp16, y = var_3591_cast_fp16)[name = tensor("mh_w_95_cast_fp16")]; + tensor obj_223_cast_fp16 = softmax(axis = var_3433, x = mh_w_95_cast_fp16)[name = tensor("obj_223_cast_fp16")]; + tensor var_3595 = const()[name = tensor("op_3595"), val = tensor([1, 16, 64, 1500])]; + tensor var_3596_cast_fp16 = reshape(shape = var_3595, x = value_63_cast_fp16)[name = tensor("op_3596_cast_fp16")]; + tensor attn_63_transpose_x_0 = const()[name = 
tensor("attn_63_transpose_x_0"), val = tensor(false)]; + tensor attn_63_transpose_y_0 = const()[name = tensor("attn_63_transpose_y_0"), val = tensor(true)]; + tensor attn_63_cast_fp16 = matmul(transpose_x = attn_63_transpose_x_0, transpose_y = attn_63_transpose_y_0, x = var_3596_cast_fp16, y = obj_223_cast_fp16)[name = tensor("attn_63_cast_fp16")]; + tensor var_3599 = const()[name = tensor("op_3599"), val = tensor([1, 1024, 1, 1])]; + tensor input_153_cast_fp16 = reshape(shape = var_3599, x = attn_63_cast_fp16)[name = tensor("input_153_cast_fp16")]; + tensor obj_221_pad_type_0 = const()[name = tensor("obj_221_pad_type_0"), val = tensor("valid")]; + tensor obj_221_strides_0 = const()[name = tensor("obj_221_strides_0"), val = tensor([1, 1])]; + tensor obj_221_pad_0 = const()[name = tensor("obj_221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_221_dilations_0 = const()[name = tensor("obj_221_dilations_0"), val = tensor([1, 1])]; + tensor obj_221_groups_0 = const()[name = tensor("obj_221_groups_0"), val = tensor(1)]; + tensor layers_15_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_15_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(625702720)))]; + tensor layers_15_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_15_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(627799936)))]; + tensor obj_221_cast_fp16 = conv(bias = layers_15_encoder_attn_o_proj_bias_to_fp16, dilations = obj_221_dilations_0, groups = obj_221_groups_0, pad = obj_221_pad_0, pad_type = obj_221_pad_type_0, strides = obj_221_strides_0, weight = layers_15_encoder_attn_o_proj_weight_to_fp16, x = input_153_cast_fp16)[name = tensor("obj_221_cast_fp16")]; + tensor inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = obj_221_cast_fp16)[name = tensor("inputs_95_cast_fp16")]; + tensor out_95_axes_0 = const()[name = tensor("out_95_axes_0"), val = tensor([1])]; + tensor var_3620_to_fp16 = const()[name = tensor("op_3620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_95_cast_fp16 = layer_norm(axes = out_95_axes_0, epsilon = var_3620_to_fp16, x = inputs_95_cast_fp16)[name = tensor("out_95_cast_fp16")]; + tensor input_155_gamma_0_to_fp16 = const()[name = tensor("input_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(627802048)))]; + tensor input_155_beta_0_to_fp16 = const()[name = tensor("input_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(627804160)))]; + tensor input_155_epsilon_0_to_fp16 = const()[name = tensor("input_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_155_cast_fp16 = batch_norm(beta = input_155_beta_0_to_fp16, epsilon = input_155_epsilon_0_to_fp16, gamma = input_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_95_cast_fp16)[name = tensor("input_155_cast_fp16")]; + tensor input_157_pad_type_0 = const()[name = tensor("input_157_pad_type_0"), val = tensor("valid")]; + tensor input_157_strides_0 = const()[name = tensor("input_157_strides_0"), val = tensor([1, 1])]; + tensor input_157_pad_0 = const()[name = tensor("input_157_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_157_dilations_0 = const()[name = tensor("input_157_dilations_0"), val = tensor([1, 1])]; + tensor input_157_groups_0 = const()[name = tensor("input_157_groups_0"), val = 
tensor(1)]; + tensor layers_15_fc1_weight_to_fp16 = const()[name = tensor("layers_15_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(627806272)))]; + tensor layers_15_fc1_bias_to_fp16 = const()[name = tensor("layers_15_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(636194944)))]; + tensor input_157_cast_fp16 = conv(bias = layers_15_fc1_bias_to_fp16, dilations = input_157_dilations_0, groups = input_157_groups_0, pad = input_157_pad_0, pad_type = input_157_pad_type_0, strides = input_157_strides_0, weight = layers_15_fc1_weight_to_fp16, x = input_155_cast_fp16)[name = tensor("input_157_cast_fp16")]; + tensor input_159_mode_0 = const()[name = tensor("input_159_mode_0"), val = tensor("EXACT")]; + tensor input_159_cast_fp16 = gelu(mode = input_159_mode_0, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; + tensor hidden_states_33_pad_type_0 = const()[name = tensor("hidden_states_33_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_33_strides_0 = const()[name = tensor("hidden_states_33_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_0 = const()[name = tensor("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_33_dilations_0 = const()[name = tensor("hidden_states_33_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_33_groups_0 = const()[name = tensor("hidden_states_33_groups_0"), val = tensor(1)]; + tensor layers_15_fc2_weight_to_fp16 = const()[name = tensor("layers_15_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(636203200)))]; + tensor layers_15_fc2_bias_to_fp16 = const()[name = tensor("layers_15_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(644591872)))]; + tensor hidden_states_33_cast_fp16 = conv(bias = layers_15_fc2_bias_to_fp16, dilations = hidden_states_33_dilations_0, groups = hidden_states_33_groups_0, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = hidden_states_33_strides_0, weight = layers_15_fc2_weight_to_fp16, x = input_159_cast_fp16)[name = tensor("hidden_states_33_cast_fp16")]; + tensor inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = hidden_states_33_cast_fp16)[name = tensor("inputs_97_cast_fp16")]; + tensor var_3656 = const()[name = tensor("op_3656"), val = tensor(3)]; + tensor out_97_axes_0 = const()[name = tensor("out_97_axes_0"), val = tensor([1])]; + tensor var_3681_to_fp16 = const()[name = tensor("op_3681_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_3681_to_fp16, x = inputs_97_cast_fp16)[name = tensor("out_97_cast_fp16")]; + tensor obj_225_gamma_0_to_fp16 = const()[name = tensor("obj_225_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(644593984)))]; + tensor obj_225_beta_0_to_fp16 = const()[name = tensor("obj_225_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(644596096)))]; + tensor obj_225_epsilon_0_to_fp16 = const()[name = tensor("obj_225_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_225_cast_fp16 = batch_norm(beta = obj_225_beta_0_to_fp16, epsilon = obj_225_epsilon_0_to_fp16, gamma = obj_225_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_97_cast_fp16)[name = 
tensor("obj_225_cast_fp16")]; + tensor query_65_pad_type_0 = const()[name = tensor("query_65_pad_type_0"), val = tensor("valid")]; + tensor query_65_strides_0 = const()[name = tensor("query_65_strides_0"), val = tensor([1, 1])]; + tensor query_65_pad_0 = const()[name = tensor("query_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_65_dilations_0 = const()[name = tensor("query_65_dilations_0"), val = tensor([1, 1])]; + tensor query_65_groups_0 = const()[name = tensor("query_65_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(644598208)))]; + tensor layers_16_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(646695424)))]; + tensor query_65_cast_fp16 = conv(bias = layers_16_self_attn_q_proj_bias_to_fp16, dilations = query_65_dilations_0, groups = query_65_groups_0, pad = query_65_pad_0, pad_type = query_65_pad_type_0, strides = query_65_strides_0, weight = layers_16_self_attn_q_proj_weight_to_fp16, x = obj_225_cast_fp16)[name = tensor("query_65_cast_fp16")]; + tensor current_key_33_pad_type_0 = const()[name = tensor("current_key_33_pad_type_0"), val = tensor("valid")]; + tensor current_key_33_strides_0 = const()[name = tensor("current_key_33_strides_0"), val = tensor([1, 1])]; + tensor current_key_33_pad_0 = const()[name = tensor("current_key_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_33_dilations_0 = const()[name = tensor("current_key_33_dilations_0"), val = tensor([1, 1])]; + tensor current_key_33_groups_0 = const()[name = tensor("current_key_33_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(646697536)))]; + tensor current_key_33_cast_fp16 = conv(dilations = current_key_33_dilations_0, groups = current_key_33_groups_0, pad = current_key_33_pad_0, pad_type = current_key_33_pad_type_0, strides = current_key_33_strides_0, weight = layers_16_self_attn_k_proj_weight_to_fp16, x = obj_225_cast_fp16)[name = tensor("current_key_33_cast_fp16")]; + tensor current_value_33_pad_type_0 = const()[name = tensor("current_value_33_pad_type_0"), val = tensor("valid")]; + tensor current_value_33_strides_0 = const()[name = tensor("current_value_33_strides_0"), val = tensor([1, 1])]; + tensor current_value_33_pad_0 = const()[name = tensor("current_value_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_33_dilations_0 = const()[name = tensor("current_value_33_dilations_0"), val = tensor([1, 1])]; + tensor current_value_33_groups_0 = const()[name = tensor("current_value_33_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(648794752)))]; + tensor layers_16_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(650891968)))]; + tensor current_value_33_cast_fp16 = conv(bias = layers_16_self_attn_v_proj_bias_to_fp16, dilations = current_value_33_dilations_0, 
groups = current_value_33_groups_0, pad = current_value_33_pad_0, pad_type = current_value_33_pad_type_0, strides = current_value_33_strides_0, weight = layers_16_self_attn_v_proj_weight_to_fp16, x = obj_225_cast_fp16)[name = tensor("current_value_33_cast_fp16")]; + tensor var_3720_cast_fp16 = mul(x = var_87_cast_fp16_16, y = var_207_cast_fp16)[name = tensor("op_3720_cast_fp16")]; + tensor var_3721_cast_fp16 = mul(x = current_key_33_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3721_cast_fp16")]; + tensor key_65_cast_fp16 = add(x = var_3720_cast_fp16, y = var_3721_cast_fp16)[name = tensor("key_65_cast_fp16")]; + tensor var_3724_cast_fp16 = mul(x = var_114_cast_fp16_16, y = var_207_cast_fp16)[name = tensor("op_3724_cast_fp16")]; + tensor var_3725_cast_fp16 = mul(x = current_value_33_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3725_cast_fp16")]; + tensor value_65_cast_fp16 = add(x = var_3724_cast_fp16, y = var_3725_cast_fp16)[name = tensor("value_65_cast_fp16")]; + tensor var_3729 = const()[name = tensor("op_3729"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_65_cast_fp16 = reshape(shape = var_3729, x = query_65_cast_fp16)[name = tensor("mh_q_65_cast_fp16")]; + tensor var_3731_to_fp16 = const()[name = tensor("op_3731_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3732_cast_fp16 = mul(x = mh_q_65_cast_fp16, y = var_3731_to_fp16)[name = tensor("op_3732_cast_fp16")]; + tensor var_3735 = const()[name = tensor("op_3735"), val = tensor([1, 16, 64, 448])]; + tensor var_3736_cast_fp16 = reshape(shape = var_3735, x = key_65_cast_fp16)[name = tensor("op_3736_cast_fp16")]; + tensor mh_w_97_transpose_x_0 = const()[name = tensor("mh_w_97_transpose_x_0"), val = tensor(true)]; + tensor mh_w_97_transpose_y_0 = const()[name = tensor("mh_w_97_transpose_y_0"), val = tensor(false)]; + tensor mh_w_97_cast_fp16 = matmul(transpose_x = mh_w_97_transpose_x_0, transpose_y = mh_w_97_transpose_y_0, x = var_3732_cast_fp16, y = var_3736_cast_fp16)[name = tensor("mh_w_97_cast_fp16")]; + tensor mh_w_99_cast_fp16 = add(x = mh_w_97_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_99_cast_fp16")]; + tensor var_3744_cast_fp16 = softmax(axis = var_3656, x = mh_w_99_cast_fp16)[name = tensor("op_3744_cast_fp16")]; + tensor var_3745 = const()[name = tensor("op_3745"), val = tensor([1, 16, 64, 448])]; + tensor var_3746_cast_fp16 = reshape(shape = var_3745, x = value_65_cast_fp16)[name = tensor("op_3746_cast_fp16")]; + tensor attn_65_transpose_x_0 = const()[name = tensor("attn_65_transpose_x_0"), val = tensor(false)]; + tensor attn_65_transpose_y_0 = const()[name = tensor("attn_65_transpose_y_0"), val = tensor(true)]; + tensor attn_65_cast_fp16 = matmul(transpose_x = attn_65_transpose_x_0, transpose_y = attn_65_transpose_y_0, x = var_3746_cast_fp16, y = var_3744_cast_fp16)[name = tensor("attn_65_cast_fp16")]; + tensor var_3749 = const()[name = tensor("op_3749"), val = tensor([1, 1024, 1, 1])]; + tensor input_161_cast_fp16 = reshape(shape = var_3749, x = attn_65_cast_fp16)[name = tensor("input_161_cast_fp16")]; + tensor obj_231_pad_type_0 = const()[name = tensor("obj_231_pad_type_0"), val = tensor("valid")]; + tensor obj_231_strides_0 = const()[name = tensor("obj_231_strides_0"), val = tensor([1, 1])]; + tensor obj_231_pad_0 = const()[name = tensor("obj_231_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_231_dilations_0 = const()[name = tensor("obj_231_dilations_0"), val = tensor([1, 1])]; + tensor obj_231_groups_0 = const()[name = tensor("obj_231_groups_0"), val = tensor(1)]; + tensor 
layers_16_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(650894080)))]; + tensor layers_16_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(652991296)))]; + tensor obj_231_cast_fp16 = conv(bias = layers_16_self_attn_o_proj_bias_to_fp16, dilations = obj_231_dilations_0, groups = obj_231_groups_0, pad = obj_231_pad_0, pad_type = obj_231_pad_type_0, strides = obj_231_strides_0, weight = layers_16_self_attn_o_proj_weight_to_fp16, x = input_161_cast_fp16)[name = tensor("obj_231_cast_fp16")]; + tensor inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = obj_231_cast_fp16)[name = tensor("inputs_99_cast_fp16")]; + tensor out_99_axes_0 = const()[name = tensor("out_99_axes_0"), val = tensor([1])]; + tensor var_3771_to_fp16 = const()[name = tensor("op_3771_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_99_cast_fp16 = layer_norm(axes = out_99_axes_0, epsilon = var_3771_to_fp16, x = inputs_99_cast_fp16)[name = tensor("out_99_cast_fp16")]; + tensor obj_233_gamma_0_to_fp16 = const()[name = tensor("obj_233_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(652993408)))]; + tensor obj_233_beta_0_to_fp16 = const()[name = tensor("obj_233_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(652995520)))]; + tensor obj_233_epsilon_0_to_fp16 = const()[name = tensor("obj_233_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_233_cast_fp16 = batch_norm(beta = obj_233_beta_0_to_fp16, epsilon = obj_233_epsilon_0_to_fp16, gamma = obj_233_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_99_cast_fp16)[name = tensor("obj_233_cast_fp16")]; + tensor query_67_pad_type_0 = const()[name = tensor("query_67_pad_type_0"), val = tensor("valid")]; + tensor query_67_strides_0 = const()[name = tensor("query_67_strides_0"), val = tensor([1, 1])]; + tensor query_67_pad_0 = const()[name = tensor("query_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_67_dilations_0 = const()[name = tensor("query_67_dilations_0"), val = tensor([1, 1])]; + tensor query_67_groups_0 = const()[name = tensor("query_67_groups_0"), val = tensor(1)]; + tensor layers_16_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_16_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(652997632)))]; + tensor layers_16_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_16_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(655094848)))]; + tensor query_67_cast_fp16 = conv(bias = layers_16_encoder_attn_q_proj_bias_to_fp16, dilations = query_67_dilations_0, groups = query_67_groups_0, pad = query_67_pad_0, pad_type = query_67_pad_type_0, strides = query_67_strides_0, weight = layers_16_encoder_attn_q_proj_weight_to_fp16, x = obj_233_cast_fp16)[name = tensor("query_67_cast_fp16")]; + tensor key_67_pad_type_0 = const()[name = tensor("key_67_pad_type_0"), val = tensor("valid")]; + tensor key_67_strides_0 = const()[name = tensor("key_67_strides_0"), val = tensor([1, 1])]; + tensor key_67_pad_0 = const()[name = tensor("key_67_pad_0"), val = tensor([0, 0, 0, 0])]; + 
tensor key_67_dilations_0 = const()[name = tensor("key_67_dilations_0"), val = tensor([1, 1])]; + tensor key_67_groups_0 = const()[name = tensor("key_67_groups_0"), val = tensor(1)]; + tensor layers_16_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_16_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(655096960)))]; + tensor key_67_cast_fp16 = conv(dilations = key_67_dilations_0, groups = key_67_groups_0, pad = key_67_pad_0, pad_type = key_67_pad_type_0, strides = key_67_strides_0, weight = layers_16_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_67_cast_fp16")]; + tensor value_67_pad_type_0 = const()[name = tensor("value_67_pad_type_0"), val = tensor("valid")]; + tensor value_67_strides_0 = const()[name = tensor("value_67_strides_0"), val = tensor([1, 1])]; + tensor value_67_pad_0 = const()[name = tensor("value_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_67_dilations_0 = const()[name = tensor("value_67_dilations_0"), val = tensor([1, 1])]; + tensor value_67_groups_0 = const()[name = tensor("value_67_groups_0"), val = tensor(1)]; + tensor layers_16_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_16_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(657194176)))]; + tensor layers_16_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_16_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(659291392)))]; + tensor value_67_cast_fp16 = conv(bias = layers_16_encoder_attn_v_proj_bias_to_fp16, dilations = value_67_dilations_0, groups = value_67_groups_0, pad = value_67_pad_0, pad_type = value_67_pad_type_0, strides = value_67_strides_0, weight = layers_16_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_67_cast_fp16")]; + tensor var_3807 = const()[name = tensor("op_3807"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_67_cast_fp16 = reshape(shape = var_3807, x = query_67_cast_fp16)[name = tensor("mh_q_67_cast_fp16")]; + tensor var_3809_to_fp16 = const()[name = tensor("op_3809_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3810_cast_fp16 = mul(x = mh_q_67_cast_fp16, y = var_3809_to_fp16)[name = tensor("op_3810_cast_fp16")]; + tensor var_3813 = const()[name = tensor("op_3813"), val = tensor([1, 16, 64, 1500])]; + tensor var_3814_cast_fp16 = reshape(shape = var_3813, x = key_67_cast_fp16)[name = tensor("op_3814_cast_fp16")]; + tensor mh_w_101_transpose_x_0 = const()[name = tensor("mh_w_101_transpose_x_0"), val = tensor(true)]; + tensor mh_w_101_transpose_y_0 = const()[name = tensor("mh_w_101_transpose_y_0"), val = tensor(false)]; + tensor mh_w_101_cast_fp16 = matmul(transpose_x = mh_w_101_transpose_x_0, transpose_y = mh_w_101_transpose_y_0, x = var_3810_cast_fp16, y = var_3814_cast_fp16)[name = tensor("mh_w_101_cast_fp16")]; + tensor obj_237_cast_fp16 = softmax(axis = var_3656, x = mh_w_101_cast_fp16)[name = tensor("obj_237_cast_fp16")]; + tensor var_3818 = const()[name = tensor("op_3818"), val = tensor([1, 16, 64, 1500])]; + tensor var_3819_cast_fp16 = reshape(shape = var_3818, x = value_67_cast_fp16)[name = tensor("op_3819_cast_fp16")]; + tensor attn_67_transpose_x_0 = const()[name = tensor("attn_67_transpose_x_0"), val = tensor(false)]; + tensor attn_67_transpose_y_0 = const()[name = tensor("attn_67_transpose_y_0"), val = tensor(true)]; + tensor 
attn_67_cast_fp16 = matmul(transpose_x = attn_67_transpose_x_0, transpose_y = attn_67_transpose_y_0, x = var_3819_cast_fp16, y = obj_237_cast_fp16)[name = tensor("attn_67_cast_fp16")]; + tensor var_3822 = const()[name = tensor("op_3822"), val = tensor([1, 1024, 1, 1])]; + tensor input_163_cast_fp16 = reshape(shape = var_3822, x = attn_67_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor obj_235_pad_type_0 = const()[name = tensor("obj_235_pad_type_0"), val = tensor("valid")]; + tensor obj_235_strides_0 = const()[name = tensor("obj_235_strides_0"), val = tensor([1, 1])]; + tensor obj_235_pad_0 = const()[name = tensor("obj_235_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_235_dilations_0 = const()[name = tensor("obj_235_dilations_0"), val = tensor([1, 1])]; + tensor obj_235_groups_0 = const()[name = tensor("obj_235_groups_0"), val = tensor(1)]; + tensor layers_16_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_16_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(659293504)))]; + tensor layers_16_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_16_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(661390720)))]; + tensor obj_235_cast_fp16 = conv(bias = layers_16_encoder_attn_o_proj_bias_to_fp16, dilations = obj_235_dilations_0, groups = obj_235_groups_0, pad = obj_235_pad_0, pad_type = obj_235_pad_type_0, strides = obj_235_strides_0, weight = layers_16_encoder_attn_o_proj_weight_to_fp16, x = input_163_cast_fp16)[name = tensor("obj_235_cast_fp16")]; + tensor inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = obj_235_cast_fp16)[name = tensor("inputs_101_cast_fp16")]; + tensor out_101_axes_0 = const()[name = tensor("out_101_axes_0"), val = tensor([1])]; + tensor var_3843_to_fp16 = const()[name = tensor("op_3843_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_101_cast_fp16 = layer_norm(axes = out_101_axes_0, epsilon = var_3843_to_fp16, x = inputs_101_cast_fp16)[name = tensor("out_101_cast_fp16")]; + tensor input_165_gamma_0_to_fp16 = const()[name = tensor("input_165_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(661392832)))]; + tensor input_165_beta_0_to_fp16 = const()[name = tensor("input_165_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(661394944)))]; + tensor input_165_epsilon_0_to_fp16 = const()[name = tensor("input_165_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_165_cast_fp16 = batch_norm(beta = input_165_beta_0_to_fp16, epsilon = input_165_epsilon_0_to_fp16, gamma = input_165_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_101_cast_fp16)[name = tensor("input_165_cast_fp16")]; + tensor input_167_pad_type_0 = const()[name = tensor("input_167_pad_type_0"), val = tensor("valid")]; + tensor input_167_strides_0 = const()[name = tensor("input_167_strides_0"), val = tensor([1, 1])]; + tensor input_167_pad_0 = const()[name = tensor("input_167_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_167_dilations_0 = const()[name = tensor("input_167_dilations_0"), val = tensor([1, 1])]; + tensor input_167_groups_0 = const()[name = tensor("input_167_groups_0"), val = tensor(1)]; + tensor layers_16_fc1_weight_to_fp16 = const()[name = tensor("layers_16_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(661397056)))]; + tensor layers_16_fc1_bias_to_fp16 = const()[name = tensor("layers_16_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(669785728)))]; + tensor input_167_cast_fp16 = conv(bias = layers_16_fc1_bias_to_fp16, dilations = input_167_dilations_0, groups = input_167_groups_0, pad = input_167_pad_0, pad_type = input_167_pad_type_0, strides = input_167_strides_0, weight = layers_16_fc1_weight_to_fp16, x = input_165_cast_fp16)[name = tensor("input_167_cast_fp16")]; + tensor input_169_mode_0 = const()[name = tensor("input_169_mode_0"), val = tensor("EXACT")]; + tensor input_169_cast_fp16 = gelu(mode = input_169_mode_0, x = input_167_cast_fp16)[name = tensor("input_169_cast_fp16")]; + tensor hidden_states_35_pad_type_0 = const()[name = tensor("hidden_states_35_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_35_strides_0 = const()[name = tensor("hidden_states_35_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_35_pad_0 = const()[name = tensor("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_35_dilations_0 = const()[name = tensor("hidden_states_35_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_35_groups_0 = const()[name = tensor("hidden_states_35_groups_0"), val = tensor(1)]; + tensor layers_16_fc2_weight_to_fp16 = const()[name = tensor("layers_16_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(669793984)))]; + tensor layers_16_fc2_bias_to_fp16 = const()[name = tensor("layers_16_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(678182656)))]; + tensor hidden_states_35_cast_fp16 = conv(bias = layers_16_fc2_bias_to_fp16, dilations = hidden_states_35_dilations_0, groups = hidden_states_35_groups_0, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = hidden_states_35_strides_0, weight = layers_16_fc2_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("hidden_states_35_cast_fp16")]; + tensor inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = hidden_states_35_cast_fp16)[name = tensor("inputs_103_cast_fp16")]; + tensor var_3879 = const()[name = tensor("op_3879"), val = tensor(3)]; + tensor out_103_axes_0 = const()[name = tensor("out_103_axes_0"), val = tensor([1])]; + tensor var_3904_to_fp16 = const()[name = tensor("op_3904_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_3904_to_fp16, x = inputs_103_cast_fp16)[name = tensor("out_103_cast_fp16")]; + tensor obj_239_gamma_0_to_fp16 = const()[name = tensor("obj_239_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(678184768)))]; + tensor obj_239_beta_0_to_fp16 = const()[name = tensor("obj_239_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(678186880)))]; + tensor obj_239_epsilon_0_to_fp16 = const()[name = tensor("obj_239_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_239_cast_fp16 = batch_norm(beta = obj_239_beta_0_to_fp16, epsilon = obj_239_epsilon_0_to_fp16, gamma = obj_239_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_103_cast_fp16)[name = tensor("obj_239_cast_fp16")]; + tensor query_69_pad_type_0 = const()[name = tensor("query_69_pad_type_0"), val = tensor("valid")]; + tensor 
query_69_strides_0 = const()[name = tensor("query_69_strides_0"), val = tensor([1, 1])]; + tensor query_69_pad_0 = const()[name = tensor("query_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_69_dilations_0 = const()[name = tensor("query_69_dilations_0"), val = tensor([1, 1])]; + tensor query_69_groups_0 = const()[name = tensor("query_69_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(678188992)))]; + tensor layers_17_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(680286208)))]; + tensor query_69_cast_fp16 = conv(bias = layers_17_self_attn_q_proj_bias_to_fp16, dilations = query_69_dilations_0, groups = query_69_groups_0, pad = query_69_pad_0, pad_type = query_69_pad_type_0, strides = query_69_strides_0, weight = layers_17_self_attn_q_proj_weight_to_fp16, x = obj_239_cast_fp16)[name = tensor("query_69_cast_fp16")]; + tensor current_key_35_pad_type_0 = const()[name = tensor("current_key_35_pad_type_0"), val = tensor("valid")]; + tensor current_key_35_strides_0 = const()[name = tensor("current_key_35_strides_0"), val = tensor([1, 1])]; + tensor current_key_35_pad_0 = const()[name = tensor("current_key_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_35_dilations_0 = const()[name = tensor("current_key_35_dilations_0"), val = tensor([1, 1])]; + tensor current_key_35_groups_0 = const()[name = tensor("current_key_35_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(680288320)))]; + tensor current_key_35_cast_fp16 = conv(dilations = current_key_35_dilations_0, groups = current_key_35_groups_0, pad = current_key_35_pad_0, pad_type = current_key_35_pad_type_0, strides = current_key_35_strides_0, weight = layers_17_self_attn_k_proj_weight_to_fp16, x = obj_239_cast_fp16)[name = tensor("current_key_35_cast_fp16")]; + tensor current_value_35_pad_type_0 = const()[name = tensor("current_value_35_pad_type_0"), val = tensor("valid")]; + tensor current_value_35_strides_0 = const()[name = tensor("current_value_35_strides_0"), val = tensor([1, 1])]; + tensor current_value_35_pad_0 = const()[name = tensor("current_value_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_35_dilations_0 = const()[name = tensor("current_value_35_dilations_0"), val = tensor([1, 1])]; + tensor current_value_35_groups_0 = const()[name = tensor("current_value_35_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(682385536)))]; + tensor layers_17_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(684482752)))]; + tensor current_value_35_cast_fp16 = conv(bias = layers_17_self_attn_v_proj_bias_to_fp16, dilations = current_value_35_dilations_0, groups = current_value_35_groups_0, pad = current_value_35_pad_0, pad_type = current_value_35_pad_type_0, strides = 
current_value_35_strides_0, weight = layers_17_self_attn_v_proj_weight_to_fp16, x = obj_239_cast_fp16)[name = tensor("current_value_35_cast_fp16")]; + tensor var_3943_cast_fp16 = mul(x = var_87_cast_fp16_17, y = var_207_cast_fp16)[name = tensor("op_3943_cast_fp16")]; + tensor var_3944_cast_fp16 = mul(x = current_key_35_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3944_cast_fp16")]; + tensor key_69_cast_fp16 = add(x = var_3943_cast_fp16, y = var_3944_cast_fp16)[name = tensor("key_69_cast_fp16")]; + tensor var_3947_cast_fp16 = mul(x = var_114_cast_fp16_17, y = var_207_cast_fp16)[name = tensor("op_3947_cast_fp16")]; + tensor var_3948_cast_fp16 = mul(x = current_value_35_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_3948_cast_fp16")]; + tensor value_69_cast_fp16 = add(x = var_3947_cast_fp16, y = var_3948_cast_fp16)[name = tensor("value_69_cast_fp16")]; + tensor var_3952 = const()[name = tensor("op_3952"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_69_cast_fp16 = reshape(shape = var_3952, x = query_69_cast_fp16)[name = tensor("mh_q_69_cast_fp16")]; + tensor var_3954_to_fp16 = const()[name = tensor("op_3954_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3955_cast_fp16 = mul(x = mh_q_69_cast_fp16, y = var_3954_to_fp16)[name = tensor("op_3955_cast_fp16")]; + tensor var_3958 = const()[name = tensor("op_3958"), val = tensor([1, 16, 64, 448])]; + tensor var_3959_cast_fp16 = reshape(shape = var_3958, x = key_69_cast_fp16)[name = tensor("op_3959_cast_fp16")]; + tensor mh_w_103_transpose_x_0 = const()[name = tensor("mh_w_103_transpose_x_0"), val = tensor(true)]; + tensor mh_w_103_transpose_y_0 = const()[name = tensor("mh_w_103_transpose_y_0"), val = tensor(false)]; + tensor mh_w_103_cast_fp16 = matmul(transpose_x = mh_w_103_transpose_x_0, transpose_y = mh_w_103_transpose_y_0, x = var_3955_cast_fp16, y = var_3959_cast_fp16)[name = tensor("mh_w_103_cast_fp16")]; + tensor mh_w_105_cast_fp16 = add(x = mh_w_103_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_105_cast_fp16")]; + tensor var_3967_cast_fp16 = softmax(axis = var_3879, x = mh_w_105_cast_fp16)[name = tensor("op_3967_cast_fp16")]; + tensor var_3968 = const()[name = tensor("op_3968"), val = tensor([1, 16, 64, 448])]; + tensor var_3969_cast_fp16 = reshape(shape = var_3968, x = value_69_cast_fp16)[name = tensor("op_3969_cast_fp16")]; + tensor attn_69_transpose_x_0 = const()[name = tensor("attn_69_transpose_x_0"), val = tensor(false)]; + tensor attn_69_transpose_y_0 = const()[name = tensor("attn_69_transpose_y_0"), val = tensor(true)]; + tensor attn_69_cast_fp16 = matmul(transpose_x = attn_69_transpose_x_0, transpose_y = attn_69_transpose_y_0, x = var_3969_cast_fp16, y = var_3967_cast_fp16)[name = tensor("attn_69_cast_fp16")]; + tensor var_3972 = const()[name = tensor("op_3972"), val = tensor([1, 1024, 1, 1])]; + tensor input_171_cast_fp16 = reshape(shape = var_3972, x = attn_69_cast_fp16)[name = tensor("input_171_cast_fp16")]; + tensor obj_245_pad_type_0 = const()[name = tensor("obj_245_pad_type_0"), val = tensor("valid")]; + tensor obj_245_strides_0 = const()[name = tensor("obj_245_strides_0"), val = tensor([1, 1])]; + tensor obj_245_pad_0 = const()[name = tensor("obj_245_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_245_dilations_0 = const()[name = tensor("obj_245_dilations_0"), val = tensor([1, 1])]; + tensor obj_245_groups_0 = const()[name = tensor("obj_245_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(684484864)))]; + tensor layers_17_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(686582080)))]; + tensor obj_245_cast_fp16 = conv(bias = layers_17_self_attn_o_proj_bias_to_fp16, dilations = obj_245_dilations_0, groups = obj_245_groups_0, pad = obj_245_pad_0, pad_type = obj_245_pad_type_0, strides = obj_245_strides_0, weight = layers_17_self_attn_o_proj_weight_to_fp16, x = input_171_cast_fp16)[name = tensor("obj_245_cast_fp16")]; + tensor inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = obj_245_cast_fp16)[name = tensor("inputs_105_cast_fp16")]; + tensor out_105_axes_0 = const()[name = tensor("out_105_axes_0"), val = tensor([1])]; + tensor var_3994_to_fp16 = const()[name = tensor("op_3994_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_105_cast_fp16 = layer_norm(axes = out_105_axes_0, epsilon = var_3994_to_fp16, x = inputs_105_cast_fp16)[name = tensor("out_105_cast_fp16")]; + tensor obj_247_gamma_0_to_fp16 = const()[name = tensor("obj_247_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(686584192)))]; + tensor obj_247_beta_0_to_fp16 = const()[name = tensor("obj_247_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(686586304)))]; + tensor obj_247_epsilon_0_to_fp16 = const()[name = tensor("obj_247_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_247_cast_fp16 = batch_norm(beta = obj_247_beta_0_to_fp16, epsilon = obj_247_epsilon_0_to_fp16, gamma = obj_247_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_105_cast_fp16)[name = tensor("obj_247_cast_fp16")]; + tensor query_71_pad_type_0 = const()[name = tensor("query_71_pad_type_0"), val = tensor("valid")]; + tensor query_71_strides_0 = const()[name = tensor("query_71_strides_0"), val = tensor([1, 1])]; + tensor query_71_pad_0 = const()[name = tensor("query_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_71_dilations_0 = const()[name = tensor("query_71_dilations_0"), val = tensor([1, 1])]; + tensor query_71_groups_0 = const()[name = tensor("query_71_groups_0"), val = tensor(1)]; + tensor layers_17_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_17_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(686588416)))]; + tensor layers_17_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_17_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(688685632)))]; + tensor query_71_cast_fp16 = conv(bias = layers_17_encoder_attn_q_proj_bias_to_fp16, dilations = query_71_dilations_0, groups = query_71_groups_0, pad = query_71_pad_0, pad_type = query_71_pad_type_0, strides = query_71_strides_0, weight = layers_17_encoder_attn_q_proj_weight_to_fp16, x = obj_247_cast_fp16)[name = tensor("query_71_cast_fp16")]; + tensor key_71_pad_type_0 = const()[name = tensor("key_71_pad_type_0"), val = tensor("valid")]; + tensor key_71_strides_0 = const()[name = tensor("key_71_strides_0"), val = tensor([1, 1])]; + tensor key_71_pad_0 = const()[name = tensor("key_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_71_dilations_0 = const()[name = tensor("key_71_dilations_0"), val = tensor([1, 1])]; + tensor 
key_71_groups_0 = const()[name = tensor("key_71_groups_0"), val = tensor(1)]; + tensor layers_17_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_17_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(688687744)))]; + tensor key_71_cast_fp16 = conv(dilations = key_71_dilations_0, groups = key_71_groups_0, pad = key_71_pad_0, pad_type = key_71_pad_type_0, strides = key_71_strides_0, weight = layers_17_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_71_cast_fp16")]; + tensor value_71_pad_type_0 = const()[name = tensor("value_71_pad_type_0"), val = tensor("valid")]; + tensor value_71_strides_0 = const()[name = tensor("value_71_strides_0"), val = tensor([1, 1])]; + tensor value_71_pad_0 = const()[name = tensor("value_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_71_dilations_0 = const()[name = tensor("value_71_dilations_0"), val = tensor([1, 1])]; + tensor value_71_groups_0 = const()[name = tensor("value_71_groups_0"), val = tensor(1)]; + tensor layers_17_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_17_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(690784960)))]; + tensor layers_17_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_17_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(692882176)))]; + tensor value_71_cast_fp16 = conv(bias = layers_17_encoder_attn_v_proj_bias_to_fp16, dilations = value_71_dilations_0, groups = value_71_groups_0, pad = value_71_pad_0, pad_type = value_71_pad_type_0, strides = value_71_strides_0, weight = layers_17_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_71_cast_fp16")]; + tensor var_4030 = const()[name = tensor("op_4030"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_71_cast_fp16 = reshape(shape = var_4030, x = query_71_cast_fp16)[name = tensor("mh_q_71_cast_fp16")]; + tensor var_4032_to_fp16 = const()[name = tensor("op_4032_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4033_cast_fp16 = mul(x = mh_q_71_cast_fp16, y = var_4032_to_fp16)[name = tensor("op_4033_cast_fp16")]; + tensor var_4036 = const()[name = tensor("op_4036"), val = tensor([1, 16, 64, 1500])]; + tensor var_4037_cast_fp16 = reshape(shape = var_4036, x = key_71_cast_fp16)[name = tensor("op_4037_cast_fp16")]; + tensor mh_w_107_transpose_x_0 = const()[name = tensor("mh_w_107_transpose_x_0"), val = tensor(true)]; + tensor mh_w_107_transpose_y_0 = const()[name = tensor("mh_w_107_transpose_y_0"), val = tensor(false)]; + tensor mh_w_107_cast_fp16 = matmul(transpose_x = mh_w_107_transpose_x_0, transpose_y = mh_w_107_transpose_y_0, x = var_4033_cast_fp16, y = var_4037_cast_fp16)[name = tensor("mh_w_107_cast_fp16")]; + tensor obj_251_cast_fp16 = softmax(axis = var_3879, x = mh_w_107_cast_fp16)[name = tensor("obj_251_cast_fp16")]; + tensor var_4041 = const()[name = tensor("op_4041"), val = tensor([1, 16, 64, 1500])]; + tensor var_4042_cast_fp16 = reshape(shape = var_4041, x = value_71_cast_fp16)[name = tensor("op_4042_cast_fp16")]; + tensor attn_71_transpose_x_0 = const()[name = tensor("attn_71_transpose_x_0"), val = tensor(false)]; + tensor attn_71_transpose_y_0 = const()[name = tensor("attn_71_transpose_y_0"), val = tensor(true)]; + tensor attn_71_cast_fp16 = matmul(transpose_x = attn_71_transpose_x_0, transpose_y = attn_71_transpose_y_0, x = 
var_4042_cast_fp16, y = obj_251_cast_fp16)[name = tensor("attn_71_cast_fp16")]; + tensor var_4045 = const()[name = tensor("op_4045"), val = tensor([1, 1024, 1, 1])]; + tensor input_173_cast_fp16 = reshape(shape = var_4045, x = attn_71_cast_fp16)[name = tensor("input_173_cast_fp16")]; + tensor obj_249_pad_type_0 = const()[name = tensor("obj_249_pad_type_0"), val = tensor("valid")]; + tensor obj_249_strides_0 = const()[name = tensor("obj_249_strides_0"), val = tensor([1, 1])]; + tensor obj_249_pad_0 = const()[name = tensor("obj_249_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_249_dilations_0 = const()[name = tensor("obj_249_dilations_0"), val = tensor([1, 1])]; + tensor obj_249_groups_0 = const()[name = tensor("obj_249_groups_0"), val = tensor(1)]; + tensor layers_17_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_17_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(692884288)))]; + tensor layers_17_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_17_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(694981504)))]; + tensor obj_249_cast_fp16 = conv(bias = layers_17_encoder_attn_o_proj_bias_to_fp16, dilations = obj_249_dilations_0, groups = obj_249_groups_0, pad = obj_249_pad_0, pad_type = obj_249_pad_type_0, strides = obj_249_strides_0, weight = layers_17_encoder_attn_o_proj_weight_to_fp16, x = input_173_cast_fp16)[name = tensor("obj_249_cast_fp16")]; + tensor inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = obj_249_cast_fp16)[name = tensor("inputs_107_cast_fp16")]; + tensor out_107_axes_0 = const()[name = tensor("out_107_axes_0"), val = tensor([1])]; + tensor var_4063_to_fp16 = const()[name = tensor("op_4063_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_107_cast_fp16 = layer_norm(axes = out_107_axes_0, epsilon = var_4063_to_fp16, x = inputs_107_cast_fp16)[name = tensor("out_107_cast_fp16")]; + tensor input_175_gamma_0_to_fp16 = const()[name = tensor("input_175_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(694983616)))]; + tensor input_175_beta_0_to_fp16 = const()[name = tensor("input_175_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(694985728)))]; + tensor input_175_epsilon_0_to_fp16 = const()[name = tensor("input_175_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_175_cast_fp16 = batch_norm(beta = input_175_beta_0_to_fp16, epsilon = input_175_epsilon_0_to_fp16, gamma = input_175_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_107_cast_fp16)[name = tensor("input_175_cast_fp16")]; + tensor input_177_pad_type_0 = const()[name = tensor("input_177_pad_type_0"), val = tensor("valid")]; + tensor input_177_strides_0 = const()[name = tensor("input_177_strides_0"), val = tensor([1, 1])]; + tensor input_177_pad_0 = const()[name = tensor("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_177_dilations_0 = const()[name = tensor("input_177_dilations_0"), val = tensor([1, 1])]; + tensor input_177_groups_0 = const()[name = tensor("input_177_groups_0"), val = tensor(1)]; + tensor layers_17_fc1_weight_to_fp16 = const()[name = tensor("layers_17_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(694987840)))]; + tensor layers_17_fc1_bias_to_fp16 = const()[name = 
tensor("layers_17_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(703376512)))]; + tensor input_177_cast_fp16 = conv(bias = layers_17_fc1_bias_to_fp16, dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = layers_17_fc1_weight_to_fp16, x = input_175_cast_fp16)[name = tensor("input_177_cast_fp16")]; + tensor input_179_mode_0 = const()[name = tensor("input_179_mode_0"), val = tensor("EXACT")]; + tensor input_179_cast_fp16 = gelu(mode = input_179_mode_0, x = input_177_cast_fp16)[name = tensor("input_179_cast_fp16")]; + tensor hidden_states_37_pad_type_0 = const()[name = tensor("hidden_states_37_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_37_strides_0 = const()[name = tensor("hidden_states_37_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_37_pad_0 = const()[name = tensor("hidden_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_37_dilations_0 = const()[name = tensor("hidden_states_37_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_37_groups_0 = const()[name = tensor("hidden_states_37_groups_0"), val = tensor(1)]; + tensor layers_17_fc2_weight_to_fp16 = const()[name = tensor("layers_17_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(703384768)))]; + tensor layers_17_fc2_bias_to_fp16 = const()[name = tensor("layers_17_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(711773440)))]; + tensor hidden_states_37_cast_fp16 = conv(bias = layers_17_fc2_bias_to_fp16, dilations = hidden_states_37_dilations_0, groups = hidden_states_37_groups_0, pad = hidden_states_37_pad_0, pad_type = hidden_states_37_pad_type_0, strides = hidden_states_37_strides_0, weight = layers_17_fc2_weight_to_fp16, x = input_179_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; + tensor inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = hidden_states_37_cast_fp16)[name = tensor("inputs_109_cast_fp16")]; + tensor var_4098 = const()[name = tensor("op_4098"), val = tensor(3)]; + tensor out_109_axes_0 = const()[name = tensor("out_109_axes_0"), val = tensor([1])]; + tensor var_4123_to_fp16 = const()[name = tensor("op_4123_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_4123_to_fp16, x = inputs_109_cast_fp16)[name = tensor("out_109_cast_fp16")]; + tensor obj_253_gamma_0_to_fp16 = const()[name = tensor("obj_253_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(711775552)))]; + tensor obj_253_beta_0_to_fp16 = const()[name = tensor("obj_253_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(711777664)))]; + tensor obj_253_epsilon_0_to_fp16 = const()[name = tensor("obj_253_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_253_cast_fp16 = batch_norm(beta = obj_253_beta_0_to_fp16, epsilon = obj_253_epsilon_0_to_fp16, gamma = obj_253_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_109_cast_fp16)[name = tensor("obj_253_cast_fp16")]; + tensor query_73_pad_type_0 = const()[name = tensor("query_73_pad_type_0"), val = tensor("valid")]; + tensor query_73_strides_0 = const()[name = tensor("query_73_strides_0"), val = tensor([1, 1])]; + tensor query_73_pad_0 = const()[name = 
tensor("query_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_73_dilations_0 = const()[name = tensor("query_73_dilations_0"), val = tensor([1, 1])]; + tensor query_73_groups_0 = const()[name = tensor("query_73_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(711779776)))]; + tensor layers_18_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(713876992)))]; + tensor query_73_cast_fp16 = conv(bias = layers_18_self_attn_q_proj_bias_to_fp16, dilations = query_73_dilations_0, groups = query_73_groups_0, pad = query_73_pad_0, pad_type = query_73_pad_type_0, strides = query_73_strides_0, weight = layers_18_self_attn_q_proj_weight_to_fp16, x = obj_253_cast_fp16)[name = tensor("query_73_cast_fp16")]; + tensor current_key_37_pad_type_0 = const()[name = tensor("current_key_37_pad_type_0"), val = tensor("valid")]; + tensor current_key_37_strides_0 = const()[name = tensor("current_key_37_strides_0"), val = tensor([1, 1])]; + tensor current_key_37_pad_0 = const()[name = tensor("current_key_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_37_dilations_0 = const()[name = tensor("current_key_37_dilations_0"), val = tensor([1, 1])]; + tensor current_key_37_groups_0 = const()[name = tensor("current_key_37_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(713879104)))]; + tensor current_key_37_cast_fp16 = conv(dilations = current_key_37_dilations_0, groups = current_key_37_groups_0, pad = current_key_37_pad_0, pad_type = current_key_37_pad_type_0, strides = current_key_37_strides_0, weight = layers_18_self_attn_k_proj_weight_to_fp16, x = obj_253_cast_fp16)[name = tensor("current_key_37_cast_fp16")]; + tensor current_value_37_pad_type_0 = const()[name = tensor("current_value_37_pad_type_0"), val = tensor("valid")]; + tensor current_value_37_strides_0 = const()[name = tensor("current_value_37_strides_0"), val = tensor([1, 1])]; + tensor current_value_37_pad_0 = const()[name = tensor("current_value_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_37_dilations_0 = const()[name = tensor("current_value_37_dilations_0"), val = tensor([1, 1])]; + tensor current_value_37_groups_0 = const()[name = tensor("current_value_37_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(715976320)))]; + tensor layers_18_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(718073536)))]; + tensor current_value_37_cast_fp16 = conv(bias = layers_18_self_attn_v_proj_bias_to_fp16, dilations = current_value_37_dilations_0, groups = current_value_37_groups_0, pad = current_value_37_pad_0, pad_type = current_value_37_pad_type_0, strides = current_value_37_strides_0, weight = layers_18_self_attn_v_proj_weight_to_fp16, x = obj_253_cast_fp16)[name = tensor("current_value_37_cast_fp16")]; + 
tensor var_4162_cast_fp16 = mul(x = var_87_cast_fp16_18, y = var_207_cast_fp16)[name = tensor("op_4162_cast_fp16")]; + tensor var_4163_cast_fp16 = mul(x = current_key_37_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4163_cast_fp16")]; + tensor key_73_cast_fp16 = add(x = var_4162_cast_fp16, y = var_4163_cast_fp16)[name = tensor("key_73_cast_fp16")]; + tensor var_4166_cast_fp16 = mul(x = var_114_cast_fp16_18, y = var_207_cast_fp16)[name = tensor("op_4166_cast_fp16")]; + tensor var_4167_cast_fp16 = mul(x = current_value_37_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4167_cast_fp16")]; + tensor value_73_cast_fp16 = add(x = var_4166_cast_fp16, y = var_4167_cast_fp16)[name = tensor("value_73_cast_fp16")]; + tensor var_4171 = const()[name = tensor("op_4171"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_73_cast_fp16 = reshape(shape = var_4171, x = query_73_cast_fp16)[name = tensor("mh_q_73_cast_fp16")]; + tensor var_4173_to_fp16 = const()[name = tensor("op_4173_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4174_cast_fp16 = mul(x = mh_q_73_cast_fp16, y = var_4173_to_fp16)[name = tensor("op_4174_cast_fp16")]; + tensor var_4177 = const()[name = tensor("op_4177"), val = tensor([1, 16, 64, 448])]; + tensor var_4178_cast_fp16 = reshape(shape = var_4177, x = key_73_cast_fp16)[name = tensor("op_4178_cast_fp16")]; + tensor mh_w_109_transpose_x_0 = const()[name = tensor("mh_w_109_transpose_x_0"), val = tensor(true)]; + tensor mh_w_109_transpose_y_0 = const()[name = tensor("mh_w_109_transpose_y_0"), val = tensor(false)]; + tensor mh_w_109_cast_fp16 = matmul(transpose_x = mh_w_109_transpose_x_0, transpose_y = mh_w_109_transpose_y_0, x = var_4174_cast_fp16, y = var_4178_cast_fp16)[name = tensor("mh_w_109_cast_fp16")]; + tensor mh_w_111_cast_fp16 = add(x = mh_w_109_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_111_cast_fp16")]; + tensor var_4186_cast_fp16 = softmax(axis = var_4098, x = mh_w_111_cast_fp16)[name = tensor("op_4186_cast_fp16")]; + tensor var_4187 = const()[name = tensor("op_4187"), val = tensor([1, 16, 64, 448])]; + tensor var_4188_cast_fp16 = reshape(shape = var_4187, x = value_73_cast_fp16)[name = tensor("op_4188_cast_fp16")]; + tensor attn_73_transpose_x_0 = const()[name = tensor("attn_73_transpose_x_0"), val = tensor(false)]; + tensor attn_73_transpose_y_0 = const()[name = tensor("attn_73_transpose_y_0"), val = tensor(true)]; + tensor attn_73_cast_fp16 = matmul(transpose_x = attn_73_transpose_x_0, transpose_y = attn_73_transpose_y_0, x = var_4188_cast_fp16, y = var_4186_cast_fp16)[name = tensor("attn_73_cast_fp16")]; + tensor var_4191 = const()[name = tensor("op_4191"), val = tensor([1, 1024, 1, 1])]; + tensor input_181_cast_fp16 = reshape(shape = var_4191, x = attn_73_cast_fp16)[name = tensor("input_181_cast_fp16")]; + tensor obj_259_pad_type_0 = const()[name = tensor("obj_259_pad_type_0"), val = tensor("valid")]; + tensor obj_259_strides_0 = const()[name = tensor("obj_259_strides_0"), val = tensor([1, 1])]; + tensor obj_259_pad_0 = const()[name = tensor("obj_259_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_259_dilations_0 = const()[name = tensor("obj_259_dilations_0"), val = tensor([1, 1])]; + tensor obj_259_groups_0 = const()[name = tensor("obj_259_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(718075648)))]; + tensor layers_18_self_attn_o_proj_bias_to_fp16 = const()[name = 
tensor("layers_18_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(720172864)))]; + tensor obj_259_cast_fp16 = conv(bias = layers_18_self_attn_o_proj_bias_to_fp16, dilations = obj_259_dilations_0, groups = obj_259_groups_0, pad = obj_259_pad_0, pad_type = obj_259_pad_type_0, strides = obj_259_strides_0, weight = layers_18_self_attn_o_proj_weight_to_fp16, x = input_181_cast_fp16)[name = tensor("obj_259_cast_fp16")]; + tensor inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = obj_259_cast_fp16)[name = tensor("inputs_111_cast_fp16")]; + tensor out_111_axes_0 = const()[name = tensor("out_111_axes_0"), val = tensor([1])]; + tensor var_4213_to_fp16 = const()[name = tensor("op_4213_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_111_cast_fp16 = layer_norm(axes = out_111_axes_0, epsilon = var_4213_to_fp16, x = inputs_111_cast_fp16)[name = tensor("out_111_cast_fp16")]; + tensor obj_261_gamma_0_to_fp16 = const()[name = tensor("obj_261_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(720174976)))]; + tensor obj_261_beta_0_to_fp16 = const()[name = tensor("obj_261_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(720177088)))]; + tensor obj_261_epsilon_0_to_fp16 = const()[name = tensor("obj_261_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_261_cast_fp16 = batch_norm(beta = obj_261_beta_0_to_fp16, epsilon = obj_261_epsilon_0_to_fp16, gamma = obj_261_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_111_cast_fp16)[name = tensor("obj_261_cast_fp16")]; + tensor query_75_pad_type_0 = const()[name = tensor("query_75_pad_type_0"), val = tensor("valid")]; + tensor query_75_strides_0 = const()[name = tensor("query_75_strides_0"), val = tensor([1, 1])]; + tensor query_75_pad_0 = const()[name = tensor("query_75_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_75_dilations_0 = const()[name = tensor("query_75_dilations_0"), val = tensor([1, 1])]; + tensor query_75_groups_0 = const()[name = tensor("query_75_groups_0"), val = tensor(1)]; + tensor layers_18_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_18_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(720179200)))]; + tensor layers_18_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_18_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(722276416)))]; + tensor query_75_cast_fp16 = conv(bias = layers_18_encoder_attn_q_proj_bias_to_fp16, dilations = query_75_dilations_0, groups = query_75_groups_0, pad = query_75_pad_0, pad_type = query_75_pad_type_0, strides = query_75_strides_0, weight = layers_18_encoder_attn_q_proj_weight_to_fp16, x = obj_261_cast_fp16)[name = tensor("query_75_cast_fp16")]; + tensor key_75_pad_type_0 = const()[name = tensor("key_75_pad_type_0"), val = tensor("valid")]; + tensor key_75_strides_0 = const()[name = tensor("key_75_strides_0"), val = tensor([1, 1])]; + tensor key_75_pad_0 = const()[name = tensor("key_75_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_75_dilations_0 = const()[name = tensor("key_75_dilations_0"), val = tensor([1, 1])]; + tensor key_75_groups_0 = const()[name = tensor("key_75_groups_0"), val = tensor(1)]; + tensor layers_18_encoder_attn_k_proj_weight_to_fp16 = const()[name = 
tensor("layers_18_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(722278528)))]; + tensor key_75_cast_fp16 = conv(dilations = key_75_dilations_0, groups = key_75_groups_0, pad = key_75_pad_0, pad_type = key_75_pad_type_0, strides = key_75_strides_0, weight = layers_18_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_75_cast_fp16")]; + tensor value_75_pad_type_0 = const()[name = tensor("value_75_pad_type_0"), val = tensor("valid")]; + tensor value_75_strides_0 = const()[name = tensor("value_75_strides_0"), val = tensor([1, 1])]; + tensor value_75_pad_0 = const()[name = tensor("value_75_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_75_dilations_0 = const()[name = tensor("value_75_dilations_0"), val = tensor([1, 1])]; + tensor value_75_groups_0 = const()[name = tensor("value_75_groups_0"), val = tensor(1)]; + tensor layers_18_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_18_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(724375744)))]; + tensor layers_18_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_18_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(726472960)))]; + tensor value_75_cast_fp16 = conv(bias = layers_18_encoder_attn_v_proj_bias_to_fp16, dilations = value_75_dilations_0, groups = value_75_groups_0, pad = value_75_pad_0, pad_type = value_75_pad_type_0, strides = value_75_strides_0, weight = layers_18_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_75_cast_fp16")]; + tensor var_4249 = const()[name = tensor("op_4249"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_75_cast_fp16 = reshape(shape = var_4249, x = query_75_cast_fp16)[name = tensor("mh_q_75_cast_fp16")]; + tensor var_4251_to_fp16 = const()[name = tensor("op_4251_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4252_cast_fp16 = mul(x = mh_q_75_cast_fp16, y = var_4251_to_fp16)[name = tensor("op_4252_cast_fp16")]; + tensor var_4255 = const()[name = tensor("op_4255"), val = tensor([1, 16, 64, 1500])]; + tensor var_4256_cast_fp16 = reshape(shape = var_4255, x = key_75_cast_fp16)[name = tensor("op_4256_cast_fp16")]; + tensor mh_w_113_transpose_x_0 = const()[name = tensor("mh_w_113_transpose_x_0"), val = tensor(true)]; + tensor mh_w_113_transpose_y_0 = const()[name = tensor("mh_w_113_transpose_y_0"), val = tensor(false)]; + tensor mh_w_113_cast_fp16 = matmul(transpose_x = mh_w_113_transpose_x_0, transpose_y = mh_w_113_transpose_y_0, x = var_4252_cast_fp16, y = var_4256_cast_fp16)[name = tensor("mh_w_113_cast_fp16")]; + tensor obj_265_cast_fp16 = softmax(axis = var_4098, x = mh_w_113_cast_fp16)[name = tensor("obj_265_cast_fp16")]; + tensor var_4260 = const()[name = tensor("op_4260"), val = tensor([1, 16, 64, 1500])]; + tensor var_4261_cast_fp16 = reshape(shape = var_4260, x = value_75_cast_fp16)[name = tensor("op_4261_cast_fp16")]; + tensor attn_75_transpose_x_0 = const()[name = tensor("attn_75_transpose_x_0"), val = tensor(false)]; + tensor attn_75_transpose_y_0 = const()[name = tensor("attn_75_transpose_y_0"), val = tensor(true)]; + tensor attn_75_cast_fp16 = matmul(transpose_x = attn_75_transpose_x_0, transpose_y = attn_75_transpose_y_0, x = var_4261_cast_fp16, y = obj_265_cast_fp16)[name = tensor("attn_75_cast_fp16")]; + tensor var_4264 = const()[name = tensor("op_4264"), val = tensor([1, 
1024, 1, 1])]; + tensor input_183_cast_fp16 = reshape(shape = var_4264, x = attn_75_cast_fp16)[name = tensor("input_183_cast_fp16")]; + tensor obj_263_pad_type_0 = const()[name = tensor("obj_263_pad_type_0"), val = tensor("valid")]; + tensor obj_263_strides_0 = const()[name = tensor("obj_263_strides_0"), val = tensor([1, 1])]; + tensor obj_263_pad_0 = const()[name = tensor("obj_263_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_263_dilations_0 = const()[name = tensor("obj_263_dilations_0"), val = tensor([1, 1])]; + tensor obj_263_groups_0 = const()[name = tensor("obj_263_groups_0"), val = tensor(1)]; + tensor layers_18_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_18_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(726475072)))]; + tensor layers_18_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_18_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(728572288)))]; + tensor obj_263_cast_fp16 = conv(bias = layers_18_encoder_attn_o_proj_bias_to_fp16, dilations = obj_263_dilations_0, groups = obj_263_groups_0, pad = obj_263_pad_0, pad_type = obj_263_pad_type_0, strides = obj_263_strides_0, weight = layers_18_encoder_attn_o_proj_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("obj_263_cast_fp16")]; + tensor inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = obj_263_cast_fp16)[name = tensor("inputs_113_cast_fp16")]; + tensor out_113_axes_0 = const()[name = tensor("out_113_axes_0"), val = tensor([1])]; + tensor var_4282_to_fp16 = const()[name = tensor("op_4282_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_113_cast_fp16 = layer_norm(axes = out_113_axes_0, epsilon = var_4282_to_fp16, x = inputs_113_cast_fp16)[name = tensor("out_113_cast_fp16")]; + tensor input_185_gamma_0_to_fp16 = const()[name = tensor("input_185_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(728574400)))]; + tensor input_185_beta_0_to_fp16 = const()[name = tensor("input_185_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(728576512)))]; + tensor input_185_epsilon_0_to_fp16 = const()[name = tensor("input_185_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_185_cast_fp16 = batch_norm(beta = input_185_beta_0_to_fp16, epsilon = input_185_epsilon_0_to_fp16, gamma = input_185_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_113_cast_fp16)[name = tensor("input_185_cast_fp16")]; + tensor input_187_pad_type_0 = const()[name = tensor("input_187_pad_type_0"), val = tensor("valid")]; + tensor input_187_strides_0 = const()[name = tensor("input_187_strides_0"), val = tensor([1, 1])]; + tensor input_187_pad_0 = const()[name = tensor("input_187_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_187_dilations_0 = const()[name = tensor("input_187_dilations_0"), val = tensor([1, 1])]; + tensor input_187_groups_0 = const()[name = tensor("input_187_groups_0"), val = tensor(1)]; + tensor layers_18_fc1_weight_to_fp16 = const()[name = tensor("layers_18_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(728578624)))]; + tensor layers_18_fc1_bias_to_fp16 = const()[name = tensor("layers_18_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(736967296)))]; + tensor 
input_187_cast_fp16 = conv(bias = layers_18_fc1_bias_to_fp16, dilations = input_187_dilations_0, groups = input_187_groups_0, pad = input_187_pad_0, pad_type = input_187_pad_type_0, strides = input_187_strides_0, weight = layers_18_fc1_weight_to_fp16, x = input_185_cast_fp16)[name = tensor("input_187_cast_fp16")]; + tensor input_189_mode_0 = const()[name = tensor("input_189_mode_0"), val = tensor("EXACT")]; + tensor input_189_cast_fp16 = gelu(mode = input_189_mode_0, x = input_187_cast_fp16)[name = tensor("input_189_cast_fp16")]; + tensor hidden_states_39_pad_type_0 = const()[name = tensor("hidden_states_39_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_39_strides_0 = const()[name = tensor("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = tensor("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = tensor("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_39_groups_0 = const()[name = tensor("hidden_states_39_groups_0"), val = tensor(1)]; + tensor layers_18_fc2_weight_to_fp16 = const()[name = tensor("layers_18_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(736975552)))]; + tensor layers_18_fc2_bias_to_fp16 = const()[name = tensor("layers_18_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(745364224)))]; + tensor hidden_states_39_cast_fp16 = conv(bias = layers_18_fc2_bias_to_fp16, dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = layers_18_fc2_weight_to_fp16, x = input_189_cast_fp16)[name = tensor("hidden_states_39_cast_fp16")]; + tensor inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = hidden_states_39_cast_fp16)[name = tensor("inputs_115_cast_fp16")]; + tensor var_4317 = const()[name = tensor("op_4317"), val = tensor(3)]; + tensor out_115_axes_0 = const()[name = tensor("out_115_axes_0"), val = tensor([1])]; + tensor var_4342_to_fp16 = const()[name = tensor("op_4342_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_4342_to_fp16, x = inputs_115_cast_fp16)[name = tensor("out_115_cast_fp16")]; + tensor obj_267_gamma_0_to_fp16 = const()[name = tensor("obj_267_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(745366336)))]; + tensor obj_267_beta_0_to_fp16 = const()[name = tensor("obj_267_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(745368448)))]; + tensor obj_267_epsilon_0_to_fp16 = const()[name = tensor("obj_267_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_267_cast_fp16 = batch_norm(beta = obj_267_beta_0_to_fp16, epsilon = obj_267_epsilon_0_to_fp16, gamma = obj_267_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_115_cast_fp16)[name = tensor("obj_267_cast_fp16")]; + tensor query_77_pad_type_0 = const()[name = tensor("query_77_pad_type_0"), val = tensor("valid")]; + tensor query_77_strides_0 = const()[name = tensor("query_77_strides_0"), val = tensor([1, 1])]; + tensor query_77_pad_0 = const()[name = tensor("query_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_77_dilations_0 = const()[name = tensor("query_77_dilations_0"), val = 
tensor([1, 1])]; + tensor query_77_groups_0 = const()[name = tensor("query_77_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(745370560)))]; + tensor layers_19_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(747467776)))]; + tensor query_77_cast_fp16 = conv(bias = layers_19_self_attn_q_proj_bias_to_fp16, dilations = query_77_dilations_0, groups = query_77_groups_0, pad = query_77_pad_0, pad_type = query_77_pad_type_0, strides = query_77_strides_0, weight = layers_19_self_attn_q_proj_weight_to_fp16, x = obj_267_cast_fp16)[name = tensor("query_77_cast_fp16")]; + tensor current_key_39_pad_type_0 = const()[name = tensor("current_key_39_pad_type_0"), val = tensor("valid")]; + tensor current_key_39_strides_0 = const()[name = tensor("current_key_39_strides_0"), val = tensor([1, 1])]; + tensor current_key_39_pad_0 = const()[name = tensor("current_key_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_39_dilations_0 = const()[name = tensor("current_key_39_dilations_0"), val = tensor([1, 1])]; + tensor current_key_39_groups_0 = const()[name = tensor("current_key_39_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(747469888)))]; + tensor current_key_39_cast_fp16 = conv(dilations = current_key_39_dilations_0, groups = current_key_39_groups_0, pad = current_key_39_pad_0, pad_type = current_key_39_pad_type_0, strides = current_key_39_strides_0, weight = layers_19_self_attn_k_proj_weight_to_fp16, x = obj_267_cast_fp16)[name = tensor("current_key_39_cast_fp16")]; + tensor current_value_39_pad_type_0 = const()[name = tensor("current_value_39_pad_type_0"), val = tensor("valid")]; + tensor current_value_39_strides_0 = const()[name = tensor("current_value_39_strides_0"), val = tensor([1, 1])]; + tensor current_value_39_pad_0 = const()[name = tensor("current_value_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_39_dilations_0 = const()[name = tensor("current_value_39_dilations_0"), val = tensor([1, 1])]; + tensor current_value_39_groups_0 = const()[name = tensor("current_value_39_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(749567104)))]; + tensor layers_19_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(751664320)))]; + tensor current_value_39_cast_fp16 = conv(bias = layers_19_self_attn_v_proj_bias_to_fp16, dilations = current_value_39_dilations_0, groups = current_value_39_groups_0, pad = current_value_39_pad_0, pad_type = current_value_39_pad_type_0, strides = current_value_39_strides_0, weight = layers_19_self_attn_v_proj_weight_to_fp16, x = obj_267_cast_fp16)[name = tensor("current_value_39_cast_fp16")]; + tensor var_4381_cast_fp16 = mul(x = var_87_cast_fp16_19, y = var_207_cast_fp16)[name = tensor("op_4381_cast_fp16")]; + tensor var_4382_cast_fp16 
= mul(x = current_key_39_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4382_cast_fp16")]; + tensor key_77_cast_fp16 = add(x = var_4381_cast_fp16, y = var_4382_cast_fp16)[name = tensor("key_77_cast_fp16")]; + tensor var_4385_cast_fp16 = mul(x = var_114_cast_fp16_19, y = var_207_cast_fp16)[name = tensor("op_4385_cast_fp16")]; + tensor var_4386_cast_fp16 = mul(x = current_value_39_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4386_cast_fp16")]; + tensor value_77_cast_fp16 = add(x = var_4385_cast_fp16, y = var_4386_cast_fp16)[name = tensor("value_77_cast_fp16")]; + tensor var_4390 = const()[name = tensor("op_4390"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_77_cast_fp16 = reshape(shape = var_4390, x = query_77_cast_fp16)[name = tensor("mh_q_77_cast_fp16")]; + tensor var_4392_to_fp16 = const()[name = tensor("op_4392_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4393_cast_fp16 = mul(x = mh_q_77_cast_fp16, y = var_4392_to_fp16)[name = tensor("op_4393_cast_fp16")]; + tensor var_4396 = const()[name = tensor("op_4396"), val = tensor([1, 16, 64, 448])]; + tensor var_4397_cast_fp16 = reshape(shape = var_4396, x = key_77_cast_fp16)[name = tensor("op_4397_cast_fp16")]; + tensor mh_w_115_transpose_x_0 = const()[name = tensor("mh_w_115_transpose_x_0"), val = tensor(true)]; + tensor mh_w_115_transpose_y_0 = const()[name = tensor("mh_w_115_transpose_y_0"), val = tensor(false)]; + tensor mh_w_115_cast_fp16 = matmul(transpose_x = mh_w_115_transpose_x_0, transpose_y = mh_w_115_transpose_y_0, x = var_4393_cast_fp16, y = var_4397_cast_fp16)[name = tensor("mh_w_115_cast_fp16")]; + tensor mh_w_117_cast_fp16 = add(x = mh_w_115_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_117_cast_fp16")]; + tensor var_4405_cast_fp16 = softmax(axis = var_4317, x = mh_w_117_cast_fp16)[name = tensor("op_4405_cast_fp16")]; + tensor var_4406 = const()[name = tensor("op_4406"), val = tensor([1, 16, 64, 448])]; + tensor var_4407_cast_fp16 = reshape(shape = var_4406, x = value_77_cast_fp16)[name = tensor("op_4407_cast_fp16")]; + tensor attn_77_transpose_x_0 = const()[name = tensor("attn_77_transpose_x_0"), val = tensor(false)]; + tensor attn_77_transpose_y_0 = const()[name = tensor("attn_77_transpose_y_0"), val = tensor(true)]; + tensor attn_77_cast_fp16 = matmul(transpose_x = attn_77_transpose_x_0, transpose_y = attn_77_transpose_y_0, x = var_4407_cast_fp16, y = var_4405_cast_fp16)[name = tensor("attn_77_cast_fp16")]; + tensor var_4410 = const()[name = tensor("op_4410"), val = tensor([1, 1024, 1, 1])]; + tensor input_191_cast_fp16 = reshape(shape = var_4410, x = attn_77_cast_fp16)[name = tensor("input_191_cast_fp16")]; + tensor obj_273_pad_type_0 = const()[name = tensor("obj_273_pad_type_0"), val = tensor("valid")]; + tensor obj_273_strides_0 = const()[name = tensor("obj_273_strides_0"), val = tensor([1, 1])]; + tensor obj_273_pad_0 = const()[name = tensor("obj_273_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_273_dilations_0 = const()[name = tensor("obj_273_dilations_0"), val = tensor([1, 1])]; + tensor obj_273_groups_0 = const()[name = tensor("obj_273_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(751666432)))]; + tensor layers_19_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(753763648)))]; + tensor obj_273_cast_fp16 = conv(bias = layers_19_self_attn_o_proj_bias_to_fp16, dilations = obj_273_dilations_0, groups = obj_273_groups_0, pad = obj_273_pad_0, pad_type = obj_273_pad_type_0, strides = obj_273_strides_0, weight = layers_19_self_attn_o_proj_weight_to_fp16, x = input_191_cast_fp16)[name = tensor("obj_273_cast_fp16")]; + tensor inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = obj_273_cast_fp16)[name = tensor("inputs_117_cast_fp16")]; + tensor out_117_axes_0 = const()[name = tensor("out_117_axes_0"), val = tensor([1])]; + tensor var_4432_to_fp16 = const()[name = tensor("op_4432_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_117_cast_fp16 = layer_norm(axes = out_117_axes_0, epsilon = var_4432_to_fp16, x = inputs_117_cast_fp16)[name = tensor("out_117_cast_fp16")]; + tensor obj_275_gamma_0_to_fp16 = const()[name = tensor("obj_275_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(753765760)))]; + tensor obj_275_beta_0_to_fp16 = const()[name = tensor("obj_275_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(753767872)))]; + tensor obj_275_epsilon_0_to_fp16 = const()[name = tensor("obj_275_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_275_cast_fp16 = batch_norm(beta = obj_275_beta_0_to_fp16, epsilon = obj_275_epsilon_0_to_fp16, gamma = obj_275_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_117_cast_fp16)[name = tensor("obj_275_cast_fp16")]; + tensor query_79_pad_type_0 = const()[name = tensor("query_79_pad_type_0"), val = tensor("valid")]; + tensor query_79_strides_0 = const()[name = tensor("query_79_strides_0"), val = tensor([1, 1])]; + tensor query_79_pad_0 = const()[name = tensor("query_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_79_dilations_0 = const()[name = tensor("query_79_dilations_0"), val = tensor([1, 1])]; + tensor query_79_groups_0 = const()[name = tensor("query_79_groups_0"), val = tensor(1)]; + tensor layers_19_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_19_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(753769984)))]; + tensor layers_19_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_19_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(755867200)))]; + tensor query_79_cast_fp16 = conv(bias = layers_19_encoder_attn_q_proj_bias_to_fp16, dilations = query_79_dilations_0, groups = query_79_groups_0, pad = query_79_pad_0, pad_type = query_79_pad_type_0, strides = query_79_strides_0, weight = layers_19_encoder_attn_q_proj_weight_to_fp16, x = obj_275_cast_fp16)[name = tensor("query_79_cast_fp16")]; + tensor key_79_pad_type_0 = const()[name = tensor("key_79_pad_type_0"), val = tensor("valid")]; + tensor key_79_strides_0 = const()[name = tensor("key_79_strides_0"), val = tensor([1, 1])]; + tensor key_79_pad_0 = const()[name = tensor("key_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_79_dilations_0 = const()[name = tensor("key_79_dilations_0"), val = tensor([1, 1])]; + tensor key_79_groups_0 = const()[name = tensor("key_79_groups_0"), val = tensor(1)]; + tensor layers_19_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_19_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(755869312)))]; + tensor key_79_cast_fp16 = conv(dilations = key_79_dilations_0, groups = key_79_groups_0, pad = key_79_pad_0, pad_type = key_79_pad_type_0, strides = key_79_strides_0, weight = layers_19_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_79_cast_fp16")]; + tensor value_79_pad_type_0 = const()[name = tensor("value_79_pad_type_0"), val = tensor("valid")]; + tensor value_79_strides_0 = const()[name = tensor("value_79_strides_0"), val = tensor([1, 1])]; + tensor value_79_pad_0 = const()[name = tensor("value_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_79_dilations_0 = const()[name = tensor("value_79_dilations_0"), val = tensor([1, 1])]; + tensor value_79_groups_0 = const()[name = tensor("value_79_groups_0"), val = tensor(1)]; + tensor layers_19_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_19_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(757966528)))]; + tensor layers_19_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_19_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(760063744)))]; + tensor value_79_cast_fp16 = conv(bias = layers_19_encoder_attn_v_proj_bias_to_fp16, dilations = value_79_dilations_0, groups = value_79_groups_0, pad = value_79_pad_0, pad_type = value_79_pad_type_0, strides = value_79_strides_0, weight = layers_19_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_79_cast_fp16")]; + tensor var_4468 = const()[name = tensor("op_4468"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_79_cast_fp16 = reshape(shape = var_4468, x = query_79_cast_fp16)[name = tensor("mh_q_79_cast_fp16")]; + tensor var_4470_to_fp16 = const()[name = tensor("op_4470_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4471_cast_fp16 = mul(x = mh_q_79_cast_fp16, y = var_4470_to_fp16)[name = tensor("op_4471_cast_fp16")]; + tensor var_4474 = const()[name = tensor("op_4474"), val = tensor([1, 16, 64, 1500])]; + tensor var_4475_cast_fp16 = reshape(shape = var_4474, x = key_79_cast_fp16)[name = tensor("op_4475_cast_fp16")]; + tensor mh_w_119_transpose_x_0 = const()[name = tensor("mh_w_119_transpose_x_0"), val = tensor(true)]; + tensor mh_w_119_transpose_y_0 = const()[name = tensor("mh_w_119_transpose_y_0"), val = tensor(false)]; + tensor mh_w_119_cast_fp16 = matmul(transpose_x = mh_w_119_transpose_x_0, transpose_y = mh_w_119_transpose_y_0, x = var_4471_cast_fp16, y = var_4475_cast_fp16)[name = tensor("mh_w_119_cast_fp16")]; + tensor obj_279_cast_fp16 = softmax(axis = var_4317, x = mh_w_119_cast_fp16)[name = tensor("obj_279_cast_fp16")]; + tensor var_4479 = const()[name = tensor("op_4479"), val = tensor([1, 16, 64, 1500])]; + tensor var_4480_cast_fp16 = reshape(shape = var_4479, x = value_79_cast_fp16)[name = tensor("op_4480_cast_fp16")]; + tensor attn_79_transpose_x_0 = const()[name = tensor("attn_79_transpose_x_0"), val = tensor(false)]; + tensor attn_79_transpose_y_0 = const()[name = tensor("attn_79_transpose_y_0"), val = tensor(true)]; + tensor attn_79_cast_fp16 = matmul(transpose_x = attn_79_transpose_x_0, transpose_y = attn_79_transpose_y_0, x = var_4480_cast_fp16, y = obj_279_cast_fp16)[name = tensor("attn_79_cast_fp16")]; + tensor var_4483 = const()[name = tensor("op_4483"), val = tensor([1, 1024, 1, 1])]; + tensor input_193_cast_fp16 = reshape(shape = var_4483, x = attn_79_cast_fp16)[name = tensor("input_193_cast_fp16")]; + 
tensor obj_277_pad_type_0 = const()[name = tensor("obj_277_pad_type_0"), val = tensor("valid")]; + tensor obj_277_strides_0 = const()[name = tensor("obj_277_strides_0"), val = tensor([1, 1])]; + tensor obj_277_pad_0 = const()[name = tensor("obj_277_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_277_dilations_0 = const()[name = tensor("obj_277_dilations_0"), val = tensor([1, 1])]; + tensor obj_277_groups_0 = const()[name = tensor("obj_277_groups_0"), val = tensor(1)]; + tensor layers_19_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_19_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(760065856)))]; + tensor layers_19_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_19_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762163072)))]; + tensor obj_277_cast_fp16 = conv(bias = layers_19_encoder_attn_o_proj_bias_to_fp16, dilations = obj_277_dilations_0, groups = obj_277_groups_0, pad = obj_277_pad_0, pad_type = obj_277_pad_type_0, strides = obj_277_strides_0, weight = layers_19_encoder_attn_o_proj_weight_to_fp16, x = input_193_cast_fp16)[name = tensor("obj_277_cast_fp16")]; + tensor inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = obj_277_cast_fp16)[name = tensor("inputs_119_cast_fp16")]; + tensor out_119_axes_0 = const()[name = tensor("out_119_axes_0"), val = tensor([1])]; + tensor var_4501_to_fp16 = const()[name = tensor("op_4501_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_119_cast_fp16 = layer_norm(axes = out_119_axes_0, epsilon = var_4501_to_fp16, x = inputs_119_cast_fp16)[name = tensor("out_119_cast_fp16")]; + tensor input_195_gamma_0_to_fp16 = const()[name = tensor("input_195_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762165184)))]; + tensor input_195_beta_0_to_fp16 = const()[name = tensor("input_195_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762167296)))]; + tensor input_195_epsilon_0_to_fp16 = const()[name = tensor("input_195_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_195_cast_fp16 = batch_norm(beta = input_195_beta_0_to_fp16, epsilon = input_195_epsilon_0_to_fp16, gamma = input_195_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_119_cast_fp16)[name = tensor("input_195_cast_fp16")]; + tensor input_197_pad_type_0 = const()[name = tensor("input_197_pad_type_0"), val = tensor("valid")]; + tensor input_197_strides_0 = const()[name = tensor("input_197_strides_0"), val = tensor([1, 1])]; + tensor input_197_pad_0 = const()[name = tensor("input_197_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_197_dilations_0 = const()[name = tensor("input_197_dilations_0"), val = tensor([1, 1])]; + tensor input_197_groups_0 = const()[name = tensor("input_197_groups_0"), val = tensor(1)]; + tensor layers_19_fc1_weight_to_fp16 = const()[name = tensor("layers_19_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762169408)))]; + tensor layers_19_fc1_bias_to_fp16 = const()[name = tensor("layers_19_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(770558080)))]; + tensor input_197_cast_fp16 = conv(bias = layers_19_fc1_bias_to_fp16, dilations = input_197_dilations_0, groups = input_197_groups_0, pad = 
input_197_pad_0, pad_type = input_197_pad_type_0, strides = input_197_strides_0, weight = layers_19_fc1_weight_to_fp16, x = input_195_cast_fp16)[name = tensor("input_197_cast_fp16")]; + tensor input_199_mode_0 = const()[name = tensor("input_199_mode_0"), val = tensor("EXACT")]; + tensor input_199_cast_fp16 = gelu(mode = input_199_mode_0, x = input_197_cast_fp16)[name = tensor("input_199_cast_fp16")]; + tensor hidden_states_41_pad_type_0 = const()[name = tensor("hidden_states_41_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_41_strides_0 = const()[name = tensor("hidden_states_41_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_41_pad_0 = const()[name = tensor("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_41_dilations_0 = const()[name = tensor("hidden_states_41_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_41_groups_0 = const()[name = tensor("hidden_states_41_groups_0"), val = tensor(1)]; + tensor layers_19_fc2_weight_to_fp16 = const()[name = tensor("layers_19_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(770566336)))]; + tensor layers_19_fc2_bias_to_fp16 = const()[name = tensor("layers_19_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(778955008)))]; + tensor hidden_states_41_cast_fp16 = conv(bias = layers_19_fc2_bias_to_fp16, dilations = hidden_states_41_dilations_0, groups = hidden_states_41_groups_0, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = hidden_states_41_strides_0, weight = layers_19_fc2_weight_to_fp16, x = input_199_cast_fp16)[name = tensor("hidden_states_41_cast_fp16")]; + tensor inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = hidden_states_41_cast_fp16)[name = tensor("inputs_121_cast_fp16")]; + tensor var_4536 = const()[name = tensor("op_4536"), val = tensor(3)]; + tensor out_121_axes_0 = const()[name = tensor("out_121_axes_0"), val = tensor([1])]; + tensor var_4561_to_fp16 = const()[name = tensor("op_4561_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_4561_to_fp16, x = inputs_121_cast_fp16)[name = tensor("out_121_cast_fp16")]; + tensor obj_281_gamma_0_to_fp16 = const()[name = tensor("obj_281_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(778957120)))]; + tensor obj_281_beta_0_to_fp16 = const()[name = tensor("obj_281_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(778959232)))]; + tensor obj_281_epsilon_0_to_fp16 = const()[name = tensor("obj_281_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_281_cast_fp16 = batch_norm(beta = obj_281_beta_0_to_fp16, epsilon = obj_281_epsilon_0_to_fp16, gamma = obj_281_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_121_cast_fp16)[name = tensor("obj_281_cast_fp16")]; + tensor query_81_pad_type_0 = const()[name = tensor("query_81_pad_type_0"), val = tensor("valid")]; + tensor query_81_strides_0 = const()[name = tensor("query_81_strides_0"), val = tensor([1, 1])]; + tensor query_81_pad_0 = const()[name = tensor("query_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_81_dilations_0 = const()[name = tensor("query_81_dilations_0"), val = tensor([1, 1])]; + tensor query_81_groups_0 = const()[name = tensor("query_81_groups_0"), val = tensor(1)]; + tensor 
layers_20_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(778961344)))]; + tensor layers_20_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(781058560)))]; + tensor query_81_cast_fp16 = conv(bias = layers_20_self_attn_q_proj_bias_to_fp16, dilations = query_81_dilations_0, groups = query_81_groups_0, pad = query_81_pad_0, pad_type = query_81_pad_type_0, strides = query_81_strides_0, weight = layers_20_self_attn_q_proj_weight_to_fp16, x = obj_281_cast_fp16)[name = tensor("query_81_cast_fp16")]; + tensor current_key_41_pad_type_0 = const()[name = tensor("current_key_41_pad_type_0"), val = tensor("valid")]; + tensor current_key_41_strides_0 = const()[name = tensor("current_key_41_strides_0"), val = tensor([1, 1])]; + tensor current_key_41_pad_0 = const()[name = tensor("current_key_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_41_dilations_0 = const()[name = tensor("current_key_41_dilations_0"), val = tensor([1, 1])]; + tensor current_key_41_groups_0 = const()[name = tensor("current_key_41_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(781060672)))]; + tensor current_key_41_cast_fp16 = conv(dilations = current_key_41_dilations_0, groups = current_key_41_groups_0, pad = current_key_41_pad_0, pad_type = current_key_41_pad_type_0, strides = current_key_41_strides_0, weight = layers_20_self_attn_k_proj_weight_to_fp16, x = obj_281_cast_fp16)[name = tensor("current_key_41_cast_fp16")]; + tensor current_value_41_pad_type_0 = const()[name = tensor("current_value_41_pad_type_0"), val = tensor("valid")]; + tensor current_value_41_strides_0 = const()[name = tensor("current_value_41_strides_0"), val = tensor([1, 1])]; + tensor current_value_41_pad_0 = const()[name = tensor("current_value_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_41_dilations_0 = const()[name = tensor("current_value_41_dilations_0"), val = tensor([1, 1])]; + tensor current_value_41_groups_0 = const()[name = tensor("current_value_41_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(783157888)))]; + tensor layers_20_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(785255104)))]; + tensor current_value_41_cast_fp16 = conv(bias = layers_20_self_attn_v_proj_bias_to_fp16, dilations = current_value_41_dilations_0, groups = current_value_41_groups_0, pad = current_value_41_pad_0, pad_type = current_value_41_pad_type_0, strides = current_value_41_strides_0, weight = layers_20_self_attn_v_proj_weight_to_fp16, x = obj_281_cast_fp16)[name = tensor("current_value_41_cast_fp16")]; + tensor var_4600_cast_fp16 = mul(x = var_87_cast_fp16_20, y = var_207_cast_fp16)[name = tensor("op_4600_cast_fp16")]; + tensor var_4601_cast_fp16 = mul(x = current_key_41_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4601_cast_fp16")]; + tensor 
key_81_cast_fp16 = add(x = var_4600_cast_fp16, y = var_4601_cast_fp16)[name = tensor("key_81_cast_fp16")]; + tensor var_4604_cast_fp16 = mul(x = var_114_cast_fp16_20, y = var_207_cast_fp16)[name = tensor("op_4604_cast_fp16")]; + tensor var_4605_cast_fp16 = mul(x = current_value_41_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4605_cast_fp16")]; + tensor value_81_cast_fp16 = add(x = var_4604_cast_fp16, y = var_4605_cast_fp16)[name = tensor("value_81_cast_fp16")]; + tensor var_4609 = const()[name = tensor("op_4609"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_81_cast_fp16 = reshape(shape = var_4609, x = query_81_cast_fp16)[name = tensor("mh_q_81_cast_fp16")]; + tensor var_4611_to_fp16 = const()[name = tensor("op_4611_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4612_cast_fp16 = mul(x = mh_q_81_cast_fp16, y = var_4611_to_fp16)[name = tensor("op_4612_cast_fp16")]; + tensor var_4615 = const()[name = tensor("op_4615"), val = tensor([1, 16, 64, 448])]; + tensor var_4616_cast_fp16 = reshape(shape = var_4615, x = key_81_cast_fp16)[name = tensor("op_4616_cast_fp16")]; + tensor mh_w_121_transpose_x_0 = const()[name = tensor("mh_w_121_transpose_x_0"), val = tensor(true)]; + tensor mh_w_121_transpose_y_0 = const()[name = tensor("mh_w_121_transpose_y_0"), val = tensor(false)]; + tensor mh_w_121_cast_fp16 = matmul(transpose_x = mh_w_121_transpose_x_0, transpose_y = mh_w_121_transpose_y_0, x = var_4612_cast_fp16, y = var_4616_cast_fp16)[name = tensor("mh_w_121_cast_fp16")]; + tensor mh_w_123_cast_fp16 = add(x = mh_w_121_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_123_cast_fp16")]; + tensor var_4624_cast_fp16 = softmax(axis = var_4536, x = mh_w_123_cast_fp16)[name = tensor("op_4624_cast_fp16")]; + tensor var_4625 = const()[name = tensor("op_4625"), val = tensor([1, 16, 64, 448])]; + tensor var_4626_cast_fp16 = reshape(shape = var_4625, x = value_81_cast_fp16)[name = tensor("op_4626_cast_fp16")]; + tensor attn_81_transpose_x_0 = const()[name = tensor("attn_81_transpose_x_0"), val = tensor(false)]; + tensor attn_81_transpose_y_0 = const()[name = tensor("attn_81_transpose_y_0"), val = tensor(true)]; + tensor attn_81_cast_fp16 = matmul(transpose_x = attn_81_transpose_x_0, transpose_y = attn_81_transpose_y_0, x = var_4626_cast_fp16, y = var_4624_cast_fp16)[name = tensor("attn_81_cast_fp16")]; + tensor var_4629 = const()[name = tensor("op_4629"), val = tensor([1, 1024, 1, 1])]; + tensor input_201_cast_fp16 = reshape(shape = var_4629, x = attn_81_cast_fp16)[name = tensor("input_201_cast_fp16")]; + tensor obj_287_pad_type_0 = const()[name = tensor("obj_287_pad_type_0"), val = tensor("valid")]; + tensor obj_287_strides_0 = const()[name = tensor("obj_287_strides_0"), val = tensor([1, 1])]; + tensor obj_287_pad_0 = const()[name = tensor("obj_287_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_287_dilations_0 = const()[name = tensor("obj_287_dilations_0"), val = tensor([1, 1])]; + tensor obj_287_groups_0 = const()[name = tensor("obj_287_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(785257216)))]; + tensor layers_20_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(787354432)))]; + tensor obj_287_cast_fp16 = conv(bias = layers_20_self_attn_o_proj_bias_to_fp16, dilations = 
obj_287_dilations_0, groups = obj_287_groups_0, pad = obj_287_pad_0, pad_type = obj_287_pad_type_0, strides = obj_287_strides_0, weight = layers_20_self_attn_o_proj_weight_to_fp16, x = input_201_cast_fp16)[name = tensor("obj_287_cast_fp16")]; + tensor inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = obj_287_cast_fp16)[name = tensor("inputs_123_cast_fp16")]; + tensor out_123_axes_0 = const()[name = tensor("out_123_axes_0"), val = tensor([1])]; + tensor var_4651_to_fp16 = const()[name = tensor("op_4651_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_123_cast_fp16 = layer_norm(axes = out_123_axes_0, epsilon = var_4651_to_fp16, x = inputs_123_cast_fp16)[name = tensor("out_123_cast_fp16")]; + tensor obj_289_gamma_0_to_fp16 = const()[name = tensor("obj_289_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(787356544)))]; + tensor obj_289_beta_0_to_fp16 = const()[name = tensor("obj_289_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(787358656)))]; + tensor obj_289_epsilon_0_to_fp16 = const()[name = tensor("obj_289_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_289_cast_fp16 = batch_norm(beta = obj_289_beta_0_to_fp16, epsilon = obj_289_epsilon_0_to_fp16, gamma = obj_289_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_123_cast_fp16)[name = tensor("obj_289_cast_fp16")]; + tensor query_83_pad_type_0 = const()[name = tensor("query_83_pad_type_0"), val = tensor("valid")]; + tensor query_83_strides_0 = const()[name = tensor("query_83_strides_0"), val = tensor([1, 1])]; + tensor query_83_pad_0 = const()[name = tensor("query_83_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_83_dilations_0 = const()[name = tensor("query_83_dilations_0"), val = tensor([1, 1])]; + tensor query_83_groups_0 = const()[name = tensor("query_83_groups_0"), val = tensor(1)]; + tensor layers_20_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_20_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(787360768)))]; + tensor layers_20_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_20_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(789457984)))]; + tensor query_83_cast_fp16 = conv(bias = layers_20_encoder_attn_q_proj_bias_to_fp16, dilations = query_83_dilations_0, groups = query_83_groups_0, pad = query_83_pad_0, pad_type = query_83_pad_type_0, strides = query_83_strides_0, weight = layers_20_encoder_attn_q_proj_weight_to_fp16, x = obj_289_cast_fp16)[name = tensor("query_83_cast_fp16")]; + tensor key_83_pad_type_0 = const()[name = tensor("key_83_pad_type_0"), val = tensor("valid")]; + tensor key_83_strides_0 = const()[name = tensor("key_83_strides_0"), val = tensor([1, 1])]; + tensor key_83_pad_0 = const()[name = tensor("key_83_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_83_dilations_0 = const()[name = tensor("key_83_dilations_0"), val = tensor([1, 1])]; + tensor key_83_groups_0 = const()[name = tensor("key_83_groups_0"), val = tensor(1)]; + tensor layers_20_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_20_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(789460096)))]; + tensor key_83_cast_fp16 = conv(dilations = key_83_dilations_0, groups = key_83_groups_0, pad = 
key_83_pad_0, pad_type = key_83_pad_type_0, strides = key_83_strides_0, weight = layers_20_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_83_cast_fp16")]; + tensor value_83_pad_type_0 = const()[name = tensor("value_83_pad_type_0"), val = tensor("valid")]; + tensor value_83_strides_0 = const()[name = tensor("value_83_strides_0"), val = tensor([1, 1])]; + tensor value_83_pad_0 = const()[name = tensor("value_83_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_83_dilations_0 = const()[name = tensor("value_83_dilations_0"), val = tensor([1, 1])]; + tensor value_83_groups_0 = const()[name = tensor("value_83_groups_0"), val = tensor(1)]; + tensor layers_20_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_20_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(791557312)))]; + tensor layers_20_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_20_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(793654528)))]; + tensor value_83_cast_fp16 = conv(bias = layers_20_encoder_attn_v_proj_bias_to_fp16, dilations = value_83_dilations_0, groups = value_83_groups_0, pad = value_83_pad_0, pad_type = value_83_pad_type_0, strides = value_83_strides_0, weight = layers_20_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_83_cast_fp16")]; + tensor var_4687 = const()[name = tensor("op_4687"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_83_cast_fp16 = reshape(shape = var_4687, x = query_83_cast_fp16)[name = tensor("mh_q_83_cast_fp16")]; + tensor var_4689_to_fp16 = const()[name = tensor("op_4689_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4690_cast_fp16 = mul(x = mh_q_83_cast_fp16, y = var_4689_to_fp16)[name = tensor("op_4690_cast_fp16")]; + tensor var_4693 = const()[name = tensor("op_4693"), val = tensor([1, 16, 64, 1500])]; + tensor var_4694_cast_fp16 = reshape(shape = var_4693, x = key_83_cast_fp16)[name = tensor("op_4694_cast_fp16")]; + tensor mh_w_125_transpose_x_0 = const()[name = tensor("mh_w_125_transpose_x_0"), val = tensor(true)]; + tensor mh_w_125_transpose_y_0 = const()[name = tensor("mh_w_125_transpose_y_0"), val = tensor(false)]; + tensor mh_w_125_cast_fp16 = matmul(transpose_x = mh_w_125_transpose_x_0, transpose_y = mh_w_125_transpose_y_0, x = var_4690_cast_fp16, y = var_4694_cast_fp16)[name = tensor("mh_w_125_cast_fp16")]; + tensor obj_293_cast_fp16 = softmax(axis = var_4536, x = mh_w_125_cast_fp16)[name = tensor("obj_293_cast_fp16")]; + tensor var_4698 = const()[name = tensor("op_4698"), val = tensor([1, 16, 64, 1500])]; + tensor var_4699_cast_fp16 = reshape(shape = var_4698, x = value_83_cast_fp16)[name = tensor("op_4699_cast_fp16")]; + tensor attn_83_transpose_x_0 = const()[name = tensor("attn_83_transpose_x_0"), val = tensor(false)]; + tensor attn_83_transpose_y_0 = const()[name = tensor("attn_83_transpose_y_0"), val = tensor(true)]; + tensor attn_83_cast_fp16 = matmul(transpose_x = attn_83_transpose_x_0, transpose_y = attn_83_transpose_y_0, x = var_4699_cast_fp16, y = obj_293_cast_fp16)[name = tensor("attn_83_cast_fp16")]; + tensor var_4702 = const()[name = tensor("op_4702"), val = tensor([1, 1024, 1, 1])]; + tensor input_203_cast_fp16 = reshape(shape = var_4702, x = attn_83_cast_fp16)[name = tensor("input_203_cast_fp16")]; + tensor obj_291_pad_type_0 = const()[name = tensor("obj_291_pad_type_0"), val = tensor("valid")]; + tensor obj_291_strides_0 
= const()[name = tensor("obj_291_strides_0"), val = tensor([1, 1])]; + tensor obj_291_pad_0 = const()[name = tensor("obj_291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_291_dilations_0 = const()[name = tensor("obj_291_dilations_0"), val = tensor([1, 1])]; + tensor obj_291_groups_0 = const()[name = tensor("obj_291_groups_0"), val = tensor(1)]; + tensor layers_20_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_20_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(793656640)))]; + tensor layers_20_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_20_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795753856)))]; + tensor obj_291_cast_fp16 = conv(bias = layers_20_encoder_attn_o_proj_bias_to_fp16, dilations = obj_291_dilations_0, groups = obj_291_groups_0, pad = obj_291_pad_0, pad_type = obj_291_pad_type_0, strides = obj_291_strides_0, weight = layers_20_encoder_attn_o_proj_weight_to_fp16, x = input_203_cast_fp16)[name = tensor("obj_291_cast_fp16")]; + tensor inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = obj_291_cast_fp16)[name = tensor("inputs_125_cast_fp16")]; + tensor out_125_axes_0 = const()[name = tensor("out_125_axes_0"), val = tensor([1])]; + tensor var_4723_to_fp16 = const()[name = tensor("op_4723_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_125_cast_fp16 = layer_norm(axes = out_125_axes_0, epsilon = var_4723_to_fp16, x = inputs_125_cast_fp16)[name = tensor("out_125_cast_fp16")]; + tensor input_205_gamma_0_to_fp16 = const()[name = tensor("input_205_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795755968)))]; + tensor input_205_beta_0_to_fp16 = const()[name = tensor("input_205_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795758080)))]; + tensor input_205_epsilon_0_to_fp16 = const()[name = tensor("input_205_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_205_cast_fp16 = batch_norm(beta = input_205_beta_0_to_fp16, epsilon = input_205_epsilon_0_to_fp16, gamma = input_205_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_125_cast_fp16)[name = tensor("input_205_cast_fp16")]; + tensor input_207_pad_type_0 = const()[name = tensor("input_207_pad_type_0"), val = tensor("valid")]; + tensor input_207_strides_0 = const()[name = tensor("input_207_strides_0"), val = tensor([1, 1])]; + tensor input_207_pad_0 = const()[name = tensor("input_207_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_207_dilations_0 = const()[name = tensor("input_207_dilations_0"), val = tensor([1, 1])]; + tensor input_207_groups_0 = const()[name = tensor("input_207_groups_0"), val = tensor(1)]; + tensor layers_20_fc1_weight_to_fp16 = const()[name = tensor("layers_20_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795760192)))]; + tensor layers_20_fc1_bias_to_fp16 = const()[name = tensor("layers_20_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(804148864)))]; + tensor input_207_cast_fp16 = conv(bias = layers_20_fc1_bias_to_fp16, dilations = input_207_dilations_0, groups = input_207_groups_0, pad = input_207_pad_0, pad_type = input_207_pad_type_0, strides = input_207_strides_0, weight = layers_20_fc1_weight_to_fp16, x = 
input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; + tensor input_209_mode_0 = const()[name = tensor("input_209_mode_0"), val = tensor("EXACT")]; + tensor input_209_cast_fp16 = gelu(mode = input_209_mode_0, x = input_207_cast_fp16)[name = tensor("input_209_cast_fp16")]; + tensor hidden_states_43_pad_type_0 = const()[name = tensor("hidden_states_43_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_43_strides_0 = const()[name = tensor("hidden_states_43_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_43_pad_0 = const()[name = tensor("hidden_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_43_dilations_0 = const()[name = tensor("hidden_states_43_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_43_groups_0 = const()[name = tensor("hidden_states_43_groups_0"), val = tensor(1)]; + tensor layers_20_fc2_weight_to_fp16 = const()[name = tensor("layers_20_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(804157120)))]; + tensor layers_20_fc2_bias_to_fp16 = const()[name = tensor("layers_20_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(812545792)))]; + tensor hidden_states_43_cast_fp16 = conv(bias = layers_20_fc2_bias_to_fp16, dilations = hidden_states_43_dilations_0, groups = hidden_states_43_groups_0, pad = hidden_states_43_pad_0, pad_type = hidden_states_43_pad_type_0, strides = hidden_states_43_strides_0, weight = layers_20_fc2_weight_to_fp16, x = input_209_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; + tensor inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = hidden_states_43_cast_fp16)[name = tensor("inputs_127_cast_fp16")]; + tensor var_4759 = const()[name = tensor("op_4759"), val = tensor(3)]; + tensor out_127_axes_0 = const()[name = tensor("out_127_axes_0"), val = tensor([1])]; + tensor var_4784_to_fp16 = const()[name = tensor("op_4784_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_4784_to_fp16, x = inputs_127_cast_fp16)[name = tensor("out_127_cast_fp16")]; + tensor obj_295_gamma_0_to_fp16 = const()[name = tensor("obj_295_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(812547904)))]; + tensor obj_295_beta_0_to_fp16 = const()[name = tensor("obj_295_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(812550016)))]; + tensor obj_295_epsilon_0_to_fp16 = const()[name = tensor("obj_295_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_295_cast_fp16 = batch_norm(beta = obj_295_beta_0_to_fp16, epsilon = obj_295_epsilon_0_to_fp16, gamma = obj_295_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_127_cast_fp16)[name = tensor("obj_295_cast_fp16")]; + tensor query_85_pad_type_0 = const()[name = tensor("query_85_pad_type_0"), val = tensor("valid")]; + tensor query_85_strides_0 = const()[name = tensor("query_85_strides_0"), val = tensor([1, 1])]; + tensor query_85_pad_0 = const()[name = tensor("query_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_85_dilations_0 = const()[name = tensor("query_85_dilations_0"), val = tensor([1, 1])]; + tensor query_85_groups_0 = const()[name = tensor("query_85_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(812552128)))]; + tensor layers_21_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(814649344)))]; + tensor query_85_cast_fp16 = conv(bias = layers_21_self_attn_q_proj_bias_to_fp16, dilations = query_85_dilations_0, groups = query_85_groups_0, pad = query_85_pad_0, pad_type = query_85_pad_type_0, strides = query_85_strides_0, weight = layers_21_self_attn_q_proj_weight_to_fp16, x = obj_295_cast_fp16)[name = tensor("query_85_cast_fp16")]; + tensor current_key_43_pad_type_0 = const()[name = tensor("current_key_43_pad_type_0"), val = tensor("valid")]; + tensor current_key_43_strides_0 = const()[name = tensor("current_key_43_strides_0"), val = tensor([1, 1])]; + tensor current_key_43_pad_0 = const()[name = tensor("current_key_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_43_dilations_0 = const()[name = tensor("current_key_43_dilations_0"), val = tensor([1, 1])]; + tensor current_key_43_groups_0 = const()[name = tensor("current_key_43_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(814651456)))]; + tensor current_key_43_cast_fp16 = conv(dilations = current_key_43_dilations_0, groups = current_key_43_groups_0, pad = current_key_43_pad_0, pad_type = current_key_43_pad_type_0, strides = current_key_43_strides_0, weight = layers_21_self_attn_k_proj_weight_to_fp16, x = obj_295_cast_fp16)[name = tensor("current_key_43_cast_fp16")]; + tensor current_value_43_pad_type_0 = const()[name = tensor("current_value_43_pad_type_0"), val = tensor("valid")]; + tensor current_value_43_strides_0 = const()[name = tensor("current_value_43_strides_0"), val = tensor([1, 1])]; + tensor current_value_43_pad_0 = const()[name = tensor("current_value_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_43_dilations_0 = const()[name = tensor("current_value_43_dilations_0"), val = tensor([1, 1])]; + tensor current_value_43_groups_0 = const()[name = tensor("current_value_43_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(816748672)))]; + tensor layers_21_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(818845888)))]; + tensor current_value_43_cast_fp16 = conv(bias = layers_21_self_attn_v_proj_bias_to_fp16, dilations = current_value_43_dilations_0, groups = current_value_43_groups_0, pad = current_value_43_pad_0, pad_type = current_value_43_pad_type_0, strides = current_value_43_strides_0, weight = layers_21_self_attn_v_proj_weight_to_fp16, x = obj_295_cast_fp16)[name = tensor("current_value_43_cast_fp16")]; + tensor var_4823_cast_fp16 = mul(x = var_87_cast_fp16_21, y = var_207_cast_fp16)[name = tensor("op_4823_cast_fp16")]; + tensor var_4824_cast_fp16 = mul(x = current_key_43_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4824_cast_fp16")]; + tensor key_85_cast_fp16 = add(x = var_4823_cast_fp16, y = var_4824_cast_fp16)[name = tensor("key_85_cast_fp16")]; + tensor var_4827_cast_fp16 = mul(x = 
var_114_cast_fp16_21, y = var_207_cast_fp16)[name = tensor("op_4827_cast_fp16")]; + tensor var_4828_cast_fp16 = mul(x = current_value_43_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_4828_cast_fp16")]; + tensor value_85_cast_fp16 = add(x = var_4827_cast_fp16, y = var_4828_cast_fp16)[name = tensor("value_85_cast_fp16")]; + tensor var_4832 = const()[name = tensor("op_4832"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_85_cast_fp16 = reshape(shape = var_4832, x = query_85_cast_fp16)[name = tensor("mh_q_85_cast_fp16")]; + tensor var_4834_to_fp16 = const()[name = tensor("op_4834_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4835_cast_fp16 = mul(x = mh_q_85_cast_fp16, y = var_4834_to_fp16)[name = tensor("op_4835_cast_fp16")]; + tensor var_4838 = const()[name = tensor("op_4838"), val = tensor([1, 16, 64, 448])]; + tensor var_4839_cast_fp16 = reshape(shape = var_4838, x = key_85_cast_fp16)[name = tensor("op_4839_cast_fp16")]; + tensor mh_w_127_transpose_x_0 = const()[name = tensor("mh_w_127_transpose_x_0"), val = tensor(true)]; + tensor mh_w_127_transpose_y_0 = const()[name = tensor("mh_w_127_transpose_y_0"), val = tensor(false)]; + tensor mh_w_127_cast_fp16 = matmul(transpose_x = mh_w_127_transpose_x_0, transpose_y = mh_w_127_transpose_y_0, x = var_4835_cast_fp16, y = var_4839_cast_fp16)[name = tensor("mh_w_127_cast_fp16")]; + tensor mh_w_129_cast_fp16 = add(x = mh_w_127_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_129_cast_fp16")]; + tensor var_4847_cast_fp16 = softmax(axis = var_4759, x = mh_w_129_cast_fp16)[name = tensor("op_4847_cast_fp16")]; + tensor var_4848 = const()[name = tensor("op_4848"), val = tensor([1, 16, 64, 448])]; + tensor var_4849_cast_fp16 = reshape(shape = var_4848, x = value_85_cast_fp16)[name = tensor("op_4849_cast_fp16")]; + tensor attn_85_transpose_x_0 = const()[name = tensor("attn_85_transpose_x_0"), val = tensor(false)]; + tensor attn_85_transpose_y_0 = const()[name = tensor("attn_85_transpose_y_0"), val = tensor(true)]; + tensor attn_85_cast_fp16 = matmul(transpose_x = attn_85_transpose_x_0, transpose_y = attn_85_transpose_y_0, x = var_4849_cast_fp16, y = var_4847_cast_fp16)[name = tensor("attn_85_cast_fp16")]; + tensor var_4852 = const()[name = tensor("op_4852"), val = tensor([1, 1024, 1, 1])]; + tensor input_211_cast_fp16 = reshape(shape = var_4852, x = attn_85_cast_fp16)[name = tensor("input_211_cast_fp16")]; + tensor obj_301_pad_type_0 = const()[name = tensor("obj_301_pad_type_0"), val = tensor("valid")]; + tensor obj_301_strides_0 = const()[name = tensor("obj_301_strides_0"), val = tensor([1, 1])]; + tensor obj_301_pad_0 = const()[name = tensor("obj_301_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_301_dilations_0 = const()[name = tensor("obj_301_dilations_0"), val = tensor([1, 1])]; + tensor obj_301_groups_0 = const()[name = tensor("obj_301_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(818848000)))]; + tensor layers_21_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(820945216)))]; + tensor obj_301_cast_fp16 = conv(bias = layers_21_self_attn_o_proj_bias_to_fp16, dilations = obj_301_dilations_0, groups = obj_301_groups_0, pad = obj_301_pad_0, pad_type = obj_301_pad_type_0, strides = obj_301_strides_0, weight = 
layers_21_self_attn_o_proj_weight_to_fp16, x = input_211_cast_fp16)[name = tensor("obj_301_cast_fp16")]; + tensor inputs_129_cast_fp16 = add(x = inputs_127_cast_fp16, y = obj_301_cast_fp16)[name = tensor("inputs_129_cast_fp16")]; + tensor out_129_axes_0 = const()[name = tensor("out_129_axes_0"), val = tensor([1])]; + tensor var_4874_to_fp16 = const()[name = tensor("op_4874_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_129_cast_fp16 = layer_norm(axes = out_129_axes_0, epsilon = var_4874_to_fp16, x = inputs_129_cast_fp16)[name = tensor("out_129_cast_fp16")]; + tensor obj_303_gamma_0_to_fp16 = const()[name = tensor("obj_303_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(820947328)))]; + tensor obj_303_beta_0_to_fp16 = const()[name = tensor("obj_303_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(820949440)))]; + tensor obj_303_epsilon_0_to_fp16 = const()[name = tensor("obj_303_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_303_cast_fp16 = batch_norm(beta = obj_303_beta_0_to_fp16, epsilon = obj_303_epsilon_0_to_fp16, gamma = obj_303_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_129_cast_fp16)[name = tensor("obj_303_cast_fp16")]; + tensor query_87_pad_type_0 = const()[name = tensor("query_87_pad_type_0"), val = tensor("valid")]; + tensor query_87_strides_0 = const()[name = tensor("query_87_strides_0"), val = tensor([1, 1])]; + tensor query_87_pad_0 = const()[name = tensor("query_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_87_dilations_0 = const()[name = tensor("query_87_dilations_0"), val = tensor([1, 1])]; + tensor query_87_groups_0 = const()[name = tensor("query_87_groups_0"), val = tensor(1)]; + tensor layers_21_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_21_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(820951552)))]; + tensor layers_21_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_21_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(823048768)))]; + tensor query_87_cast_fp16 = conv(bias = layers_21_encoder_attn_q_proj_bias_to_fp16, dilations = query_87_dilations_0, groups = query_87_groups_0, pad = query_87_pad_0, pad_type = query_87_pad_type_0, strides = query_87_strides_0, weight = layers_21_encoder_attn_q_proj_weight_to_fp16, x = obj_303_cast_fp16)[name = tensor("query_87_cast_fp16")]; + tensor key_87_pad_type_0 = const()[name = tensor("key_87_pad_type_0"), val = tensor("valid")]; + tensor key_87_strides_0 = const()[name = tensor("key_87_strides_0"), val = tensor([1, 1])]; + tensor key_87_pad_0 = const()[name = tensor("key_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_87_dilations_0 = const()[name = tensor("key_87_dilations_0"), val = tensor([1, 1])]; + tensor key_87_groups_0 = const()[name = tensor("key_87_groups_0"), val = tensor(1)]; + tensor layers_21_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_21_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(823050880)))]; + tensor key_87_cast_fp16 = conv(dilations = key_87_dilations_0, groups = key_87_groups_0, pad = key_87_pad_0, pad_type = key_87_pad_type_0, strides = key_87_strides_0, weight = layers_21_encoder_attn_k_proj_weight_to_fp16, x = 
encoder_output_embeds)[name = tensor("key_87_cast_fp16")]; + tensor value_87_pad_type_0 = const()[name = tensor("value_87_pad_type_0"), val = tensor("valid")]; + tensor value_87_strides_0 = const()[name = tensor("value_87_strides_0"), val = tensor([1, 1])]; + tensor value_87_pad_0 = const()[name = tensor("value_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_87_dilations_0 = const()[name = tensor("value_87_dilations_0"), val = tensor([1, 1])]; + tensor value_87_groups_0 = const()[name = tensor("value_87_groups_0"), val = tensor(1)]; + tensor layers_21_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_21_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(825148096)))]; + tensor layers_21_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_21_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(827245312)))]; + tensor value_87_cast_fp16 = conv(bias = layers_21_encoder_attn_v_proj_bias_to_fp16, dilations = value_87_dilations_0, groups = value_87_groups_0, pad = value_87_pad_0, pad_type = value_87_pad_type_0, strides = value_87_strides_0, weight = layers_21_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_87_cast_fp16")]; + tensor var_4910 = const()[name = tensor("op_4910"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_87_cast_fp16 = reshape(shape = var_4910, x = query_87_cast_fp16)[name = tensor("mh_q_87_cast_fp16")]; + tensor var_4912_to_fp16 = const()[name = tensor("op_4912_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4913_cast_fp16 = mul(x = mh_q_87_cast_fp16, y = var_4912_to_fp16)[name = tensor("op_4913_cast_fp16")]; + tensor var_4916 = const()[name = tensor("op_4916"), val = tensor([1, 16, 64, 1500])]; + tensor var_4917_cast_fp16 = reshape(shape = var_4916, x = key_87_cast_fp16)[name = tensor("op_4917_cast_fp16")]; + tensor mh_w_131_transpose_x_0 = const()[name = tensor("mh_w_131_transpose_x_0"), val = tensor(true)]; + tensor mh_w_131_transpose_y_0 = const()[name = tensor("mh_w_131_transpose_y_0"), val = tensor(false)]; + tensor mh_w_131_cast_fp16 = matmul(transpose_x = mh_w_131_transpose_x_0, transpose_y = mh_w_131_transpose_y_0, x = var_4913_cast_fp16, y = var_4917_cast_fp16)[name = tensor("mh_w_131_cast_fp16")]; + tensor obj_307_cast_fp16 = softmax(axis = var_4759, x = mh_w_131_cast_fp16)[name = tensor("obj_307_cast_fp16")]; + tensor var_4921 = const()[name = tensor("op_4921"), val = tensor([1, 16, 64, 1500])]; + tensor var_4922_cast_fp16 = reshape(shape = var_4921, x = value_87_cast_fp16)[name = tensor("op_4922_cast_fp16")]; + tensor attn_87_transpose_x_0 = const()[name = tensor("attn_87_transpose_x_0"), val = tensor(false)]; + tensor attn_87_transpose_y_0 = const()[name = tensor("attn_87_transpose_y_0"), val = tensor(true)]; + tensor attn_87_cast_fp16 = matmul(transpose_x = attn_87_transpose_x_0, transpose_y = attn_87_transpose_y_0, x = var_4922_cast_fp16, y = obj_307_cast_fp16)[name = tensor("attn_87_cast_fp16")]; + tensor var_4925 = const()[name = tensor("op_4925"), val = tensor([1, 1024, 1, 1])]; + tensor input_213_cast_fp16 = reshape(shape = var_4925, x = attn_87_cast_fp16)[name = tensor("input_213_cast_fp16")]; + tensor obj_305_pad_type_0 = const()[name = tensor("obj_305_pad_type_0"), val = tensor("valid")]; + tensor obj_305_strides_0 = const()[name = tensor("obj_305_strides_0"), val = tensor([1, 1])]; + tensor obj_305_pad_0 = const()[name = 
tensor("obj_305_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_305_dilations_0 = const()[name = tensor("obj_305_dilations_0"), val = tensor([1, 1])]; + tensor obj_305_groups_0 = const()[name = tensor("obj_305_groups_0"), val = tensor(1)]; + tensor layers_21_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_21_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(827247424)))]; + tensor layers_21_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_21_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(829344640)))]; + tensor obj_305_cast_fp16 = conv(bias = layers_21_encoder_attn_o_proj_bias_to_fp16, dilations = obj_305_dilations_0, groups = obj_305_groups_0, pad = obj_305_pad_0, pad_type = obj_305_pad_type_0, strides = obj_305_strides_0, weight = layers_21_encoder_attn_o_proj_weight_to_fp16, x = input_213_cast_fp16)[name = tensor("obj_305_cast_fp16")]; + tensor inputs_131_cast_fp16 = add(x = inputs_129_cast_fp16, y = obj_305_cast_fp16)[name = tensor("inputs_131_cast_fp16")]; + tensor out_131_axes_0 = const()[name = tensor("out_131_axes_0"), val = tensor([1])]; + tensor var_4943_to_fp16 = const()[name = tensor("op_4943_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_131_cast_fp16 = layer_norm(axes = out_131_axes_0, epsilon = var_4943_to_fp16, x = inputs_131_cast_fp16)[name = tensor("out_131_cast_fp16")]; + tensor input_215_gamma_0_to_fp16 = const()[name = tensor("input_215_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(829346752)))]; + tensor input_215_beta_0_to_fp16 = const()[name = tensor("input_215_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(829348864)))]; + tensor input_215_epsilon_0_to_fp16 = const()[name = tensor("input_215_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_215_cast_fp16 = batch_norm(beta = input_215_beta_0_to_fp16, epsilon = input_215_epsilon_0_to_fp16, gamma = input_215_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_131_cast_fp16)[name = tensor("input_215_cast_fp16")]; + tensor input_217_pad_type_0 = const()[name = tensor("input_217_pad_type_0"), val = tensor("valid")]; + tensor input_217_strides_0 = const()[name = tensor("input_217_strides_0"), val = tensor([1, 1])]; + tensor input_217_pad_0 = const()[name = tensor("input_217_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_217_dilations_0 = const()[name = tensor("input_217_dilations_0"), val = tensor([1, 1])]; + tensor input_217_groups_0 = const()[name = tensor("input_217_groups_0"), val = tensor(1)]; + tensor layers_21_fc1_weight_to_fp16 = const()[name = tensor("layers_21_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(829350976)))]; + tensor layers_21_fc1_bias_to_fp16 = const()[name = tensor("layers_21_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(837739648)))]; + tensor input_217_cast_fp16 = conv(bias = layers_21_fc1_bias_to_fp16, dilations = input_217_dilations_0, groups = input_217_groups_0, pad = input_217_pad_0, pad_type = input_217_pad_type_0, strides = input_217_strides_0, weight = layers_21_fc1_weight_to_fp16, x = input_215_cast_fp16)[name = tensor("input_217_cast_fp16")]; + tensor input_219_mode_0 = const()[name = 
tensor("input_219_mode_0"), val = tensor("EXACT")]; + tensor input_219_cast_fp16 = gelu(mode = input_219_mode_0, x = input_217_cast_fp16)[name = tensor("input_219_cast_fp16")]; + tensor hidden_states_45_pad_type_0 = const()[name = tensor("hidden_states_45_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_45_strides_0 = const()[name = tensor("hidden_states_45_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_45_pad_0 = const()[name = tensor("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_45_dilations_0 = const()[name = tensor("hidden_states_45_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_45_groups_0 = const()[name = tensor("hidden_states_45_groups_0"), val = tensor(1)]; + tensor layers_21_fc2_weight_to_fp16 = const()[name = tensor("layers_21_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(837747904)))]; + tensor layers_21_fc2_bias_to_fp16 = const()[name = tensor("layers_21_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(846136576)))]; + tensor hidden_states_45_cast_fp16 = conv(bias = layers_21_fc2_bias_to_fp16, dilations = hidden_states_45_dilations_0, groups = hidden_states_45_groups_0, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = hidden_states_45_strides_0, weight = layers_21_fc2_weight_to_fp16, x = input_219_cast_fp16)[name = tensor("hidden_states_45_cast_fp16")]; + tensor inputs_133_cast_fp16 = add(x = inputs_131_cast_fp16, y = hidden_states_45_cast_fp16)[name = tensor("inputs_133_cast_fp16")]; + tensor var_4978 = const()[name = tensor("op_4978"), val = tensor(3)]; + tensor out_133_axes_0 = const()[name = tensor("out_133_axes_0"), val = tensor([1])]; + tensor var_5003_to_fp16 = const()[name = tensor("op_5003_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_133_cast_fp16 = layer_norm(axes = out_133_axes_0, epsilon = var_5003_to_fp16, x = inputs_133_cast_fp16)[name = tensor("out_133_cast_fp16")]; + tensor obj_309_gamma_0_to_fp16 = const()[name = tensor("obj_309_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(846138688)))]; + tensor obj_309_beta_0_to_fp16 = const()[name = tensor("obj_309_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(846140800)))]; + tensor obj_309_epsilon_0_to_fp16 = const()[name = tensor("obj_309_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_309_cast_fp16 = batch_norm(beta = obj_309_beta_0_to_fp16, epsilon = obj_309_epsilon_0_to_fp16, gamma = obj_309_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_133_cast_fp16)[name = tensor("obj_309_cast_fp16")]; + tensor query_89_pad_type_0 = const()[name = tensor("query_89_pad_type_0"), val = tensor("valid")]; + tensor query_89_strides_0 = const()[name = tensor("query_89_strides_0"), val = tensor([1, 1])]; + tensor query_89_pad_0 = const()[name = tensor("query_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_89_dilations_0 = const()[name = tensor("query_89_dilations_0"), val = tensor([1, 1])]; + tensor query_89_groups_0 = const()[name = tensor("query_89_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(846142912)))]; + tensor 
layers_22_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(848240128)))]; + tensor query_89_cast_fp16 = conv(bias = layers_22_self_attn_q_proj_bias_to_fp16, dilations = query_89_dilations_0, groups = query_89_groups_0, pad = query_89_pad_0, pad_type = query_89_pad_type_0, strides = query_89_strides_0, weight = layers_22_self_attn_q_proj_weight_to_fp16, x = obj_309_cast_fp16)[name = tensor("query_89_cast_fp16")]; + tensor current_key_45_pad_type_0 = const()[name = tensor("current_key_45_pad_type_0"), val = tensor("valid")]; + tensor current_key_45_strides_0 = const()[name = tensor("current_key_45_strides_0"), val = tensor([1, 1])]; + tensor current_key_45_pad_0 = const()[name = tensor("current_key_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_45_dilations_0 = const()[name = tensor("current_key_45_dilations_0"), val = tensor([1, 1])]; + tensor current_key_45_groups_0 = const()[name = tensor("current_key_45_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(848242240)))]; + tensor current_key_45_cast_fp16 = conv(dilations = current_key_45_dilations_0, groups = current_key_45_groups_0, pad = current_key_45_pad_0, pad_type = current_key_45_pad_type_0, strides = current_key_45_strides_0, weight = layers_22_self_attn_k_proj_weight_to_fp16, x = obj_309_cast_fp16)[name = tensor("current_key_45_cast_fp16")]; + tensor current_value_45_pad_type_0 = const()[name = tensor("current_value_45_pad_type_0"), val = tensor("valid")]; + tensor current_value_45_strides_0 = const()[name = tensor("current_value_45_strides_0"), val = tensor([1, 1])]; + tensor current_value_45_pad_0 = const()[name = tensor("current_value_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_45_dilations_0 = const()[name = tensor("current_value_45_dilations_0"), val = tensor([1, 1])]; + tensor current_value_45_groups_0 = const()[name = tensor("current_value_45_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(850339456)))]; + tensor layers_22_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(852436672)))]; + tensor current_value_45_cast_fp16 = conv(bias = layers_22_self_attn_v_proj_bias_to_fp16, dilations = current_value_45_dilations_0, groups = current_value_45_groups_0, pad = current_value_45_pad_0, pad_type = current_value_45_pad_type_0, strides = current_value_45_strides_0, weight = layers_22_self_attn_v_proj_weight_to_fp16, x = obj_309_cast_fp16)[name = tensor("current_value_45_cast_fp16")]; + tensor var_5042_cast_fp16 = mul(x = var_87_cast_fp16_22, y = var_207_cast_fp16)[name = tensor("op_5042_cast_fp16")]; + tensor var_5043_cast_fp16 = mul(x = current_key_45_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_5043_cast_fp16")]; + tensor key_89_cast_fp16 = add(x = var_5042_cast_fp16, y = var_5043_cast_fp16)[name = tensor("key_89_cast_fp16")]; + tensor var_5046_cast_fp16 = mul(x = var_114_cast_fp16_22, y = var_207_cast_fp16)[name = tensor("op_5046_cast_fp16")]; + tensor 
var_5047_cast_fp16 = mul(x = current_value_45_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_5047_cast_fp16")]; + tensor value_89_cast_fp16 = add(x = var_5046_cast_fp16, y = var_5047_cast_fp16)[name = tensor("value_89_cast_fp16")]; + tensor var_5051 = const()[name = tensor("op_5051"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_89_cast_fp16 = reshape(shape = var_5051, x = query_89_cast_fp16)[name = tensor("mh_q_89_cast_fp16")]; + tensor var_5053_to_fp16 = const()[name = tensor("op_5053_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5054_cast_fp16 = mul(x = mh_q_89_cast_fp16, y = var_5053_to_fp16)[name = tensor("op_5054_cast_fp16")]; + tensor var_5057 = const()[name = tensor("op_5057"), val = tensor([1, 16, 64, 448])]; + tensor var_5058_cast_fp16 = reshape(shape = var_5057, x = key_89_cast_fp16)[name = tensor("op_5058_cast_fp16")]; + tensor mh_w_133_transpose_x_0 = const()[name = tensor("mh_w_133_transpose_x_0"), val = tensor(true)]; + tensor mh_w_133_transpose_y_0 = const()[name = tensor("mh_w_133_transpose_y_0"), val = tensor(false)]; + tensor mh_w_133_cast_fp16 = matmul(transpose_x = mh_w_133_transpose_x_0, transpose_y = mh_w_133_transpose_y_0, x = var_5054_cast_fp16, y = var_5058_cast_fp16)[name = tensor("mh_w_133_cast_fp16")]; + tensor mh_w_135_cast_fp16 = add(x = mh_w_133_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_135_cast_fp16")]; + tensor var_5066_cast_fp16 = softmax(axis = var_4978, x = mh_w_135_cast_fp16)[name = tensor("op_5066_cast_fp16")]; + tensor var_5067 = const()[name = tensor("op_5067"), val = tensor([1, 16, 64, 448])]; + tensor var_5068_cast_fp16 = reshape(shape = var_5067, x = value_89_cast_fp16)[name = tensor("op_5068_cast_fp16")]; + tensor attn_89_transpose_x_0 = const()[name = tensor("attn_89_transpose_x_0"), val = tensor(false)]; + tensor attn_89_transpose_y_0 = const()[name = tensor("attn_89_transpose_y_0"), val = tensor(true)]; + tensor attn_89_cast_fp16 = matmul(transpose_x = attn_89_transpose_x_0, transpose_y = attn_89_transpose_y_0, x = var_5068_cast_fp16, y = var_5066_cast_fp16)[name = tensor("attn_89_cast_fp16")]; + tensor var_5071 = const()[name = tensor("op_5071"), val = tensor([1, 1024, 1, 1])]; + tensor input_221_cast_fp16 = reshape(shape = var_5071, x = attn_89_cast_fp16)[name = tensor("input_221_cast_fp16")]; + tensor obj_315_pad_type_0 = const()[name = tensor("obj_315_pad_type_0"), val = tensor("valid")]; + tensor obj_315_strides_0 = const()[name = tensor("obj_315_strides_0"), val = tensor([1, 1])]; + tensor obj_315_pad_0 = const()[name = tensor("obj_315_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_315_dilations_0 = const()[name = tensor("obj_315_dilations_0"), val = tensor([1, 1])]; + tensor obj_315_groups_0 = const()[name = tensor("obj_315_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(852438784)))]; + tensor layers_22_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(854536000)))]; + tensor obj_315_cast_fp16 = conv(bias = layers_22_self_attn_o_proj_bias_to_fp16, dilations = obj_315_dilations_0, groups = obj_315_groups_0, pad = obj_315_pad_0, pad_type = obj_315_pad_type_0, strides = obj_315_strides_0, weight = layers_22_self_attn_o_proj_weight_to_fp16, x = input_221_cast_fp16)[name = 
tensor("obj_315_cast_fp16")]; + tensor inputs_135_cast_fp16 = add(x = inputs_133_cast_fp16, y = obj_315_cast_fp16)[name = tensor("inputs_135_cast_fp16")]; + tensor out_135_axes_0 = const()[name = tensor("out_135_axes_0"), val = tensor([1])]; + tensor var_5093_to_fp16 = const()[name = tensor("op_5093_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_135_cast_fp16 = layer_norm(axes = out_135_axes_0, epsilon = var_5093_to_fp16, x = inputs_135_cast_fp16)[name = tensor("out_135_cast_fp16")]; + tensor obj_317_gamma_0_to_fp16 = const()[name = tensor("obj_317_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(854538112)))]; + tensor obj_317_beta_0_to_fp16 = const()[name = tensor("obj_317_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(854540224)))]; + tensor obj_317_epsilon_0_to_fp16 = const()[name = tensor("obj_317_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_317_cast_fp16 = batch_norm(beta = obj_317_beta_0_to_fp16, epsilon = obj_317_epsilon_0_to_fp16, gamma = obj_317_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_135_cast_fp16)[name = tensor("obj_317_cast_fp16")]; + tensor query_91_pad_type_0 = const()[name = tensor("query_91_pad_type_0"), val = tensor("valid")]; + tensor query_91_strides_0 = const()[name = tensor("query_91_strides_0"), val = tensor([1, 1])]; + tensor query_91_pad_0 = const()[name = tensor("query_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_91_dilations_0 = const()[name = tensor("query_91_dilations_0"), val = tensor([1, 1])]; + tensor query_91_groups_0 = const()[name = tensor("query_91_groups_0"), val = tensor(1)]; + tensor layers_22_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_22_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(854542336)))]; + tensor layers_22_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_22_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(856639552)))]; + tensor query_91_cast_fp16 = conv(bias = layers_22_encoder_attn_q_proj_bias_to_fp16, dilations = query_91_dilations_0, groups = query_91_groups_0, pad = query_91_pad_0, pad_type = query_91_pad_type_0, strides = query_91_strides_0, weight = layers_22_encoder_attn_q_proj_weight_to_fp16, x = obj_317_cast_fp16)[name = tensor("query_91_cast_fp16")]; + tensor key_91_pad_type_0 = const()[name = tensor("key_91_pad_type_0"), val = tensor("valid")]; + tensor key_91_strides_0 = const()[name = tensor("key_91_strides_0"), val = tensor([1, 1])]; + tensor key_91_pad_0 = const()[name = tensor("key_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_91_dilations_0 = const()[name = tensor("key_91_dilations_0"), val = tensor([1, 1])]; + tensor key_91_groups_0 = const()[name = tensor("key_91_groups_0"), val = tensor(1)]; + tensor layers_22_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_22_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(856641664)))]; + tensor key_91_cast_fp16 = conv(dilations = key_91_dilations_0, groups = key_91_groups_0, pad = key_91_pad_0, pad_type = key_91_pad_type_0, strides = key_91_strides_0, weight = layers_22_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_91_cast_fp16")]; + tensor value_91_pad_type_0 = 
const()[name = tensor("value_91_pad_type_0"), val = tensor("valid")]; + tensor value_91_strides_0 = const()[name = tensor("value_91_strides_0"), val = tensor([1, 1])]; + tensor value_91_pad_0 = const()[name = tensor("value_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_91_dilations_0 = const()[name = tensor("value_91_dilations_0"), val = tensor([1, 1])]; + tensor value_91_groups_0 = const()[name = tensor("value_91_groups_0"), val = tensor(1)]; + tensor layers_22_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_22_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(858738880)))]; + tensor layers_22_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_22_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(860836096)))]; + tensor value_91_cast_fp16 = conv(bias = layers_22_encoder_attn_v_proj_bias_to_fp16, dilations = value_91_dilations_0, groups = value_91_groups_0, pad = value_91_pad_0, pad_type = value_91_pad_type_0, strides = value_91_strides_0, weight = layers_22_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_91_cast_fp16")]; + tensor var_5129 = const()[name = tensor("op_5129"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_91_cast_fp16 = reshape(shape = var_5129, x = query_91_cast_fp16)[name = tensor("mh_q_91_cast_fp16")]; + tensor var_5131_to_fp16 = const()[name = tensor("op_5131_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5132_cast_fp16 = mul(x = mh_q_91_cast_fp16, y = var_5131_to_fp16)[name = tensor("op_5132_cast_fp16")]; + tensor var_5135 = const()[name = tensor("op_5135"), val = tensor([1, 16, 64, 1500])]; + tensor var_5136_cast_fp16 = reshape(shape = var_5135, x = key_91_cast_fp16)[name = tensor("op_5136_cast_fp16")]; + tensor mh_w_137_transpose_x_0 = const()[name = tensor("mh_w_137_transpose_x_0"), val = tensor(true)]; + tensor mh_w_137_transpose_y_0 = const()[name = tensor("mh_w_137_transpose_y_0"), val = tensor(false)]; + tensor mh_w_137_cast_fp16 = matmul(transpose_x = mh_w_137_transpose_x_0, transpose_y = mh_w_137_transpose_y_0, x = var_5132_cast_fp16, y = var_5136_cast_fp16)[name = tensor("mh_w_137_cast_fp16")]; + tensor obj_321_cast_fp16 = softmax(axis = var_4978, x = mh_w_137_cast_fp16)[name = tensor("obj_321_cast_fp16")]; + tensor var_5140 = const()[name = tensor("op_5140"), val = tensor([1, 16, 64, 1500])]; + tensor var_5141_cast_fp16 = reshape(shape = var_5140, x = value_91_cast_fp16)[name = tensor("op_5141_cast_fp16")]; + tensor attn_91_transpose_x_0 = const()[name = tensor("attn_91_transpose_x_0"), val = tensor(false)]; + tensor attn_91_transpose_y_0 = const()[name = tensor("attn_91_transpose_y_0"), val = tensor(true)]; + tensor attn_91_cast_fp16 = matmul(transpose_x = attn_91_transpose_x_0, transpose_y = attn_91_transpose_y_0, x = var_5141_cast_fp16, y = obj_321_cast_fp16)[name = tensor("attn_91_cast_fp16")]; + tensor var_5144 = const()[name = tensor("op_5144"), val = tensor([1, 1024, 1, 1])]; + tensor input_223_cast_fp16 = reshape(shape = var_5144, x = attn_91_cast_fp16)[name = tensor("input_223_cast_fp16")]; + tensor obj_319_pad_type_0 = const()[name = tensor("obj_319_pad_type_0"), val = tensor("valid")]; + tensor obj_319_strides_0 = const()[name = tensor("obj_319_strides_0"), val = tensor([1, 1])]; + tensor obj_319_pad_0 = const()[name = tensor("obj_319_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_319_dilations_0 = const()[name = 
tensor("obj_319_dilations_0"), val = tensor([1, 1])]; + tensor obj_319_groups_0 = const()[name = tensor("obj_319_groups_0"), val = tensor(1)]; + tensor layers_22_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_22_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(860838208)))]; + tensor layers_22_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_22_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(862935424)))]; + tensor obj_319_cast_fp16 = conv(bias = layers_22_encoder_attn_o_proj_bias_to_fp16, dilations = obj_319_dilations_0, groups = obj_319_groups_0, pad = obj_319_pad_0, pad_type = obj_319_pad_type_0, strides = obj_319_strides_0, weight = layers_22_encoder_attn_o_proj_weight_to_fp16, x = input_223_cast_fp16)[name = tensor("obj_319_cast_fp16")]; + tensor inputs_137_cast_fp16 = add(x = inputs_135_cast_fp16, y = obj_319_cast_fp16)[name = tensor("inputs_137_cast_fp16")]; + tensor out_137_axes_0 = const()[name = tensor("out_137_axes_0"), val = tensor([1])]; + tensor var_5162_to_fp16 = const()[name = tensor("op_5162_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_137_cast_fp16 = layer_norm(axes = out_137_axes_0, epsilon = var_5162_to_fp16, x = inputs_137_cast_fp16)[name = tensor("out_137_cast_fp16")]; + tensor input_225_gamma_0_to_fp16 = const()[name = tensor("input_225_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(862937536)))]; + tensor input_225_beta_0_to_fp16 = const()[name = tensor("input_225_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(862939648)))]; + tensor input_225_epsilon_0_to_fp16 = const()[name = tensor("input_225_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_225_cast_fp16 = batch_norm(beta = input_225_beta_0_to_fp16, epsilon = input_225_epsilon_0_to_fp16, gamma = input_225_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_137_cast_fp16)[name = tensor("input_225_cast_fp16")]; + tensor input_227_pad_type_0 = const()[name = tensor("input_227_pad_type_0"), val = tensor("valid")]; + tensor input_227_strides_0 = const()[name = tensor("input_227_strides_0"), val = tensor([1, 1])]; + tensor input_227_pad_0 = const()[name = tensor("input_227_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_227_dilations_0 = const()[name = tensor("input_227_dilations_0"), val = tensor([1, 1])]; + tensor input_227_groups_0 = const()[name = tensor("input_227_groups_0"), val = tensor(1)]; + tensor layers_22_fc1_weight_to_fp16 = const()[name = tensor("layers_22_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(862941760)))]; + tensor layers_22_fc1_bias_to_fp16 = const()[name = tensor("layers_22_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(871330432)))]; + tensor input_227_cast_fp16 = conv(bias = layers_22_fc1_bias_to_fp16, dilations = input_227_dilations_0, groups = input_227_groups_0, pad = input_227_pad_0, pad_type = input_227_pad_type_0, strides = input_227_strides_0, weight = layers_22_fc1_weight_to_fp16, x = input_225_cast_fp16)[name = tensor("input_227_cast_fp16")]; + tensor input_229_mode_0 = const()[name = tensor("input_229_mode_0"), val = tensor("EXACT")]; + tensor input_229_cast_fp16 = gelu(mode = input_229_mode_0, x = 
input_227_cast_fp16)[name = tensor("input_229_cast_fp16")]; + tensor hidden_states_47_pad_type_0 = const()[name = tensor("hidden_states_47_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_47_strides_0 = const()[name = tensor("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = tensor("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = tensor("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + tensor hidden_states_47_groups_0 = const()[name = tensor("hidden_states_47_groups_0"), val = tensor(1)]; + tensor layers_22_fc2_weight_to_fp16 = const()[name = tensor("layers_22_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(871338688)))]; + tensor layers_22_fc2_bias_to_fp16 = const()[name = tensor("layers_22_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(879727360)))]; + tensor hidden_states_47_cast_fp16 = conv(bias = layers_22_fc2_bias_to_fp16, dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = layers_22_fc2_weight_to_fp16, x = input_229_cast_fp16)[name = tensor("hidden_states_47_cast_fp16")]; + tensor inputs_139_cast_fp16 = add(x = inputs_137_cast_fp16, y = hidden_states_47_cast_fp16)[name = tensor("inputs_139_cast_fp16")]; + tensor var_5197 = const()[name = tensor("op_5197"), val = tensor(3)]; + tensor out_139_axes_0 = const()[name = tensor("out_139_axes_0"), val = tensor([1])]; + tensor var_5222_to_fp16 = const()[name = tensor("op_5222_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_139_cast_fp16 = layer_norm(axes = out_139_axes_0, epsilon = var_5222_to_fp16, x = inputs_139_cast_fp16)[name = tensor("out_139_cast_fp16")]; + tensor obj_323_gamma_0_to_fp16 = const()[name = tensor("obj_323_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(879729472)))]; + tensor obj_323_beta_0_to_fp16 = const()[name = tensor("obj_323_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(879731584)))]; + tensor obj_323_epsilon_0_to_fp16 = const()[name = tensor("obj_323_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_323_cast_fp16 = batch_norm(beta = obj_323_beta_0_to_fp16, epsilon = obj_323_epsilon_0_to_fp16, gamma = obj_323_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_139_cast_fp16)[name = tensor("obj_323_cast_fp16")]; + tensor query_93_pad_type_0 = const()[name = tensor("query_93_pad_type_0"), val = tensor("valid")]; + tensor query_93_strides_0 = const()[name = tensor("query_93_strides_0"), val = tensor([1, 1])]; + tensor query_93_pad_0 = const()[name = tensor("query_93_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_93_dilations_0 = const()[name = tensor("query_93_dilations_0"), val = tensor([1, 1])]; + tensor query_93_groups_0 = const()[name = tensor("query_93_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(879733696)))]; + tensor layers_23_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(881830912)))]; + tensor query_93_cast_fp16 = conv(bias = layers_23_self_attn_q_proj_bias_to_fp16, dilations = query_93_dilations_0, groups = query_93_groups_0, pad = query_93_pad_0, pad_type = query_93_pad_type_0, strides = query_93_strides_0, weight = layers_23_self_attn_q_proj_weight_to_fp16, x = obj_323_cast_fp16)[name = tensor("query_93_cast_fp16")]; + tensor current_key_pad_type_0 = const()[name = tensor("current_key_pad_type_0"), val = tensor("valid")]; + tensor current_key_strides_0 = const()[name = tensor("current_key_strides_0"), val = tensor([1, 1])]; + tensor current_key_pad_0 = const()[name = tensor("current_key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_key_dilations_0 = const()[name = tensor("current_key_dilations_0"), val = tensor([1, 1])]; + tensor current_key_groups_0 = const()[name = tensor("current_key_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(881833024)))]; + tensor current_key_cast_fp16 = conv(dilations = current_key_dilations_0, groups = current_key_groups_0, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = current_key_strides_0, weight = layers_23_self_attn_k_proj_weight_to_fp16, x = obj_323_cast_fp16)[name = tensor("current_key_cast_fp16")]; + tensor current_value_pad_type_0 = const()[name = tensor("current_value_pad_type_0"), val = tensor("valid")]; + tensor current_value_strides_0 = const()[name = tensor("current_value_strides_0"), val = tensor([1, 1])]; + tensor current_value_pad_0 = const()[name = tensor("current_value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor current_value_dilations_0 = const()[name = tensor("current_value_dilations_0"), val = tensor([1, 1])]; + tensor current_value_groups_0 = const()[name = tensor("current_value_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(883930240)))]; + tensor layers_23_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(886027456)))]; + tensor current_value_cast_fp16 = conv(bias = layers_23_self_attn_v_proj_bias_to_fp16, dilations = current_value_dilations_0, groups = current_value_groups_0, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = current_value_strides_0, weight = layers_23_self_attn_v_proj_weight_to_fp16, x = obj_323_cast_fp16)[name = tensor("current_value_cast_fp16")]; + tensor var_5261_cast_fp16 = mul(x = var_87_cast_fp16_23, y = var_207_cast_fp16)[name = tensor("op_5261_cast_fp16")]; + tensor var_5262_cast_fp16 = mul(x = current_key_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_5262_cast_fp16")]; + tensor key_93_cast_fp16 = add(x = var_5261_cast_fp16, y = var_5262_cast_fp16)[name = tensor("key_93_cast_fp16")]; + tensor var_5265_cast_fp16 = mul(x = var_114_cast_fp16_23, y = var_207_cast_fp16)[name = tensor("op_5265_cast_fp16")]; + tensor var_5266_cast_fp16 = mul(x = current_value_cast_fp16, y = var_205_cast_fp16)[name = tensor("op_5266_cast_fp16")]; + tensor value_93_cast_fp16 = add(x = var_5265_cast_fp16, y = var_5266_cast_fp16)[name = tensor("value_93_cast_fp16")]; + 
tensor var_5270 = const()[name = tensor("op_5270"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_93_cast_fp16 = reshape(shape = var_5270, x = query_93_cast_fp16)[name = tensor("mh_q_93_cast_fp16")]; + tensor var_5272_to_fp16 = const()[name = tensor("op_5272_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5273_cast_fp16 = mul(x = mh_q_93_cast_fp16, y = var_5272_to_fp16)[name = tensor("op_5273_cast_fp16")]; + tensor var_5276 = const()[name = tensor("op_5276"), val = tensor([1, 16, 64, 448])]; + tensor var_5277_cast_fp16 = reshape(shape = var_5276, x = key_93_cast_fp16)[name = tensor("op_5277_cast_fp16")]; + tensor mh_w_139_transpose_x_0 = const()[name = tensor("mh_w_139_transpose_x_0"), val = tensor(true)]; + tensor mh_w_139_transpose_y_0 = const()[name = tensor("mh_w_139_transpose_y_0"), val = tensor(false)]; + tensor mh_w_139_cast_fp16 = matmul(transpose_x = mh_w_139_transpose_x_0, transpose_y = mh_w_139_transpose_y_0, x = var_5273_cast_fp16, y = var_5277_cast_fp16)[name = tensor("mh_w_139_cast_fp16")]; + tensor mh_w_141_cast_fp16 = add(x = mh_w_139_cast_fp16, y = var_229_cast_fp16)[name = tensor("mh_w_141_cast_fp16")]; + tensor var_5285_cast_fp16 = softmax(axis = var_5197, x = mh_w_141_cast_fp16)[name = tensor("op_5285_cast_fp16")]; + tensor var_5286 = const()[name = tensor("op_5286"), val = tensor([1, 16, 64, 448])]; + tensor var_5287_cast_fp16 = reshape(shape = var_5286, x = value_93_cast_fp16)[name = tensor("op_5287_cast_fp16")]; + tensor attn_93_transpose_x_0 = const()[name = tensor("attn_93_transpose_x_0"), val = tensor(false)]; + tensor attn_93_transpose_y_0 = const()[name = tensor("attn_93_transpose_y_0"), val = tensor(true)]; + tensor attn_93_cast_fp16 = matmul(transpose_x = attn_93_transpose_x_0, transpose_y = attn_93_transpose_y_0, x = var_5287_cast_fp16, y = var_5285_cast_fp16)[name = tensor("attn_93_cast_fp16")]; + tensor var_5290 = const()[name = tensor("op_5290"), val = tensor([1, 1024, 1, 1])]; + tensor input_231_cast_fp16 = reshape(shape = var_5290, x = attn_93_cast_fp16)[name = tensor("input_231_cast_fp16")]; + tensor obj_329_pad_type_0 = const()[name = tensor("obj_329_pad_type_0"), val = tensor("valid")]; + tensor obj_329_strides_0 = const()[name = tensor("obj_329_strides_0"), val = tensor([1, 1])]; + tensor obj_329_pad_0 = const()[name = tensor("obj_329_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_329_dilations_0 = const()[name = tensor("obj_329_dilations_0"), val = tensor([1, 1])]; + tensor obj_329_groups_0 = const()[name = tensor("obj_329_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(886029568)))]; + tensor layers_23_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(888126784)))]; + tensor obj_329_cast_fp16 = conv(bias = layers_23_self_attn_o_proj_bias_to_fp16, dilations = obj_329_dilations_0, groups = obj_329_groups_0, pad = obj_329_pad_0, pad_type = obj_329_pad_type_0, strides = obj_329_strides_0, weight = layers_23_self_attn_o_proj_weight_to_fp16, x = input_231_cast_fp16)[name = tensor("obj_329_cast_fp16")]; + tensor inputs_141_cast_fp16 = add(x = inputs_139_cast_fp16, y = obj_329_cast_fp16)[name = tensor("inputs_141_cast_fp16")]; + tensor out_141_axes_0 = const()[name = tensor("out_141_axes_0"), val = tensor([1])]; + tensor 
var_5312_to_fp16 = const()[name = tensor("op_5312_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_141_cast_fp16 = layer_norm(axes = out_141_axes_0, epsilon = var_5312_to_fp16, x = inputs_141_cast_fp16)[name = tensor("out_141_cast_fp16")]; + tensor obj_331_gamma_0_to_fp16 = const()[name = tensor("obj_331_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(888128896)))]; + tensor obj_331_beta_0_to_fp16 = const()[name = tensor("obj_331_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(888131008)))]; + tensor obj_331_epsilon_0_to_fp16 = const()[name = tensor("obj_331_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_331_cast_fp16 = batch_norm(beta = obj_331_beta_0_to_fp16, epsilon = obj_331_epsilon_0_to_fp16, gamma = obj_331_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_141_cast_fp16)[name = tensor("obj_331_cast_fp16")]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("valid")]; + tensor query_strides_0 = const()[name = tensor("query_strides_0"), val = tensor([1, 1])]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_dilations_0 = const()[name = tensor("query_dilations_0"), val = tensor([1, 1])]; + tensor query_groups_0 = const()[name = tensor("query_groups_0"), val = tensor(1)]; + tensor layers_23_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_23_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(888133120)))]; + tensor layers_23_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_23_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(890230336)))]; + tensor query_cast_fp16 = conv(bias = layers_23_encoder_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_23_encoder_attn_q_proj_weight_to_fp16, x = obj_331_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("valid")]; + tensor key_strides_0 = const()[name = tensor("key_strides_0"), val = tensor([1, 1])]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_dilations_0 = const()[name = tensor("key_dilations_0"), val = tensor([1, 1])]; + tensor key_groups_0 = const()[name = tensor("key_groups_0"), val = tensor(1)]; + tensor layers_23_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_23_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(890232448)))]; + tensor key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_23_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_cast_fp16")]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("valid")]; + tensor value_strides_0 = const()[name = tensor("value_strides_0"), val = tensor([1, 1])]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_dilations_0 = const()[name = tensor("value_dilations_0"), val = tensor([1, 1])]; + tensor 
value_groups_0 = const()[name = tensor("value_groups_0"), val = tensor(1)]; + tensor layers_23_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_23_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(892329664)))]; + tensor layers_23_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_23_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(894426880)))]; + tensor value_cast_fp16 = conv(bias = layers_23_encoder_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_23_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_cast_fp16")]; + tensor var_5348 = const()[name = tensor("op_5348"), val = tensor([1, 16, 64, 1])]; + tensor mh_q_cast_fp16 = reshape(shape = var_5348, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_5350_to_fp16 = const()[name = tensor("op_5350_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5351_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_5350_to_fp16)[name = tensor("op_5351_cast_fp16")]; + tensor var_5354 = const()[name = tensor("op_5354"), val = tensor([1, 16, 64, 1500])]; + tensor var_5355_cast_fp16 = reshape(shape = var_5354, x = key_cast_fp16)[name = tensor("op_5355_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_5351_cast_fp16, y = var_5355_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor obj_335_cast_fp16 = softmax(axis = var_5197, x = mh_w_cast_fp16)[name = tensor("obj_335_cast_fp16")]; + tensor var_5359 = const()[name = tensor("op_5359"), val = tensor([1, 16, 64, 1500])]; + tensor var_5360_cast_fp16 = reshape(shape = var_5359, x = value_cast_fp16)[name = tensor("op_5360_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_5360_cast_fp16, y = obj_335_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_5363 = const()[name = tensor("op_5363"), val = tensor([1, 1024, 1, 1])]; + tensor input_233_cast_fp16 = reshape(shape = var_5363, x = attn_cast_fp16)[name = tensor("input_233_cast_fp16")]; + tensor obj_333_pad_type_0 = const()[name = tensor("obj_333_pad_type_0"), val = tensor("valid")]; + tensor obj_333_strides_0 = const()[name = tensor("obj_333_strides_0"), val = tensor([1, 1])]; + tensor obj_333_pad_0 = const()[name = tensor("obj_333_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_333_dilations_0 = const()[name = tensor("obj_333_dilations_0"), val = tensor([1, 1])]; + tensor obj_333_groups_0 = const()[name = tensor("obj_333_groups_0"), val = tensor(1)]; + tensor layers_23_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_23_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(894428992)))]; + tensor layers_23_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_23_encoder_attn_o_proj_bias_to_fp16"), 
val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(896526208)))]; + tensor obj_333_cast_fp16 = conv(bias = layers_23_encoder_attn_o_proj_bias_to_fp16, dilations = obj_333_dilations_0, groups = obj_333_groups_0, pad = obj_333_pad_0, pad_type = obj_333_pad_type_0, strides = obj_333_strides_0, weight = layers_23_encoder_attn_o_proj_weight_to_fp16, x = input_233_cast_fp16)[name = tensor("obj_333_cast_fp16")]; + tensor inputs_143_cast_fp16 = add(x = inputs_141_cast_fp16, y = obj_333_cast_fp16)[name = tensor("inputs_143_cast_fp16")]; + tensor out_143_axes_0 = const()[name = tensor("out_143_axes_0"), val = tensor([1])]; + tensor var_5384_to_fp16 = const()[name = tensor("op_5384_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_143_cast_fp16 = layer_norm(axes = out_143_axes_0, epsilon = var_5384_to_fp16, x = inputs_143_cast_fp16)[name = tensor("out_143_cast_fp16")]; + tensor input_235_gamma_0_to_fp16 = const()[name = tensor("input_235_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(896528320)))]; + tensor input_235_beta_0_to_fp16 = const()[name = tensor("input_235_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(896530432)))]; + tensor input_235_epsilon_0_to_fp16 = const()[name = tensor("input_235_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_235_cast_fp16 = batch_norm(beta = input_235_beta_0_to_fp16, epsilon = input_235_epsilon_0_to_fp16, gamma = input_235_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_143_cast_fp16)[name = tensor("input_235_cast_fp16")]; + tensor input_237_pad_type_0 = const()[name = tensor("input_237_pad_type_0"), val = tensor("valid")]; + tensor input_237_strides_0 = const()[name = tensor("input_237_strides_0"), val = tensor([1, 1])]; + tensor input_237_pad_0 = const()[name = tensor("input_237_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_237_dilations_0 = const()[name = tensor("input_237_dilations_0"), val = tensor([1, 1])]; + tensor input_237_groups_0 = const()[name = tensor("input_237_groups_0"), val = tensor(1)]; + tensor layers_23_fc1_weight_to_fp16 = const()[name = tensor("layers_23_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(896532544)))]; + tensor layers_23_fc1_bias_to_fp16 = const()[name = tensor("layers_23_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(904921216)))]; + tensor input_237_cast_fp16 = conv(bias = layers_23_fc1_bias_to_fp16, dilations = input_237_dilations_0, groups = input_237_groups_0, pad = input_237_pad_0, pad_type = input_237_pad_type_0, strides = input_237_strides_0, weight = layers_23_fc1_weight_to_fp16, x = input_235_cast_fp16)[name = tensor("input_237_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_237_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor hidden_states_49_pad_type_0 = const()[name = tensor("hidden_states_49_pad_type_0"), val = tensor("valid")]; + tensor hidden_states_49_strides_0 = const()[name = tensor("hidden_states_49_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_49_pad_0 = const()[name = tensor("hidden_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_49_dilations_0 = const()[name = tensor("hidden_states_49_dilations_0"), val = tensor([1, 
1])]; + tensor hidden_states_49_groups_0 = const()[name = tensor("hidden_states_49_groups_0"), val = tensor(1)]; + tensor layers_23_fc2_weight_to_fp16 = const()[name = tensor("layers_23_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(904929472)))]; + tensor layers_23_fc2_bias_to_fp16 = const()[name = tensor("layers_23_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(913318144)))]; + tensor hidden_states_49_cast_fp16 = conv(bias = layers_23_fc2_bias_to_fp16, dilations = hidden_states_49_dilations_0, groups = hidden_states_49_groups_0, pad = hidden_states_49_pad_0, pad_type = hidden_states_49_pad_type_0, strides = hidden_states_49_strides_0, weight = layers_23_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_49_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_143_cast_fp16, y = hidden_states_49_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_5427_to_fp16 = const()[name = tensor("op_5427_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_5427_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor hidden_states_gamma_0_to_fp16 = const()[name = tensor("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(913320256)))]; + tensor hidden_states_beta_0_to_fp16 = const()[name = tensor("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(913322368)))]; + tensor hidden_states_epsilon_0_to_fp16 = const()[name = tensor("hidden_states_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor var_5438_axes_0 = const()[name = tensor("op_5438_axes_0"), val = tensor([2])]; + tensor var_5438_cast_fp16 = squeeze(axes = var_5438_axes_0, x = hidden_states_cast_fp16)[name = tensor("op_5438_cast_fp16")]; + tensor var_5441_perm_0 = const()[name = tensor("op_5441_perm_0"), val = tensor([0, 2, 1])]; + tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(913324480)))]; + tensor var_5441_cast_fp16 = transpose(perm = var_5441_perm_0, x = var_5438_cast_fp16)[name = tensor("transpose_0")]; + tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_5441_cast_fp16)[name = tensor("linear_0_cast_fp16")]; + tensor var_5445 = const()[name = tensor("op_5445"), val = tensor(1)]; + tensor obj_339_interleave_0 = const()[name = tensor("obj_339_interleave_0"), val = tensor(false)]; + tensor key_cache_updates = concat(axis = var_5445, interleave = obj_339_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_23_cast_fp16, current_key_25_cast_fp16, current_key_27_cast_fp16, 
current_key_29_cast_fp16, current_key_31_cast_fp16, current_key_33_cast_fp16, current_key_35_cast_fp16, current_key_37_cast_fp16, current_key_39_cast_fp16, current_key_41_cast_fp16, current_key_43_cast_fp16, current_key_45_cast_fp16, current_key_cast_fp16))[name = tensor("obj_339_cast_fp16")]; + tensor var_5448 = const()[name = tensor("op_5448"), val = tensor(1)]; + tensor obj_341_interleave_0 = const()[name = tensor("obj_341_interleave_0"), val = tensor(false)]; + tensor value_cache_updates = concat(axis = var_5448, interleave = obj_341_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_23_cast_fp16, current_value_25_cast_fp16, current_value_27_cast_fp16, current_value_29_cast_fp16, current_value_31_cast_fp16, current_value_33_cast_fp16, current_value_35_cast_fp16, current_value_37_cast_fp16, current_value_39_cast_fp16, current_value_41_cast_fp16, current_value_43_cast_fp16, current_value_45_cast_fp16, current_value_cast_fp16))[name = tensor("obj_341_cast_fp16")]; + tensor var_5459_begin_0 = const()[name = tensor("op_5459_begin_0"), val = tensor([0, 15, 0, 0])]; + tensor var_5459_end_0 = const()[name = tensor("op_5459_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5459_end_mask_0 = const()[name = tensor("op_5459_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5459_cast_fp16 = slice_by_index(begin = var_5459_begin_0, end = var_5459_end_0, end_mask = var_5459_end_mask_0, x = obj_195_cast_fp16)[name = tensor("op_5459_cast_fp16")]; + tensor var_5462_begin_0 = const()[name = tensor("op_5462_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5462_end_0 = const()[name = tensor("op_5462_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5462_end_mask_0 = const()[name = tensor("op_5462_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5462_squeeze_mask_0 = const()[name = tensor("op_5462_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_5462_cast_fp16 = slice_by_index(begin = var_5462_begin_0, end = var_5462_end_0, end_mask = var_5462_end_mask_0, squeeze_mask = var_5462_squeeze_mask_0, x = var_5459_cast_fp16)[name = tensor("op_5462_cast_fp16")]; + tensor var_5477_begin_0 = const()[name = tensor("op_5477_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5477_end_0 = const()[name = tensor("op_5477_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_5477_end_mask_0 = const()[name = tensor("op_5477_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5477_cast_fp16 = slice_by_index(begin = var_5477_begin_0, end = var_5477_end_0, end_mask = var_5477_end_mask_0, x = obj_223_cast_fp16)[name = tensor("op_5477_cast_fp16")]; + tensor var_5480_begin_0 = const()[name = tensor("op_5480_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5480_end_0 = const()[name = tensor("op_5480_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5480_end_mask_0 = const()[name = tensor("op_5480_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5480_squeeze_mask_0 = const()[name = tensor("op_5480_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_5480_cast_fp16 = slice_by_index(begin = var_5480_begin_0, end = var_5480_end_0, end_mask = var_5480_end_mask_0, squeeze_mask = var_5480_squeeze_mask_0, x = 
var_5477_cast_fp16)[name = tensor("op_5480_cast_fp16")]; + tensor var_5495_begin_0 = const()[name = tensor("op_5495_begin_0"), val = tensor([0, 15, 0, 0])]; + tensor var_5495_end_0 = const()[name = tensor("op_5495_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5495_end_mask_0 = const()[name = tensor("op_5495_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5495_cast_fp16 = slice_by_index(begin = var_5495_begin_0, end = var_5495_end_0, end_mask = var_5495_end_mask_0, x = obj_223_cast_fp16)[name = tensor("op_5495_cast_fp16")]; + tensor var_5498_begin_0 = const()[name = tensor("op_5498_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5498_end_0 = const()[name = tensor("op_5498_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5498_end_mask_0 = const()[name = tensor("op_5498_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5498_squeeze_mask_0 = const()[name = tensor("op_5498_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_5498_cast_fp16 = slice_by_index(begin = var_5498_begin_0, end = var_5498_end_0, end_mask = var_5498_end_mask_0, squeeze_mask = var_5498_squeeze_mask_0, x = var_5495_cast_fp16)[name = tensor("op_5498_cast_fp16")]; + tensor var_5513_begin_0 = const()[name = tensor("op_5513_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_5513_end_0 = const()[name = tensor("op_5513_end_0"), val = tensor([1, 2, 1, 1500])]; + tensor var_5513_end_mask_0 = const()[name = tensor("op_5513_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5513_cast_fp16 = slice_by_index(begin = var_5513_begin_0, end = var_5513_end_0, end_mask = var_5513_end_mask_0, x = obj_237_cast_fp16)[name = tensor("op_5513_cast_fp16")]; + tensor var_5516_begin_0 = const()[name = tensor("op_5516_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5516_end_0 = const()[name = tensor("op_5516_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5516_end_mask_0 = const()[name = tensor("op_5516_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5516_squeeze_mask_0 = const()[name = tensor("op_5516_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_5516_cast_fp16 = slice_by_index(begin = var_5516_begin_0, end = var_5516_end_0, end_mask = var_5516_end_mask_0, squeeze_mask = var_5516_squeeze_mask_0, x = var_5513_cast_fp16)[name = tensor("op_5516_cast_fp16")]; + tensor var_5531_begin_0 = const()[name = tensor("op_5531_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5531_end_0 = const()[name = tensor("op_5531_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5531_end_mask_0 = const()[name = tensor("op_5531_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5531_cast_fp16 = slice_by_index(begin = var_5531_begin_0, end = var_5531_end_0, end_mask = var_5531_end_mask_0, x = obj_293_cast_fp16)[name = tensor("op_5531_cast_fp16")]; + tensor var_5534_begin_0 = const()[name = tensor("op_5534_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5534_end_0 = const()[name = tensor("op_5534_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5534_end_mask_0 = const()[name = tensor("op_5534_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5534_squeeze_mask_0 = const()[name = tensor("op_5534_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_5534_cast_fp16 = slice_by_index(begin = var_5534_begin_0, end = var_5534_end_0, end_mask = var_5534_end_mask_0, squeeze_mask = var_5534_squeeze_mask_0, x = var_5531_cast_fp16)[name = 
tensor("op_5534_cast_fp16")]; + tensor var_5549_begin_0 = const()[name = tensor("op_5549_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_5549_end_0 = const()[name = tensor("op_5549_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_5549_end_mask_0 = const()[name = tensor("op_5549_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5549_cast_fp16 = slice_by_index(begin = var_5549_begin_0, end = var_5549_end_0, end_mask = var_5549_end_mask_0, x = obj_335_cast_fp16)[name = tensor("op_5549_cast_fp16")]; + tensor var_5552_begin_0 = const()[name = tensor("op_5552_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5552_end_0 = const()[name = tensor("op_5552_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_5552_end_mask_0 = const()[name = tensor("op_5552_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5552_squeeze_mask_0 = const()[name = tensor("op_5552_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_5552_cast_fp16 = slice_by_index(begin = var_5552_begin_0, end = var_5552_end_0, end_mask = var_5552_end_mask_0, squeeze_mask = var_5552_squeeze_mask_0, x = var_5549_cast_fp16)[name = tensor("op_5552_cast_fp16")]; + tensor var_5559 = const()[name = tensor("op_5559"), val = tensor(1)]; + tensor var_5560_interleave_0 = const()[name = tensor("op_5560_interleave_0"), val = tensor(false)]; + tensor var_5560_cast_fp16 = concat(axis = var_5559, interleave = var_5560_interleave_0, values = (var_5462_cast_fp16, var_5480_cast_fp16, var_5498_cast_fp16, var_5516_cast_fp16, var_5534_cast_fp16, var_5552_cast_fp16))[name = tensor("op_5560_cast_fp16")]; + tensor obj_axes_0 = const()[name = tensor("obj_axes_0"), val = tensor([1])]; + tensor obj_keep_dims_0 = const()[name = tensor("obj_keep_dims_0"), val = tensor(false)]; + tensor alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = obj_keep_dims_0, x = var_5560_cast_fp16)[name = tensor("obj_cast_fp16")]; + } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); +} \ No newline at end of file diff --git a/openai_whisper-medium/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-medium/TextDecoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..1486869d1da66773552e9714729d96b56d6d7838 --- /dev/null +++ b/openai_whisper-medium/TextDecoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283878d285cb0e557eeb1c6a1524eb5fd33cae2c289a114bc9e72ca76c0bfc75 +size 913428274 diff --git a/openai_whisper-medium/config.json b/openai_whisper-medium/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6901659aef815672d5299b27e4b074e99540790 --- /dev/null +++ b/openai_whisper-medium/config.json @@ -0,0 +1 @@ +{"_name_or_path": "openai/whisper-medium", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 1024, "decoder_attention_heads": 16, "decoder_ffn_dim": 4096, "decoder_layerdrop": 0.0, "decoder_layers": 24, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 16, "encoder_ffn_dim": 4096, "encoder_layerdrop": 0.0, "encoder_layers": 24, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": 
"whisper", "num_hidden_layers": 24, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865} \ No newline at end of file diff --git a/openai_whisper-medium/generation_config.json b/openai_whisper-medium/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..77d9e57eb7b16f2ea46f771a764598a11ed563d9 --- /dev/null +++ b/openai_whisper-medium/generation_config.json @@ -0,0 +1 @@ +{"alignment_heads": [[13, 15], [15, 4], [15, 15], [16, 1], [20, 0], [23, 4]], "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "decoder_start_token_id": 50258, "eos_token_id": 50257, "forced_decoder_ids": [[1, null], [2, 50359]], "is_multilingual": true, "lang_to_id": {"<|af|>": 50327, "<|am|>": 50334, "<|ar|>": 50272, "<|as|>": 50350, "<|az|>": 50304, "<|ba|>": 50355, "<|be|>": 50330, "<|bg|>": 50292, "<|bn|>": 50302, "<|bo|>": 50347, "<|br|>": 50309, "<|bs|>": 50315, "<|ca|>": 50270, "<|cs|>": 50283, "<|cy|>": 50297, "<|da|>": 50285, "<|de|>": 50261, "<|el|>": 50281, "<|en|>": 50259, "<|es|>": 50262, "<|et|>": 50307, "<|eu|>": 50310, "<|fa|>": 50300, "<|fi|>": 50277, "<|fo|>": 50338, "<|fr|>": 50265, "<|gl|>": 50319, "<|gu|>": 50333, "<|haw|>": 50352, "<|ha|>": 50354, "<|he|>": 50279, "<|hi|>": 50276, "<|hr|>": 50291, "<|ht|>": 50339, "<|hu|>": 50286, "<|hy|>": 50312, "<|id|>": 50275, "<|is|>": 50311, "<|it|>": 50274, "<|ja|>": 50266, "<|jw|>": 50356, "<|ka|>": 50329, "<|kk|>": 50316, "<|km|>": 50323, "<|kn|>": 50306, "<|ko|>": 50264, "<|la|>": 50294, "<|lb|>": 50345, "<|ln|>": 50353, "<|lo|>": 50336, "<|lt|>": 50293, "<|lv|>": 50301, "<|mg|>": 50349, "<|mi|>": 50295, "<|mk|>": 50308, "<|ml|>": 50296, "<|mn|>": 50314, "<|mr|>": 50320, "<|ms|>": 50282, "<|mt|>": 50343, "<|my|>": 50346, "<|ne|>": 50313, "<|nl|>": 50271, "<|nn|>": 50342, "<|no|>": 50288, "<|oc|>": 50328, "<|pa|>": 50321, "<|pl|>": 50269, "<|ps|>": 50340, "<|pt|>": 50267, "<|ro|>": 50284, "<|ru|>": 50263, "<|sa|>": 50344, "<|sd|>": 50332, "<|si|>": 50322, "<|sk|>": 50298, "<|sl|>": 50305, "<|sn|>": 50324, "<|so|>": 50326, "<|sq|>": 50317, "<|sr|>": 50303, "<|su|>": 50357, "<|sv|>": 50273, "<|sw|>": 50318, "<|ta|>": 50287, "<|te|>": 50299, "<|tg|>": 50331, "<|th|>": 50289, "<|tk|>": 50341, "<|tl|>": 50348, "<|tr|>": 50268, "<|tt|>": 50351, "<|uk|>": 50280, "<|ur|>": 50290, "<|uz|>": 50337, "<|vi|>": 50278, "<|yi|>": 50335, "<|yo|>": 50325, "<|zh|>": 50260}, "max_initial_timestamp_index": 50, "max_length": 448, "no_timestamps_token_id": 50363, "pad_token_id": 50257, "prev_sot_token_id": 50361, "return_timestamps": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 
18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "task_to_id": {"transcribe": 50359, "translate": 50358}, "transformers_version": "4.31.0.dev0"} \ No newline at end of file diff --git a/openai_whisper-small/.DS_Store b/openai_whisper-small/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8bedfd126eac64ae307b38a413abe3225e60d1f1 Binary files /dev/null and b/openai_whisper-small/.DS_Store differ diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-small/AudioEncoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..22a5e3bdc61cea5ea516a617fb13ab917c5fe03f --- /dev/null +++ b/openai_whisper-small/AudioEncoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211457b92a0ced67bb8625efe39799a0030c4fc71eb87d7284ea81043caccde7 +size 243 diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-small/AudioEncoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..26effa4133bbbe74215e0d5088259839b88ef10e --- /dev/null +++ b/openai_whisper-small/AudioEncoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68f152b6573ac55203a3dc8383730e6ecde685c7d2a88815b89820c88e35371 +size 347 diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-small/AudioEncoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9f06d3ae86653eca79c3b7b8fbde3379d72185ee --- /dev/null +++ b/openai_whisper-small/AudioEncoder.mlmodelc/metadata.json @@ -0,0 +1,69 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 768 × 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 768, 1, 1500]", + "name" : "encoder_output_embeds", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 156, + "Ios16.rsqrt" : 25, + "Ios16.mul" : 626, + "SliceByIndex" : 1008, + "Ios16.sub" : 25, + "Transpose" : 12, + "Ios16.einsum" : 1152, + "Ios16.conv" : 74, + "Ios16.add" : 50, + "Ios16.reduceMean" : 50, + "Ios16.softmax" : 576, + "Ios16.gelu" : 14, + "Ios16.batchNorm" : 25 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.2.1", + "com.github.apple.coremltools.version" : "7.1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 1, 3000]", + "name" : "melspectrogram_features", + "type" : "MultiArray" + } + ], + "generatedClassName" : "AudioEncoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/model.mil 
b/openai_whisper-small/AudioEncoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..0dd59f0859f9a655599067124f0583a5263a0be5 --- /dev/null +++ b/openai_whisper-small/AudioEncoder.mlmodelc/model.mil @@ -0,0 +1,9382 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor melspectrogram_features) { + tensor var_50 = const()[name = tensor("op_50"), val = tensor([1, 1])]; + tensor var_56 = const()[name = tensor("op_56"), val = tensor([1, 1])]; + tensor var_61 = const()[name = tensor("op_61"), val = tensor(1)]; + tensor var_66_pad_type_0 = const()[name = tensor("op_66_pad_type_0"), val = tensor("custom")]; + tensor var_66_pad_0 = const()[name = tensor("op_66_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_41_to_fp16 = const()[name = tensor("op_41_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_47_to_fp16 = const()[name = tensor("op_47_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368768)))]; + tensor var_66_cast_fp16 = conv(bias = var_47_to_fp16, dilations = var_56, groups = var_61, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_50, weight = var_41_to_fp16, x = melspectrogram_features)[name = tensor("op_66_cast_fp16")]; + tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_66_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_90 = const()[name = tensor("op_90"), val = tensor([2, 2])]; + tensor var_96 = const()[name = tensor("op_96"), val = tensor([1, 1])]; + tensor var_101 = const()[name = tensor("op_101"), val = tensor(1)]; + tensor var_106_pad_type_0 = const()[name = tensor("op_106_pad_type_0"), val = tensor("custom")]; + tensor var_106_pad_0 = const()[name = tensor("op_106_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_81_to_fp16 = const()[name = tensor("op_81_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370368)))]; + tensor var_87_to_fp16 = const()[name = tensor("op_87_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3909376)))]; + tensor var_106_cast_fp16 = conv(bias = var_87_to_fp16, dilations = var_96, groups = var_101, pad = var_106_pad_0, pad_type = var_106_pad_type_0, strides = var_90, weight = var_81_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_106_cast_fp16")]; + tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_106_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor var_124_to_fp16 = const()[name = tensor("op_124_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3910976)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_124_to_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor var_134 = const()[name = tensor("op_134"), val = tensor(3)]; + tensor var_151 = const()[name = tensor("op_151"), val = tensor(1)]; + tensor var_152 = const()[name = tensor("op_152"), val = tensor(true)]; + tensor var_162 = const()[name = 
tensor("op_162"), val = tensor([1])]; + tensor channels_mean_1_cast_fp16 = reduce_mean(axes = var_162, keep_dims = var_152, x = inputs_1_cast_fp16)[name = tensor("channels_mean_1_cast_fp16")]; + tensor zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor("zero_mean_1_cast_fp16")]; + tensor zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor("zero_mean_sq_1_cast_fp16")]; + tensor var_166 = const()[name = tensor("op_166"), val = tensor([1])]; + tensor var_167_cast_fp16 = reduce_mean(axes = var_166, keep_dims = var_152, x = zero_mean_sq_1_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor var_168_to_fp16 = const()[name = tensor("op_168_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_169_cast_fp16 = add(x = var_167_cast_fp16, y = var_168_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor denom_1_epsilon_0_to_fp16 = const()[name = tensor("denom_1_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_169_cast_fp16)[name = tensor("denom_1_cast_fp16")]; + tensor out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6215040)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6216640)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6218240)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6219840)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_184 = const()[name = tensor("op_184"), val = tensor([1, 1])]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 1])]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("custom")]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6221440)))]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7401152)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_186, groups = var_151, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_184, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_190 = const()[name = tensor("op_190"), val = tensor([1, 1])]; + tensor var_192 = const()[name = tensor("op_192"), val = 
tensor([1, 1])]; + tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = tensor("custom")]; + tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7402752)))]; + tensor key_1_cast_fp16 = conv(dilations = var_192, groups = var_151, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = var_190, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_197 = const()[name = tensor("op_197"), val = tensor([1, 1])]; + tensor var_199 = const()[name = tensor("op_199"), val = tensor([1, 1])]; + tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("custom")]; + tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8582464)))]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9762176)))]; + tensor value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_199, groups = var_151, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = var_197, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_206_begin_0 = const()[name = tensor("op_206_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_end_0 = const()[name = tensor("op_206_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_206_end_mask_0 = const()[name = tensor("op_206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_206_cast_fp16 = slice_by_index(begin = var_206_begin_0, end = var_206_end_0, end_mask = var_206_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = 
var_246_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_273_begin_0 = const()[name = tensor("op_273_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_273_end_0 = const()[name = tensor("op_273_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_273_end_mask_0 = const()[name = tensor("op_273_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_273_cast_fp16 = slice_by_index(begin = var_273_begin_0, end = var_273_end_0, end_mask = var_273_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_273_cast_fp16")]; + tensor var_280_begin_0 = const()[name = tensor("op_280_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_280_end_0 = const()[name = tensor("op_280_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_280_end_mask_0 = const()[name = tensor("op_280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_280_cast_fp16 = slice_by_index(begin = var_280_begin_0, end = var_280_end_0, end_mask = var_280_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_280_cast_fp16")]; + tensor var_287_begin_0 = const()[name = tensor("op_287_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_287_end_0 = const()[name = tensor("op_287_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_287_end_mask_0 = const()[name = tensor("op_287_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_287_cast_fp16 = slice_by_index(begin = var_287_begin_0, end = var_287_end_0, end_mask = var_287_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_287_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_301_begin_0 = 
const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_308_begin_0 = const()[name = tensor("op_308_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_308_end_0 = const()[name = tensor("op_308_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_308_end_mask_0 = const()[name = tensor("op_308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_308_cast_fp16")]; + tensor var_315_begin_0 = const()[name = tensor("op_315_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_315_end_0 = const()[name = tensor("op_315_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_315_end_mask_0 = const()[name = tensor("op_315_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_315_cast_fp16 = slice_by_index(begin = var_315_begin_0, end = var_315_end_0, end_mask = var_315_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_315_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_329_begin_0 = const()[name = tensor("op_329_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_329_end_0 = const()[name = tensor("op_329_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_329_end_mask_0 = const()[name = tensor("op_329_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_329_cast_fp16 = slice_by_index(begin = var_329_begin_0, end = var_329_end_0, end_mask = var_329_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_329_cast_fp16")]; + tensor var_336_begin_0 = const()[name = tensor("op_336_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_336_end_0 = const()[name = tensor("op_336_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_336_end_mask_0 = const()[name = tensor("op_336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = var_336_end_0, end_mask = var_336_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_336_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor var_350_begin_0 = const()[name = tensor("op_350_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_350_end_0 = const()[name = 
tensor("op_350_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_350_end_mask_0 = const()[name = tensor("op_350_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_350_cast_fp16 = slice_by_index(begin = var_350_begin_0, end = var_350_end_0, end_mask = var_350_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_350_cast_fp16")]; + tensor var_357_begin_0 = const()[name = tensor("op_357_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_357_end_0 = const()[name = tensor("op_357_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_357_end_mask_0 = const()[name = tensor("op_357_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_357_cast_fp16 = slice_by_index(begin = var_357_begin_0, end = var_357_end_0, end_mask = var_357_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_357_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_371_begin_0 = const()[name = tensor("op_371_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_371_end_0 = const()[name = tensor("op_371_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_371_end_mask_0 = const()[name = tensor("op_371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_371_cast_fp16 = slice_by_index(begin = var_371_begin_0, end = var_371_end_0, end_mask = var_371_end_mask_0, x = var_222_cast_fp16)[name = tensor("op_371_cast_fp16")]; + tensor var_378_begin_0 = const()[name = tensor("op_378_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_378_end_0 = const()[name = tensor("op_378_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_378_end_mask_0 = const()[name = tensor("op_378_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_378_cast_fp16 = slice_by_index(begin = var_378_begin_0, end = var_378_end_0, end_mask = var_378_end_mask_0, x = var_222_cast_fp16)[name = tensor("op_378_cast_fp16")]; + tensor var_385_begin_0 = const()[name = tensor("op_385_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_385_end_0 = const()[name = tensor("op_385_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_385_end_mask_0 = const()[name = tensor("op_385_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_385_cast_fp16 = slice_by_index(begin = var_385_begin_0, end = var_385_end_0, end_mask = var_385_end_mask_0, x = var_222_cast_fp16)[name = tensor("op_385_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = var_222_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_399_begin_0 = const()[name = tensor("op_399_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_399_end_0 = const()[name = tensor("op_399_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_399_end_mask_0 = const()[name = 
tensor("op_399_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = var_399_end_0, end_mask = var_399_end_mask_0, x = var_226_cast_fp16)[name = tensor("op_399_cast_fp16")]; + tensor var_406_begin_0 = const()[name = tensor("op_406_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_406_end_0 = const()[name = tensor("op_406_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_406_end_mask_0 = const()[name = tensor("op_406_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_406_cast_fp16 = slice_by_index(begin = var_406_begin_0, end = var_406_end_0, end_mask = var_406_end_mask_0, x = var_226_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor var_413_begin_0 = const()[name = tensor("op_413_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_413_end_0 = const()[name = tensor("op_413_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_413_end_mask_0 = const()[name = tensor("op_413_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_413_cast_fp16 = slice_by_index(begin = var_413_begin_0, end = var_413_end_0, end_mask = var_413_end_mask_0, x = var_226_cast_fp16)[name = tensor("op_413_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = var_226_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_427_begin_0 = const()[name = tensor("op_427_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_427_end_0 = const()[name = tensor("op_427_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_427_end_mask_0 = const()[name = tensor("op_427_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_427_cast_fp16 = slice_by_index(begin = var_427_begin_0, end = var_427_end_0, end_mask = var_427_end_mask_0, x = var_230_cast_fp16)[name = tensor("op_427_cast_fp16")]; + tensor var_434_begin_0 = const()[name = tensor("op_434_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_434_end_0 = const()[name = tensor("op_434_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_434_end_mask_0 = const()[name = tensor("op_434_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_434_cast_fp16 = slice_by_index(begin = var_434_begin_0, end = var_434_end_0, end_mask = var_434_end_mask_0, x = var_230_cast_fp16)[name = tensor("op_434_cast_fp16")]; + tensor var_441_begin_0 = const()[name = tensor("op_441_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_441_end_0 = const()[name = tensor("op_441_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_441_end_mask_0 = const()[name = tensor("op_441_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_441_cast_fp16 = slice_by_index(begin = var_441_begin_0, end = var_441_end_0, end_mask = var_441_end_mask_0, x = var_230_cast_fp16)[name = tensor("op_441_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = 
slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = var_230_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = var_234_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_462_begin_0 = const()[name = tensor("op_462_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_462_end_0 = const()[name = tensor("op_462_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_462_end_mask_0 = const()[name = tensor("op_462_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_462_cast_fp16 = slice_by_index(begin = var_462_begin_0, end = var_462_end_0, end_mask = var_462_end_mask_0, x = var_234_cast_fp16)[name = tensor("op_462_cast_fp16")]; + tensor var_469_begin_0 = const()[name = tensor("op_469_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_469_end_0 = const()[name = tensor("op_469_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_469_end_mask_0 = const()[name = tensor("op_469_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_469_cast_fp16 = slice_by_index(begin = var_469_begin_0, end = var_469_end_0, end_mask = var_469_end_mask_0, x = var_234_cast_fp16)[name = tensor("op_469_cast_fp16")]; + tensor var_476_begin_0 = const()[name = tensor("op_476_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_476_end_0 = const()[name = tensor("op_476_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_476_end_mask_0 = const()[name = tensor("op_476_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_476_cast_fp16 = slice_by_index(begin = var_476_begin_0, end = var_476_end_0, end_mask = var_476_end_mask_0, x = var_234_cast_fp16)[name = tensor("op_476_cast_fp16")]; + tensor var_483_begin_0 = const()[name = tensor("op_483_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_483_end_0 = const()[name = tensor("op_483_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_483_end_mask_0 = const()[name = tensor("op_483_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_483_cast_fp16 = slice_by_index(begin = var_483_begin_0, end = var_483_end_0, end_mask = var_483_end_mask_0, x = var_238_cast_fp16)[name = tensor("op_483_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = var_238_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_497_begin_0 = const()[name = tensor("op_497_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_497_end_0 = const()[name = tensor("op_497_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_497_end_mask_0 = const()[name = tensor("op_497_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_497_cast_fp16 = slice_by_index(begin = var_497_begin_0, end = var_497_end_0, end_mask = var_497_end_mask_0, x = 
var_238_cast_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_504_begin_0 = const()[name = tensor("op_504_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_504_end_0 = const()[name = tensor("op_504_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_504_end_mask_0 = const()[name = tensor("op_504_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_504_cast_fp16 = slice_by_index(begin = var_504_begin_0, end = var_504_end_0, end_mask = var_504_end_mask_0, x = var_238_cast_fp16)[name = tensor("op_504_cast_fp16")]; + tensor var_511_begin_0 = const()[name = tensor("op_511_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_511_end_0 = const()[name = tensor("op_511_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_511_end_mask_0 = const()[name = tensor("op_511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_511_cast_fp16 = slice_by_index(begin = var_511_begin_0, end = var_511_end_0, end_mask = var_511_end_mask_0, x = var_242_cast_fp16)[name = tensor("op_511_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = var_242_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_525_begin_0 = const()[name = tensor("op_525_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_525_end_0 = const()[name = tensor("op_525_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_525_end_mask_0 = const()[name = tensor("op_525_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_525_cast_fp16 = slice_by_index(begin = var_525_begin_0, end = var_525_end_0, end_mask = var_525_end_mask_0, x = var_242_cast_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_532_begin_0 = const()[name = tensor("op_532_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_532_end_0 = const()[name = tensor("op_532_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_532_end_mask_0 = const()[name = tensor("op_532_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_532_cast_fp16 = slice_by_index(begin = var_532_begin_0, end = var_532_end_0, end_mask = var_532_end_mask_0, x = var_242_cast_fp16)[name = tensor("op_532_cast_fp16")]; + tensor var_539_begin_0 = const()[name = tensor("op_539_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_539_end_0 = const()[name = tensor("op_539_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_539_end_mask_0 = const()[name = tensor("op_539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_539_cast_fp16 = slice_by_index(begin = var_539_begin_0, end = var_539_end_0, end_mask = var_539_end_mask_0, x = var_246_cast_fp16)[name = tensor("op_539_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = var_246_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_553_begin_0 = const()[name = 
tensor("op_553_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = var_246_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_560_begin_0 = const()[name = tensor("op_560_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_560_end_0 = const()[name = tensor("op_560_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_560_end_mask_0 = const()[name = tensor("op_560_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_560_cast_fp16 = slice_by_index(begin = var_560_begin_0, end = var_560_end_0, end_mask = var_560_end_mask_0, x = var_246_cast_fp16)[name = tensor("op_560_cast_fp16")]; + tensor var_567_begin_0 = const()[name = tensor("op_567_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_567_end_0 = const()[name = tensor("op_567_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_567_end_mask_0 = const()[name = tensor("op_567_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_567_cast_fp16 = slice_by_index(begin = var_567_begin_0, end = var_567_end_0, end_mask = var_567_end_mask_0, x = var_250_cast_fp16)[name = tensor("op_567_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = var_250_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_581_begin_0 = const()[name = tensor("op_581_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_581_end_0 = const()[name = tensor("op_581_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_581_end_mask_0 = const()[name = tensor("op_581_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = var_581_end_0, end_mask = var_581_end_mask_0, x = var_250_cast_fp16)[name = tensor("op_581_cast_fp16")]; + tensor var_588_begin_0 = const()[name = tensor("op_588_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_588_end_0 = const()[name = tensor("op_588_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_588_end_mask_0 = const()[name = tensor("op_588_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_588_cast_fp16 = slice_by_index(begin = var_588_begin_0, end = var_588_end_0, end_mask = var_588_end_mask_0, x = var_250_cast_fp16)[name = tensor("op_588_cast_fp16")]; + tensor k_1_perm_0 = const()[name = tensor("k_1_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_593_begin_0 = const()[name = tensor("op_593_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_593_end_0 = const()[name = tensor("op_593_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_593_end_mask_0 = const()[name = tensor("op_593_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_11 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor("transpose_11")]; + tensor var_593_cast_fp16 = slice_by_index(begin = var_593_begin_0, end = var_593_end_0, end_mask = var_593_end_mask_0, x = 
transpose_11)[name = tensor("op_593_cast_fp16")]; + tensor var_597_begin_0 = const()[name = tensor("op_597_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_597_end_0 = const()[name = tensor("op_597_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_597_end_mask_0 = const()[name = tensor("op_597_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_597_cast_fp16 = slice_by_index(begin = var_597_begin_0, end = var_597_end_0, end_mask = var_597_end_mask_0, x = transpose_11)[name = tensor("op_597_cast_fp16")]; + tensor var_601_begin_0 = const()[name = tensor("op_601_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_601_end_0 = const()[name = tensor("op_601_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_601_end_mask_0 = const()[name = tensor("op_601_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_601_cast_fp16 = slice_by_index(begin = var_601_begin_0, end = var_601_end_0, end_mask = var_601_end_mask_0, x = transpose_11)[name = tensor("op_601_cast_fp16")]; + tensor var_605_begin_0 = const()[name = tensor("op_605_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_605_end_0 = const()[name = tensor("op_605_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_605_end_mask_0 = const()[name = tensor("op_605_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_605_cast_fp16 = slice_by_index(begin = var_605_begin_0, end = var_605_end_0, end_mask = var_605_end_mask_0, x = transpose_11)[name = tensor("op_605_cast_fp16")]; + tensor var_609_begin_0 = const()[name = tensor("op_609_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_609_end_0 = const()[name = tensor("op_609_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_609_end_mask_0 = const()[name = tensor("op_609_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_609_cast_fp16 = slice_by_index(begin = var_609_begin_0, end = var_609_end_0, end_mask = var_609_end_mask_0, x = transpose_11)[name = tensor("op_609_cast_fp16")]; + tensor var_613_begin_0 = const()[name = tensor("op_613_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_613_end_0 = const()[name = tensor("op_613_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_613_end_mask_0 = const()[name = tensor("op_613_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_613_cast_fp16 = slice_by_index(begin = var_613_begin_0, end = var_613_end_0, end_mask = var_613_end_mask_0, x = transpose_11)[name = tensor("op_613_cast_fp16")]; + tensor var_617_begin_0 = const()[name = tensor("op_617_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_617_end_0 = const()[name = tensor("op_617_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_617_end_mask_0 = const()[name = tensor("op_617_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_617_cast_fp16 = slice_by_index(begin = var_617_begin_0, end = var_617_end_0, end_mask = var_617_end_mask_0, x = transpose_11)[name = tensor("op_617_cast_fp16")]; + tensor var_621_begin_0 = const()[name = tensor("op_621_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_621_end_0 = const()[name = tensor("op_621_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_621_end_mask_0 = const()[name = tensor("op_621_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_621_cast_fp16 = slice_by_index(begin = var_621_begin_0, end = var_621_end_0, end_mask = var_621_end_mask_0, x = transpose_11)[name = tensor("op_621_cast_fp16")]; + tensor var_625_begin_0 = const()[name = tensor("op_625_begin_0"), val = tensor([0, 0, 
0, 512])]; + tensor var_625_end_0 = const()[name = tensor("op_625_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_625_end_mask_0 = const()[name = tensor("op_625_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_625_cast_fp16 = slice_by_index(begin = var_625_begin_0, end = var_625_end_0, end_mask = var_625_end_mask_0, x = transpose_11)[name = tensor("op_625_cast_fp16")]; + tensor var_629_begin_0 = const()[name = tensor("op_629_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_629_end_0 = const()[name = tensor("op_629_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_629_end_mask_0 = const()[name = tensor("op_629_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_629_cast_fp16 = slice_by_index(begin = var_629_begin_0, end = var_629_end_0, end_mask = var_629_end_mask_0, x = transpose_11)[name = tensor("op_629_cast_fp16")]; + tensor var_633_begin_0 = const()[name = tensor("op_633_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_633_end_0 = const()[name = tensor("op_633_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_633_end_mask_0 = const()[name = tensor("op_633_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_633_cast_fp16 = slice_by_index(begin = var_633_begin_0, end = var_633_end_0, end_mask = var_633_end_mask_0, x = transpose_11)[name = tensor("op_633_cast_fp16")]; + tensor var_637_begin_0 = const()[name = tensor("op_637_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_637_end_0 = const()[name = tensor("op_637_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_637_end_mask_0 = const()[name = tensor("op_637_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_637_cast_fp16 = slice_by_index(begin = var_637_begin_0, end = var_637_end_0, end_mask = var_637_end_mask_0, x = transpose_11)[name = tensor("op_637_cast_fp16")]; + tensor var_639_begin_0 = const()[name = tensor("op_639_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_639_end_0 = const()[name = tensor("op_639_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_639_end_mask_0 = const()[name = tensor("op_639_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_639_cast_fp16 = slice_by_index(begin = var_639_begin_0, end = var_639_end_0, end_mask = var_639_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_639_cast_fp16")]; + tensor var_643_begin_0 = const()[name = tensor("op_643_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_643_end_0 = const()[name = tensor("op_643_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_643_end_mask_0 = const()[name = tensor("op_643_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_643_cast_fp16 = slice_by_index(begin = var_643_begin_0, end = var_643_end_0, end_mask = var_643_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_643_cast_fp16")]; + tensor var_647_begin_0 = const()[name = tensor("op_647_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_647_end_0 = const()[name = tensor("op_647_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_647_end_mask_0 = const()[name = tensor("op_647_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_647_cast_fp16 = slice_by_index(begin = var_647_begin_0, end = var_647_end_0, end_mask = var_647_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_647_cast_fp16")]; + tensor var_651_begin_0 = const()[name = tensor("op_651_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_651_end_0 = const()[name = tensor("op_651_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor 
var_651_end_mask_0 = const()[name = tensor("op_651_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_651_cast_fp16 = slice_by_index(begin = var_651_begin_0, end = var_651_end_0, end_mask = var_651_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_651_cast_fp16")]; + tensor var_655_begin_0 = const()[name = tensor("op_655_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_655_end_0 = const()[name = tensor("op_655_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_655_end_mask_0 = const()[name = tensor("op_655_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_655_cast_fp16 = slice_by_index(begin = var_655_begin_0, end = var_655_end_0, end_mask = var_655_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_655_cast_fp16")]; + tensor var_659_begin_0 = const()[name = tensor("op_659_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_659_end_0 = const()[name = tensor("op_659_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_659_end_mask_0 = const()[name = tensor("op_659_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_659_cast_fp16 = slice_by_index(begin = var_659_begin_0, end = var_659_end_0, end_mask = var_659_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_659_cast_fp16")]; + tensor var_663_begin_0 = const()[name = tensor("op_663_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_663_end_0 = const()[name = tensor("op_663_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_663_end_mask_0 = const()[name = tensor("op_663_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_663_cast_fp16 = slice_by_index(begin = var_663_begin_0, end = var_663_end_0, end_mask = var_663_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_663_cast_fp16")]; + tensor var_667_begin_0 = const()[name = tensor("op_667_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_667_end_0 = const()[name = tensor("op_667_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_667_end_mask_0 = const()[name = tensor("op_667_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_667_cast_fp16 = slice_by_index(begin = var_667_begin_0, end = var_667_end_0, end_mask = var_667_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_667_cast_fp16")]; + tensor var_671_begin_0 = const()[name = tensor("op_671_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_671_end_0 = const()[name = tensor("op_671_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_671_end_mask_0 = const()[name = tensor("op_671_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_671_cast_fp16 = slice_by_index(begin = var_671_begin_0, end = var_671_end_0, end_mask = var_671_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_671_cast_fp16")]; + tensor var_675_begin_0 = const()[name = tensor("op_675_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_675_end_0 = const()[name = tensor("op_675_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_675_end_mask_0 = const()[name = tensor("op_675_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_675_cast_fp16 = slice_by_index(begin = var_675_begin_0, end = var_675_end_0, end_mask = var_675_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_675_cast_fp16")]; + tensor var_679_begin_0 = const()[name = tensor("op_679_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_679_end_0 = const()[name = tensor("op_679_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_679_end_mask_0 = const()[name = tensor("op_679_end_mask_0"), val = tensor([true, false, true, 
true])]; + tensor var_679_cast_fp16 = slice_by_index(begin = var_679_begin_0, end = var_679_end_0, end_mask = var_679_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_679_cast_fp16")]; + tensor var_683_begin_0 = const()[name = tensor("op_683_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_683_end_0 = const()[name = tensor("op_683_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_683_end_mask_0 = const()[name = tensor("op_683_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_683_cast_fp16 = slice_by_index(begin = var_683_begin_0, end = var_683_end_0, end_mask = var_683_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_683_cast_fp16")]; + tensor var_687_equation_0 = const()[name = tensor("op_687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_687_cast_fp16 = einsum(equation = var_687_equation_0, values = (var_593_cast_fp16, var_259_cast_fp16))[name = tensor("op_687_cast_fp16")]; + tensor var_688_to_fp16 = const()[name = tensor("op_688_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1_cast_fp16 = mul(x = var_687_cast_fp16, y = var_688_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; + tensor var_691_equation_0 = const()[name = tensor("op_691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_691_cast_fp16 = einsum(equation = var_691_equation_0, values = (var_593_cast_fp16, var_266_cast_fp16))[name = tensor("op_691_cast_fp16")]; + tensor var_692_to_fp16 = const()[name = tensor("op_692_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3_cast_fp16 = mul(x = var_691_cast_fp16, y = var_692_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; + tensor var_695_equation_0 = const()[name = tensor("op_695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_695_cast_fp16 = einsum(equation = var_695_equation_0, values = (var_593_cast_fp16, var_273_cast_fp16))[name = tensor("op_695_cast_fp16")]; + tensor var_696_to_fp16 = const()[name = tensor("op_696_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5_cast_fp16 = mul(x = var_695_cast_fp16, y = var_696_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; + tensor var_699_equation_0 = const()[name = tensor("op_699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_699_cast_fp16 = einsum(equation = var_699_equation_0, values = (var_593_cast_fp16, var_280_cast_fp16))[name = tensor("op_699_cast_fp16")]; + tensor var_700_to_fp16 = const()[name = tensor("op_700_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_7_cast_fp16 = mul(x = var_699_cast_fp16, y = var_700_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; + tensor var_703_equation_0 = const()[name = tensor("op_703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_703_cast_fp16 = einsum(equation = var_703_equation_0, values = (var_597_cast_fp16, var_287_cast_fp16))[name = tensor("op_703_cast_fp16")]; + tensor var_704_to_fp16 = const()[name = tensor("op_704_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_9_cast_fp16 = mul(x = var_703_cast_fp16, y = var_704_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; + tensor var_707_equation_0 = const()[name = tensor("op_707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_707_cast_fp16 = einsum(equation = var_707_equation_0, values = (var_597_cast_fp16, var_294_cast_fp16))[name = tensor("op_707_cast_fp16")]; + tensor var_708_to_fp16 = const()[name = tensor("op_708_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_11_cast_fp16 = mul(x = var_707_cast_fp16, y = var_708_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; + tensor var_711_equation_0 = const()[name = 
tensor("op_711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_711_cast_fp16 = einsum(equation = var_711_equation_0, values = (var_597_cast_fp16, var_301_cast_fp16))[name = tensor("op_711_cast_fp16")]; + tensor var_712_to_fp16 = const()[name = tensor("op_712_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_13_cast_fp16 = mul(x = var_711_cast_fp16, y = var_712_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; + tensor var_715_equation_0 = const()[name = tensor("op_715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_715_cast_fp16 = einsum(equation = var_715_equation_0, values = (var_597_cast_fp16, var_308_cast_fp16))[name = tensor("op_715_cast_fp16")]; + tensor var_716_to_fp16 = const()[name = tensor("op_716_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_15_cast_fp16 = mul(x = var_715_cast_fp16, y = var_716_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; + tensor var_719_equation_0 = const()[name = tensor("op_719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_719_cast_fp16 = einsum(equation = var_719_equation_0, values = (var_601_cast_fp16, var_315_cast_fp16))[name = tensor("op_719_cast_fp16")]; + tensor var_720_to_fp16 = const()[name = tensor("op_720_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_17_cast_fp16 = mul(x = var_719_cast_fp16, y = var_720_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; + tensor var_723_equation_0 = const()[name = tensor("op_723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_723_cast_fp16 = einsum(equation = var_723_equation_0, values = (var_601_cast_fp16, var_322_cast_fp16))[name = tensor("op_723_cast_fp16")]; + tensor var_724_to_fp16 = const()[name = tensor("op_724_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_19_cast_fp16 = mul(x = var_723_cast_fp16, y = var_724_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; + tensor var_727_equation_0 = const()[name = tensor("op_727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_727_cast_fp16 = einsum(equation = var_727_equation_0, values = (var_601_cast_fp16, var_329_cast_fp16))[name = tensor("op_727_cast_fp16")]; + tensor var_728_to_fp16 = const()[name = tensor("op_728_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_21_cast_fp16 = mul(x = var_727_cast_fp16, y = var_728_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; + tensor var_731_equation_0 = const()[name = tensor("op_731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_731_cast_fp16 = einsum(equation = var_731_equation_0, values = (var_601_cast_fp16, var_336_cast_fp16))[name = tensor("op_731_cast_fp16")]; + tensor var_732_to_fp16 = const()[name = tensor("op_732_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_23_cast_fp16 = mul(x = var_731_cast_fp16, y = var_732_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; + tensor var_735_equation_0 = const()[name = tensor("op_735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_735_cast_fp16 = einsum(equation = var_735_equation_0, values = (var_605_cast_fp16, var_343_cast_fp16))[name = tensor("op_735_cast_fp16")]; + tensor var_736_to_fp16 = const()[name = tensor("op_736_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_25_cast_fp16 = mul(x = var_735_cast_fp16, y = var_736_to_fp16)[name = tensor("aw_chunk_25_cast_fp16")]; + tensor var_739_equation_0 = const()[name = tensor("op_739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_739_cast_fp16 = einsum(equation = var_739_equation_0, values = (var_605_cast_fp16, var_350_cast_fp16))[name = tensor("op_739_cast_fp16")]; + tensor var_740_to_fp16 = 
const()[name = tensor("op_740_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_27_cast_fp16 = mul(x = var_739_cast_fp16, y = var_740_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; + tensor var_743_equation_0 = const()[name = tensor("op_743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_743_cast_fp16 = einsum(equation = var_743_equation_0, values = (var_605_cast_fp16, var_357_cast_fp16))[name = tensor("op_743_cast_fp16")]; + tensor var_744_to_fp16 = const()[name = tensor("op_744_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_29_cast_fp16 = mul(x = var_743_cast_fp16, y = var_744_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; + tensor var_747_equation_0 = const()[name = tensor("op_747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_747_cast_fp16 = einsum(equation = var_747_equation_0, values = (var_605_cast_fp16, var_364_cast_fp16))[name = tensor("op_747_cast_fp16")]; + tensor var_748_to_fp16 = const()[name = tensor("op_748_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_31_cast_fp16 = mul(x = var_747_cast_fp16, y = var_748_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; + tensor var_751_equation_0 = const()[name = tensor("op_751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_751_cast_fp16 = einsum(equation = var_751_equation_0, values = (var_609_cast_fp16, var_371_cast_fp16))[name = tensor("op_751_cast_fp16")]; + tensor var_752_to_fp16 = const()[name = tensor("op_752_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_33_cast_fp16 = mul(x = var_751_cast_fp16, y = var_752_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; + tensor var_755_equation_0 = const()[name = tensor("op_755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_755_cast_fp16 = einsum(equation = var_755_equation_0, values = (var_609_cast_fp16, var_378_cast_fp16))[name = tensor("op_755_cast_fp16")]; + tensor var_756_to_fp16 = const()[name = tensor("op_756_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_35_cast_fp16 = mul(x = var_755_cast_fp16, y = var_756_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; + tensor var_759_equation_0 = const()[name = tensor("op_759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_759_cast_fp16 = einsum(equation = var_759_equation_0, values = (var_609_cast_fp16, var_385_cast_fp16))[name = tensor("op_759_cast_fp16")]; + tensor var_760_to_fp16 = const()[name = tensor("op_760_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_37_cast_fp16 = mul(x = var_759_cast_fp16, y = var_760_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; + tensor var_763_equation_0 = const()[name = tensor("op_763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_763_cast_fp16 = einsum(equation = var_763_equation_0, values = (var_609_cast_fp16, var_392_cast_fp16))[name = tensor("op_763_cast_fp16")]; + tensor var_764_to_fp16 = const()[name = tensor("op_764_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_39_cast_fp16 = mul(x = var_763_cast_fp16, y = var_764_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; + tensor var_767_equation_0 = const()[name = tensor("op_767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_767_cast_fp16 = einsum(equation = var_767_equation_0, values = (var_613_cast_fp16, var_399_cast_fp16))[name = tensor("op_767_cast_fp16")]; + tensor var_768_to_fp16 = const()[name = tensor("op_768_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_41_cast_fp16 = mul(x = var_767_cast_fp16, y = var_768_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; + tensor var_771_equation_0 = const()[name = 
tensor("op_771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_771_cast_fp16 = einsum(equation = var_771_equation_0, values = (var_613_cast_fp16, var_406_cast_fp16))[name = tensor("op_771_cast_fp16")]; + tensor var_772_to_fp16 = const()[name = tensor("op_772_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_43_cast_fp16 = mul(x = var_771_cast_fp16, y = var_772_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; + tensor var_775_equation_0 = const()[name = tensor("op_775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_775_cast_fp16 = einsum(equation = var_775_equation_0, values = (var_613_cast_fp16, var_413_cast_fp16))[name = tensor("op_775_cast_fp16")]; + tensor var_776_to_fp16 = const()[name = tensor("op_776_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_45_cast_fp16 = mul(x = var_775_cast_fp16, y = var_776_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; + tensor var_779_equation_0 = const()[name = tensor("op_779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_779_cast_fp16 = einsum(equation = var_779_equation_0, values = (var_613_cast_fp16, var_420_cast_fp16))[name = tensor("op_779_cast_fp16")]; + tensor var_780_to_fp16 = const()[name = tensor("op_780_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_47_cast_fp16 = mul(x = var_779_cast_fp16, y = var_780_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; + tensor var_783_equation_0 = const()[name = tensor("op_783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_783_cast_fp16 = einsum(equation = var_783_equation_0, values = (var_617_cast_fp16, var_427_cast_fp16))[name = tensor("op_783_cast_fp16")]; + tensor var_784_to_fp16 = const()[name = tensor("op_784_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_49_cast_fp16 = mul(x = var_783_cast_fp16, y = var_784_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; + tensor var_787_equation_0 = const()[name = tensor("op_787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_787_cast_fp16 = einsum(equation = var_787_equation_0, values = (var_617_cast_fp16, var_434_cast_fp16))[name = tensor("op_787_cast_fp16")]; + tensor var_788_to_fp16 = const()[name = tensor("op_788_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_51_cast_fp16 = mul(x = var_787_cast_fp16, y = var_788_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; + tensor var_791_equation_0 = const()[name = tensor("op_791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_791_cast_fp16 = einsum(equation = var_791_equation_0, values = (var_617_cast_fp16, var_441_cast_fp16))[name = tensor("op_791_cast_fp16")]; + tensor var_792_to_fp16 = const()[name = tensor("op_792_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_53_cast_fp16 = mul(x = var_791_cast_fp16, y = var_792_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; + tensor var_795_equation_0 = const()[name = tensor("op_795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_795_cast_fp16 = einsum(equation = var_795_equation_0, values = (var_617_cast_fp16, var_448_cast_fp16))[name = tensor("op_795_cast_fp16")]; + tensor var_796_to_fp16 = const()[name = tensor("op_796_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_55_cast_fp16 = mul(x = var_795_cast_fp16, y = var_796_to_fp16)[name = tensor("aw_chunk_55_cast_fp16")]; + tensor var_799_equation_0 = const()[name = tensor("op_799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_799_cast_fp16 = einsum(equation = var_799_equation_0, values = (var_621_cast_fp16, var_455_cast_fp16))[name = tensor("op_799_cast_fp16")]; + tensor var_800_to_fp16 = 
const()[name = tensor("op_800_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_57_cast_fp16 = mul(x = var_799_cast_fp16, y = var_800_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; + tensor var_803_equation_0 = const()[name = tensor("op_803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_803_cast_fp16 = einsum(equation = var_803_equation_0, values = (var_621_cast_fp16, var_462_cast_fp16))[name = tensor("op_803_cast_fp16")]; + tensor var_804_to_fp16 = const()[name = tensor("op_804_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_59_cast_fp16 = mul(x = var_803_cast_fp16, y = var_804_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; + tensor var_807_equation_0 = const()[name = tensor("op_807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_807_cast_fp16 = einsum(equation = var_807_equation_0, values = (var_621_cast_fp16, var_469_cast_fp16))[name = tensor("op_807_cast_fp16")]; + tensor var_808_to_fp16 = const()[name = tensor("op_808_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_61_cast_fp16 = mul(x = var_807_cast_fp16, y = var_808_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; + tensor var_811_equation_0 = const()[name = tensor("op_811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_811_cast_fp16 = einsum(equation = var_811_equation_0, values = (var_621_cast_fp16, var_476_cast_fp16))[name = tensor("op_811_cast_fp16")]; + tensor var_812_to_fp16 = const()[name = tensor("op_812_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_63_cast_fp16 = mul(x = var_811_cast_fp16, y = var_812_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; + tensor var_815_equation_0 = const()[name = tensor("op_815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_815_cast_fp16 = einsum(equation = var_815_equation_0, values = (var_625_cast_fp16, var_483_cast_fp16))[name = tensor("op_815_cast_fp16")]; + tensor var_816_to_fp16 = const()[name = tensor("op_816_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_65_cast_fp16 = mul(x = var_815_cast_fp16, y = var_816_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; + tensor var_819_equation_0 = const()[name = tensor("op_819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_819_cast_fp16 = einsum(equation = var_819_equation_0, values = (var_625_cast_fp16, var_490_cast_fp16))[name = tensor("op_819_cast_fp16")]; + tensor var_820_to_fp16 = const()[name = tensor("op_820_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_67_cast_fp16 = mul(x = var_819_cast_fp16, y = var_820_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; + tensor var_823_equation_0 = const()[name = tensor("op_823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_823_cast_fp16 = einsum(equation = var_823_equation_0, values = (var_625_cast_fp16, var_497_cast_fp16))[name = tensor("op_823_cast_fp16")]; + tensor var_824_to_fp16 = const()[name = tensor("op_824_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_69_cast_fp16 = mul(x = var_823_cast_fp16, y = var_824_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; + tensor var_827_equation_0 = const()[name = tensor("op_827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_827_cast_fp16 = einsum(equation = var_827_equation_0, values = (var_625_cast_fp16, var_504_cast_fp16))[name = tensor("op_827_cast_fp16")]; + tensor var_828_to_fp16 = const()[name = tensor("op_828_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_71_cast_fp16 = mul(x = var_827_cast_fp16, y = var_828_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; + tensor var_831_equation_0 = const()[name = 
tensor("op_831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_831_cast_fp16 = einsum(equation = var_831_equation_0, values = (var_629_cast_fp16, var_511_cast_fp16))[name = tensor("op_831_cast_fp16")]; + tensor var_832_to_fp16 = const()[name = tensor("op_832_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_73_cast_fp16 = mul(x = var_831_cast_fp16, y = var_832_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; + tensor var_835_equation_0 = const()[name = tensor("op_835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_835_cast_fp16 = einsum(equation = var_835_equation_0, values = (var_629_cast_fp16, var_518_cast_fp16))[name = tensor("op_835_cast_fp16")]; + tensor var_836_to_fp16 = const()[name = tensor("op_836_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_75_cast_fp16 = mul(x = var_835_cast_fp16, y = var_836_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; + tensor var_839_equation_0 = const()[name = tensor("op_839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_839_cast_fp16 = einsum(equation = var_839_equation_0, values = (var_629_cast_fp16, var_525_cast_fp16))[name = tensor("op_839_cast_fp16")]; + tensor var_840_to_fp16 = const()[name = tensor("op_840_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_77_cast_fp16 = mul(x = var_839_cast_fp16, y = var_840_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; + tensor var_843_equation_0 = const()[name = tensor("op_843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_843_cast_fp16 = einsum(equation = var_843_equation_0, values = (var_629_cast_fp16, var_532_cast_fp16))[name = tensor("op_843_cast_fp16")]; + tensor var_844_to_fp16 = const()[name = tensor("op_844_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_79_cast_fp16 = mul(x = var_843_cast_fp16, y = var_844_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; + tensor var_847_equation_0 = const()[name = tensor("op_847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_847_cast_fp16 = einsum(equation = var_847_equation_0, values = (var_633_cast_fp16, var_539_cast_fp16))[name = tensor("op_847_cast_fp16")]; + tensor var_848_to_fp16 = const()[name = tensor("op_848_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_81_cast_fp16 = mul(x = var_847_cast_fp16, y = var_848_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; + tensor var_851_equation_0 = const()[name = tensor("op_851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_851_cast_fp16 = einsum(equation = var_851_equation_0, values = (var_633_cast_fp16, var_546_cast_fp16))[name = tensor("op_851_cast_fp16")]; + tensor var_852_to_fp16 = const()[name = tensor("op_852_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_83_cast_fp16 = mul(x = var_851_cast_fp16, y = var_852_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; + tensor var_855_equation_0 = const()[name = tensor("op_855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_855_cast_fp16 = einsum(equation = var_855_equation_0, values = (var_633_cast_fp16, var_553_cast_fp16))[name = tensor("op_855_cast_fp16")]; + tensor var_856_to_fp16 = const()[name = tensor("op_856_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_85_cast_fp16 = mul(x = var_855_cast_fp16, y = var_856_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; + tensor var_859_equation_0 = const()[name = tensor("op_859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_859_cast_fp16 = einsum(equation = var_859_equation_0, values = (var_633_cast_fp16, var_560_cast_fp16))[name = tensor("op_859_cast_fp16")]; + tensor var_860_to_fp16 = 
const()[name = tensor("op_860_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_87_cast_fp16 = mul(x = var_859_cast_fp16, y = var_860_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; + tensor var_863_equation_0 = const()[name = tensor("op_863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_863_cast_fp16 = einsum(equation = var_863_equation_0, values = (var_637_cast_fp16, var_567_cast_fp16))[name = tensor("op_863_cast_fp16")]; + tensor var_864_to_fp16 = const()[name = tensor("op_864_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_89_cast_fp16 = mul(x = var_863_cast_fp16, y = var_864_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; + tensor var_867_equation_0 = const()[name = tensor("op_867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_867_cast_fp16 = einsum(equation = var_867_equation_0, values = (var_637_cast_fp16, var_574_cast_fp16))[name = tensor("op_867_cast_fp16")]; + tensor var_868_to_fp16 = const()[name = tensor("op_868_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_91_cast_fp16 = mul(x = var_867_cast_fp16, y = var_868_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; + tensor var_871_equation_0 = const()[name = tensor("op_871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_871_cast_fp16 = einsum(equation = var_871_equation_0, values = (var_637_cast_fp16, var_581_cast_fp16))[name = tensor("op_871_cast_fp16")]; + tensor var_872_to_fp16 = const()[name = tensor("op_872_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_93_cast_fp16 = mul(x = var_871_cast_fp16, y = var_872_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; + tensor var_875_equation_0 = const()[name = tensor("op_875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_875_cast_fp16 = einsum(equation = var_875_equation_0, values = (var_637_cast_fp16, var_588_cast_fp16))[name = tensor("op_875_cast_fp16")]; + tensor var_876_to_fp16 = const()[name = tensor("op_876_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_95_cast_fp16 = mul(x = var_875_cast_fp16, y = var_876_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; + tensor var_878_cast_fp16 = softmax(axis = var_151, x = aw_chunk_1_cast_fp16)[name = tensor("op_878_cast_fp16")]; + tensor var_879_cast_fp16 = softmax(axis = var_151, x = aw_chunk_3_cast_fp16)[name = tensor("op_879_cast_fp16")]; + tensor var_880_cast_fp16 = softmax(axis = var_151, x = aw_chunk_5_cast_fp16)[name = tensor("op_880_cast_fp16")]; + tensor var_881_cast_fp16 = softmax(axis = var_151, x = aw_chunk_7_cast_fp16)[name = tensor("op_881_cast_fp16")]; + tensor var_882_cast_fp16 = softmax(axis = var_151, x = aw_chunk_9_cast_fp16)[name = tensor("op_882_cast_fp16")]; + tensor var_883_cast_fp16 = softmax(axis = var_151, x = aw_chunk_11_cast_fp16)[name = tensor("op_883_cast_fp16")]; + tensor var_884_cast_fp16 = softmax(axis = var_151, x = aw_chunk_13_cast_fp16)[name = tensor("op_884_cast_fp16")]; + tensor var_885_cast_fp16 = softmax(axis = var_151, x = aw_chunk_15_cast_fp16)[name = tensor("op_885_cast_fp16")]; + tensor var_886_cast_fp16 = softmax(axis = var_151, x = aw_chunk_17_cast_fp16)[name = tensor("op_886_cast_fp16")]; + tensor var_887_cast_fp16 = softmax(axis = var_151, x = aw_chunk_19_cast_fp16)[name = tensor("op_887_cast_fp16")]; + tensor var_888_cast_fp16 = softmax(axis = var_151, x = aw_chunk_21_cast_fp16)[name = tensor("op_888_cast_fp16")]; + tensor var_889_cast_fp16 = softmax(axis = var_151, x = aw_chunk_23_cast_fp16)[name = tensor("op_889_cast_fp16")]; + tensor var_890_cast_fp16 = softmax(axis = var_151, x = aw_chunk_25_cast_fp16)[name = 
tensor("op_890_cast_fp16")]; + tensor var_891_cast_fp16 = softmax(axis = var_151, x = aw_chunk_27_cast_fp16)[name = tensor("op_891_cast_fp16")]; + tensor var_892_cast_fp16 = softmax(axis = var_151, x = aw_chunk_29_cast_fp16)[name = tensor("op_892_cast_fp16")]; + tensor var_893_cast_fp16 = softmax(axis = var_151, x = aw_chunk_31_cast_fp16)[name = tensor("op_893_cast_fp16")]; + tensor var_894_cast_fp16 = softmax(axis = var_151, x = aw_chunk_33_cast_fp16)[name = tensor("op_894_cast_fp16")]; + tensor var_895_cast_fp16 = softmax(axis = var_151, x = aw_chunk_35_cast_fp16)[name = tensor("op_895_cast_fp16")]; + tensor var_896_cast_fp16 = softmax(axis = var_151, x = aw_chunk_37_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor var_897_cast_fp16 = softmax(axis = var_151, x = aw_chunk_39_cast_fp16)[name = tensor("op_897_cast_fp16")]; + tensor var_898_cast_fp16 = softmax(axis = var_151, x = aw_chunk_41_cast_fp16)[name = tensor("op_898_cast_fp16")]; + tensor var_899_cast_fp16 = softmax(axis = var_151, x = aw_chunk_43_cast_fp16)[name = tensor("op_899_cast_fp16")]; + tensor var_900_cast_fp16 = softmax(axis = var_151, x = aw_chunk_45_cast_fp16)[name = tensor("op_900_cast_fp16")]; + tensor var_901_cast_fp16 = softmax(axis = var_151, x = aw_chunk_47_cast_fp16)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = softmax(axis = var_151, x = aw_chunk_49_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor var_903_cast_fp16 = softmax(axis = var_151, x = aw_chunk_51_cast_fp16)[name = tensor("op_903_cast_fp16")]; + tensor var_904_cast_fp16 = softmax(axis = var_151, x = aw_chunk_53_cast_fp16)[name = tensor("op_904_cast_fp16")]; + tensor var_905_cast_fp16 = softmax(axis = var_151, x = aw_chunk_55_cast_fp16)[name = tensor("op_905_cast_fp16")]; + tensor var_906_cast_fp16 = softmax(axis = var_151, x = aw_chunk_57_cast_fp16)[name = tensor("op_906_cast_fp16")]; + tensor var_907_cast_fp16 = softmax(axis = var_151, x = aw_chunk_59_cast_fp16)[name = tensor("op_907_cast_fp16")]; + tensor var_908_cast_fp16 = softmax(axis = var_151, x = aw_chunk_61_cast_fp16)[name = tensor("op_908_cast_fp16")]; + tensor var_909_cast_fp16 = softmax(axis = var_151, x = aw_chunk_63_cast_fp16)[name = tensor("op_909_cast_fp16")]; + tensor var_910_cast_fp16 = softmax(axis = var_151, x = aw_chunk_65_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor var_911_cast_fp16 = softmax(axis = var_151, x = aw_chunk_67_cast_fp16)[name = tensor("op_911_cast_fp16")]; + tensor var_912_cast_fp16 = softmax(axis = var_151, x = aw_chunk_69_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor var_913_cast_fp16 = softmax(axis = var_151, x = aw_chunk_71_cast_fp16)[name = tensor("op_913_cast_fp16")]; + tensor var_914_cast_fp16 = softmax(axis = var_151, x = aw_chunk_73_cast_fp16)[name = tensor("op_914_cast_fp16")]; + tensor var_915_cast_fp16 = softmax(axis = var_151, x = aw_chunk_75_cast_fp16)[name = tensor("op_915_cast_fp16")]; + tensor var_916_cast_fp16 = softmax(axis = var_151, x = aw_chunk_77_cast_fp16)[name = tensor("op_916_cast_fp16")]; + tensor var_917_cast_fp16 = softmax(axis = var_151, x = aw_chunk_79_cast_fp16)[name = tensor("op_917_cast_fp16")]; + tensor var_918_cast_fp16 = softmax(axis = var_151, x = aw_chunk_81_cast_fp16)[name = tensor("op_918_cast_fp16")]; + tensor var_919_cast_fp16 = softmax(axis = var_151, x = aw_chunk_83_cast_fp16)[name = tensor("op_919_cast_fp16")]; + tensor var_920_cast_fp16 = softmax(axis = var_151, x = aw_chunk_85_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_921_cast_fp16 = softmax(axis = 
var_151, x = aw_chunk_87_cast_fp16)[name = tensor("op_921_cast_fp16")]; + tensor var_922_cast_fp16 = softmax(axis = var_151, x = aw_chunk_89_cast_fp16)[name = tensor("op_922_cast_fp16")]; + tensor var_923_cast_fp16 = softmax(axis = var_151, x = aw_chunk_91_cast_fp16)[name = tensor("op_923_cast_fp16")]; + tensor var_924_cast_fp16 = softmax(axis = var_151, x = aw_chunk_93_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_925_cast_fp16 = softmax(axis = var_151, x = aw_chunk_95_cast_fp16)[name = tensor("op_925_cast_fp16")]; + tensor var_927_equation_0 = const()[name = tensor("op_927_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_927_cast_fp16 = einsum(equation = var_927_equation_0, values = (var_639_cast_fp16, var_878_cast_fp16))[name = tensor("op_927_cast_fp16")]; + tensor var_929_equation_0 = const()[name = tensor("op_929_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_929_cast_fp16 = einsum(equation = var_929_equation_0, values = (var_639_cast_fp16, var_879_cast_fp16))[name = tensor("op_929_cast_fp16")]; + tensor var_931_equation_0 = const()[name = tensor("op_931_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_931_cast_fp16 = einsum(equation = var_931_equation_0, values = (var_639_cast_fp16, var_880_cast_fp16))[name = tensor("op_931_cast_fp16")]; + tensor var_933_equation_0 = const()[name = tensor("op_933_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_933_cast_fp16 = einsum(equation = var_933_equation_0, values = (var_639_cast_fp16, var_881_cast_fp16))[name = tensor("op_933_cast_fp16")]; + tensor var_935_equation_0 = const()[name = tensor("op_935_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_935_cast_fp16 = einsum(equation = var_935_equation_0, values = (var_643_cast_fp16, var_882_cast_fp16))[name = tensor("op_935_cast_fp16")]; + tensor var_937_equation_0 = const()[name = tensor("op_937_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_937_cast_fp16 = einsum(equation = var_937_equation_0, values = (var_643_cast_fp16, var_883_cast_fp16))[name = tensor("op_937_cast_fp16")]; + tensor var_939_equation_0 = const()[name = tensor("op_939_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_939_cast_fp16 = einsum(equation = var_939_equation_0, values = (var_643_cast_fp16, var_884_cast_fp16))[name = tensor("op_939_cast_fp16")]; + tensor var_941_equation_0 = const()[name = tensor("op_941_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_941_cast_fp16 = einsum(equation = var_941_equation_0, values = (var_643_cast_fp16, var_885_cast_fp16))[name = tensor("op_941_cast_fp16")]; + tensor var_943_equation_0 = const()[name = tensor("op_943_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_943_cast_fp16 = einsum(equation = var_943_equation_0, values = (var_647_cast_fp16, var_886_cast_fp16))[name = tensor("op_943_cast_fp16")]; + tensor var_945_equation_0 = const()[name = tensor("op_945_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_945_cast_fp16 = einsum(equation = var_945_equation_0, values = (var_647_cast_fp16, var_887_cast_fp16))[name = tensor("op_945_cast_fp16")]; + tensor var_947_equation_0 = const()[name = tensor("op_947_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_947_cast_fp16 = einsum(equation = var_947_equation_0, values = (var_647_cast_fp16, var_888_cast_fp16))[name = tensor("op_947_cast_fp16")]; + tensor var_949_equation_0 = const()[name = tensor("op_949_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_949_cast_fp16 = einsum(equation = 
var_949_equation_0, values = (var_647_cast_fp16, var_889_cast_fp16))[name = tensor("op_949_cast_fp16")]; + tensor var_951_equation_0 = const()[name = tensor("op_951_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_951_cast_fp16 = einsum(equation = var_951_equation_0, values = (var_651_cast_fp16, var_890_cast_fp16))[name = tensor("op_951_cast_fp16")]; + tensor var_953_equation_0 = const()[name = tensor("op_953_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_953_cast_fp16 = einsum(equation = var_953_equation_0, values = (var_651_cast_fp16, var_891_cast_fp16))[name = tensor("op_953_cast_fp16")]; + tensor var_955_equation_0 = const()[name = tensor("op_955_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_955_cast_fp16 = einsum(equation = var_955_equation_0, values = (var_651_cast_fp16, var_892_cast_fp16))[name = tensor("op_955_cast_fp16")]; + tensor var_957_equation_0 = const()[name = tensor("op_957_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_957_cast_fp16 = einsum(equation = var_957_equation_0, values = (var_651_cast_fp16, var_893_cast_fp16))[name = tensor("op_957_cast_fp16")]; + tensor var_959_equation_0 = const()[name = tensor("op_959_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_959_cast_fp16 = einsum(equation = var_959_equation_0, values = (var_655_cast_fp16, var_894_cast_fp16))[name = tensor("op_959_cast_fp16")]; + tensor var_961_equation_0 = const()[name = tensor("op_961_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_961_cast_fp16 = einsum(equation = var_961_equation_0, values = (var_655_cast_fp16, var_895_cast_fp16))[name = tensor("op_961_cast_fp16")]; + tensor var_963_equation_0 = const()[name = tensor("op_963_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_963_cast_fp16 = einsum(equation = var_963_equation_0, values = (var_655_cast_fp16, var_896_cast_fp16))[name = tensor("op_963_cast_fp16")]; + tensor var_965_equation_0 = const()[name = tensor("op_965_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_965_cast_fp16 = einsum(equation = var_965_equation_0, values = (var_655_cast_fp16, var_897_cast_fp16))[name = tensor("op_965_cast_fp16")]; + tensor var_967_equation_0 = const()[name = tensor("op_967_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_967_cast_fp16 = einsum(equation = var_967_equation_0, values = (var_659_cast_fp16, var_898_cast_fp16))[name = tensor("op_967_cast_fp16")]; + tensor var_969_equation_0 = const()[name = tensor("op_969_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_969_cast_fp16 = einsum(equation = var_969_equation_0, values = (var_659_cast_fp16, var_899_cast_fp16))[name = tensor("op_969_cast_fp16")]; + tensor var_971_equation_0 = const()[name = tensor("op_971_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_971_cast_fp16 = einsum(equation = var_971_equation_0, values = (var_659_cast_fp16, var_900_cast_fp16))[name = tensor("op_971_cast_fp16")]; + tensor var_973_equation_0 = const()[name = tensor("op_973_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_973_cast_fp16 = einsum(equation = var_973_equation_0, values = (var_659_cast_fp16, var_901_cast_fp16))[name = tensor("op_973_cast_fp16")]; + tensor var_975_equation_0 = const()[name = tensor("op_975_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_975_cast_fp16 = einsum(equation = var_975_equation_0, values = (var_663_cast_fp16, var_902_cast_fp16))[name = tensor("op_975_cast_fp16")]; + tensor var_977_equation_0 = const()[name = 
tensor("op_977_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_977_cast_fp16 = einsum(equation = var_977_equation_0, values = (var_663_cast_fp16, var_903_cast_fp16))[name = tensor("op_977_cast_fp16")]; + tensor var_979_equation_0 = const()[name = tensor("op_979_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_979_cast_fp16 = einsum(equation = var_979_equation_0, values = (var_663_cast_fp16, var_904_cast_fp16))[name = tensor("op_979_cast_fp16")]; + tensor var_981_equation_0 = const()[name = tensor("op_981_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_981_cast_fp16 = einsum(equation = var_981_equation_0, values = (var_663_cast_fp16, var_905_cast_fp16))[name = tensor("op_981_cast_fp16")]; + tensor var_983_equation_0 = const()[name = tensor("op_983_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_983_cast_fp16 = einsum(equation = var_983_equation_0, values = (var_667_cast_fp16, var_906_cast_fp16))[name = tensor("op_983_cast_fp16")]; + tensor var_985_equation_0 = const()[name = tensor("op_985_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_985_cast_fp16 = einsum(equation = var_985_equation_0, values = (var_667_cast_fp16, var_907_cast_fp16))[name = tensor("op_985_cast_fp16")]; + tensor var_987_equation_0 = const()[name = tensor("op_987_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_987_cast_fp16 = einsum(equation = var_987_equation_0, values = (var_667_cast_fp16, var_908_cast_fp16))[name = tensor("op_987_cast_fp16")]; + tensor var_989_equation_0 = const()[name = tensor("op_989_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_989_cast_fp16 = einsum(equation = var_989_equation_0, values = (var_667_cast_fp16, var_909_cast_fp16))[name = tensor("op_989_cast_fp16")]; + tensor var_991_equation_0 = const()[name = tensor("op_991_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_991_cast_fp16 = einsum(equation = var_991_equation_0, values = (var_671_cast_fp16, var_910_cast_fp16))[name = tensor("op_991_cast_fp16")]; + tensor var_993_equation_0 = const()[name = tensor("op_993_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_993_cast_fp16 = einsum(equation = var_993_equation_0, values = (var_671_cast_fp16, var_911_cast_fp16))[name = tensor("op_993_cast_fp16")]; + tensor var_995_equation_0 = const()[name = tensor("op_995_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_995_cast_fp16 = einsum(equation = var_995_equation_0, values = (var_671_cast_fp16, var_912_cast_fp16))[name = tensor("op_995_cast_fp16")]; + tensor var_997_equation_0 = const()[name = tensor("op_997_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_997_cast_fp16 = einsum(equation = var_997_equation_0, values = (var_671_cast_fp16, var_913_cast_fp16))[name = tensor("op_997_cast_fp16")]; + tensor var_999_equation_0 = const()[name = tensor("op_999_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_999_cast_fp16 = einsum(equation = var_999_equation_0, values = (var_675_cast_fp16, var_914_cast_fp16))[name = tensor("op_999_cast_fp16")]; + tensor var_1001_equation_0 = const()[name = tensor("op_1001_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1001_cast_fp16 = einsum(equation = var_1001_equation_0, values = (var_675_cast_fp16, var_915_cast_fp16))[name = tensor("op_1001_cast_fp16")]; + tensor var_1003_equation_0 = const()[name = tensor("op_1003_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1003_cast_fp16 = einsum(equation = var_1003_equation_0, values = (var_675_cast_fp16, 
var_916_cast_fp16))[name = tensor("op_1003_cast_fp16")]; + tensor var_1005_equation_0 = const()[name = tensor("op_1005_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1005_cast_fp16 = einsum(equation = var_1005_equation_0, values = (var_675_cast_fp16, var_917_cast_fp16))[name = tensor("op_1005_cast_fp16")]; + tensor var_1007_equation_0 = const()[name = tensor("op_1007_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1007_cast_fp16 = einsum(equation = var_1007_equation_0, values = (var_679_cast_fp16, var_918_cast_fp16))[name = tensor("op_1007_cast_fp16")]; + tensor var_1009_equation_0 = const()[name = tensor("op_1009_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1009_cast_fp16 = einsum(equation = var_1009_equation_0, values = (var_679_cast_fp16, var_919_cast_fp16))[name = tensor("op_1009_cast_fp16")]; + tensor var_1011_equation_0 = const()[name = tensor("op_1011_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1011_cast_fp16 = einsum(equation = var_1011_equation_0, values = (var_679_cast_fp16, var_920_cast_fp16))[name = tensor("op_1011_cast_fp16")]; + tensor var_1013_equation_0 = const()[name = tensor("op_1013_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1013_cast_fp16 = einsum(equation = var_1013_equation_0, values = (var_679_cast_fp16, var_921_cast_fp16))[name = tensor("op_1013_cast_fp16")]; + tensor var_1015_equation_0 = const()[name = tensor("op_1015_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1015_cast_fp16 = einsum(equation = var_1015_equation_0, values = (var_683_cast_fp16, var_922_cast_fp16))[name = tensor("op_1015_cast_fp16")]; + tensor var_1017_equation_0 = const()[name = tensor("op_1017_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1017_cast_fp16 = einsum(equation = var_1017_equation_0, values = (var_683_cast_fp16, var_923_cast_fp16))[name = tensor("op_1017_cast_fp16")]; + tensor var_1019_equation_0 = const()[name = tensor("op_1019_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1019_cast_fp16 = einsum(equation = var_1019_equation_0, values = (var_683_cast_fp16, var_924_cast_fp16))[name = tensor("op_1019_cast_fp16")]; + tensor var_1021_equation_0 = const()[name = tensor("op_1021_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1021_cast_fp16 = einsum(equation = var_1021_equation_0, values = (var_683_cast_fp16, var_925_cast_fp16))[name = tensor("op_1021_cast_fp16")]; + tensor var_1023_interleave_0 = const()[name = tensor("op_1023_interleave_0"), val = tensor(false)]; + tensor var_1023_cast_fp16 = concat(axis = var_134, interleave = var_1023_interleave_0, values = (var_927_cast_fp16, var_929_cast_fp16, var_931_cast_fp16, var_933_cast_fp16))[name = tensor("op_1023_cast_fp16")]; + tensor var_1025_interleave_0 = const()[name = tensor("op_1025_interleave_0"), val = tensor(false)]; + tensor var_1025_cast_fp16 = concat(axis = var_134, interleave = var_1025_interleave_0, values = (var_935_cast_fp16, var_937_cast_fp16, var_939_cast_fp16, var_941_cast_fp16))[name = tensor("op_1025_cast_fp16")]; + tensor var_1027_interleave_0 = const()[name = tensor("op_1027_interleave_0"), val = tensor(false)]; + tensor var_1027_cast_fp16 = concat(axis = var_134, interleave = var_1027_interleave_0, values = (var_943_cast_fp16, var_945_cast_fp16, var_947_cast_fp16, var_949_cast_fp16))[name = tensor("op_1027_cast_fp16")]; + tensor var_1029_interleave_0 = const()[name = tensor("op_1029_interleave_0"), val = tensor(false)]; + tensor var_1029_cast_fp16 = concat(axis = var_134, interleave = 
var_1029_interleave_0, values = (var_951_cast_fp16, var_953_cast_fp16, var_955_cast_fp16, var_957_cast_fp16))[name = tensor("op_1029_cast_fp16")]; + tensor var_1031_interleave_0 = const()[name = tensor("op_1031_interleave_0"), val = tensor(false)]; + tensor var_1031_cast_fp16 = concat(axis = var_134, interleave = var_1031_interleave_0, values = (var_959_cast_fp16, var_961_cast_fp16, var_963_cast_fp16, var_965_cast_fp16))[name = tensor("op_1031_cast_fp16")]; + tensor var_1033_interleave_0 = const()[name = tensor("op_1033_interleave_0"), val = tensor(false)]; + tensor var_1033_cast_fp16 = concat(axis = var_134, interleave = var_1033_interleave_0, values = (var_967_cast_fp16, var_969_cast_fp16, var_971_cast_fp16, var_973_cast_fp16))[name = tensor("op_1033_cast_fp16")]; + tensor var_1035_interleave_0 = const()[name = tensor("op_1035_interleave_0"), val = tensor(false)]; + tensor var_1035_cast_fp16 = concat(axis = var_134, interleave = var_1035_interleave_0, values = (var_975_cast_fp16, var_977_cast_fp16, var_979_cast_fp16, var_981_cast_fp16))[name = tensor("op_1035_cast_fp16")]; + tensor var_1037_interleave_0 = const()[name = tensor("op_1037_interleave_0"), val = tensor(false)]; + tensor var_1037_cast_fp16 = concat(axis = var_134, interleave = var_1037_interleave_0, values = (var_983_cast_fp16, var_985_cast_fp16, var_987_cast_fp16, var_989_cast_fp16))[name = tensor("op_1037_cast_fp16")]; + tensor var_1039_interleave_0 = const()[name = tensor("op_1039_interleave_0"), val = tensor(false)]; + tensor var_1039_cast_fp16 = concat(axis = var_134, interleave = var_1039_interleave_0, values = (var_991_cast_fp16, var_993_cast_fp16, var_995_cast_fp16, var_997_cast_fp16))[name = tensor("op_1039_cast_fp16")]; + tensor var_1041_interleave_0 = const()[name = tensor("op_1041_interleave_0"), val = tensor(false)]; + tensor var_1041_cast_fp16 = concat(axis = var_134, interleave = var_1041_interleave_0, values = (var_999_cast_fp16, var_1001_cast_fp16, var_1003_cast_fp16, var_1005_cast_fp16))[name = tensor("op_1041_cast_fp16")]; + tensor var_1043_interleave_0 = const()[name = tensor("op_1043_interleave_0"), val = tensor(false)]; + tensor var_1043_cast_fp16 = concat(axis = var_134, interleave = var_1043_interleave_0, values = (var_1007_cast_fp16, var_1009_cast_fp16, var_1011_cast_fp16, var_1013_cast_fp16))[name = tensor("op_1043_cast_fp16")]; + tensor var_1045_interleave_0 = const()[name = tensor("op_1045_interleave_0"), val = tensor(false)]; + tensor var_1045_cast_fp16 = concat(axis = var_134, interleave = var_1045_interleave_0, values = (var_1015_cast_fp16, var_1017_cast_fp16, var_1019_cast_fp16, var_1021_cast_fp16))[name = tensor("op_1045_cast_fp16")]; + tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; + tensor input_1_cast_fp16 = concat(axis = var_151, interleave = input_1_interleave_0, values = (var_1023_cast_fp16, var_1025_cast_fp16, var_1027_cast_fp16, var_1029_cast_fp16, var_1031_cast_fp16, var_1033_cast_fp16, var_1035_cast_fp16, var_1037_cast_fp16, var_1039_cast_fp16, var_1041_cast_fp16, var_1043_cast_fp16, var_1045_cast_fp16))[name = tensor("input_1_cast_fp16")]; + tensor var_1050 = const()[name = tensor("op_1050"), val = tensor([1, 1])]; + tensor var_1052 = const()[name = tensor("op_1052"), val = tensor([1, 1])]; + tensor obj_3_pad_type_0 = const()[name = tensor("obj_3_pad_type_0"), val = tensor("custom")]; + tensor obj_3_pad_0 = const()[name = tensor("obj_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_o_proj_weight_to_fp16 = 
const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9763776)))]; + tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10943488)))]; + tensor obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_1052, groups = var_151, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = var_1050, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor var_1058 = const()[name = tensor("op_1058"), val = tensor([1])]; + tensor channels_mean_3_cast_fp16 = reduce_mean(axes = var_1058, keep_dims = var_152, x = inputs_3_cast_fp16)[name = tensor("channels_mean_3_cast_fp16")]; + tensor zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor("zero_mean_3_cast_fp16")]; + tensor zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor("zero_mean_sq_3_cast_fp16")]; + tensor var_1062 = const()[name = tensor("op_1062"), val = tensor([1])]; + tensor var_1063_cast_fp16 = reduce_mean(axes = var_1062, keep_dims = var_152, x = zero_mean_sq_3_cast_fp16)[name = tensor("op_1063_cast_fp16")]; + tensor var_1064_to_fp16 = const()[name = tensor("op_1064_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1065_cast_fp16 = add(x = var_1063_cast_fp16, y = var_1064_to_fp16)[name = tensor("op_1065_cast_fp16")]; + tensor denom_3_epsilon_0_to_fp16 = const()[name = tensor("denom_3_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_1065_cast_fp16)[name = tensor("denom_3_cast_fp16")]; + tensor out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10945088)))]; + tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10946688)))]; + tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_1076 = const()[name = tensor("op_1076"), val = tensor([1, 1])]; + tensor var_1078 = const()[name = tensor("op_1078"), val = tensor([1, 1])]; + tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("custom")]; + tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10948288)))]; + tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(15666944)))]; + tensor input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_1078, groups = var_151, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = var_1076, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_1084 = const()[name = tensor("op_1084"), val = tensor([1, 1])]; + tensor var_1086 = const()[name = tensor("op_1086"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15673152)))]; + tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20391808)))]; + tensor hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_1086, groups = var_151, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_1084, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_1093 = const()[name = tensor("op_1093"), val = tensor(3)]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor(1)]; + tensor var_1111 = const()[name = tensor("op_1111"), val = tensor(true)]; + tensor var_1121 = const()[name = tensor("op_1121"), val = tensor([1])]; + tensor channels_mean_5_cast_fp16 = reduce_mean(axes = var_1121, keep_dims = var_1111, x = inputs_5_cast_fp16)[name = tensor("channels_mean_5_cast_fp16")]; + tensor zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor("zero_mean_5_cast_fp16")]; + tensor zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor("zero_mean_sq_5_cast_fp16")]; + tensor var_1125 = const()[name = tensor("op_1125"), val = tensor([1])]; + tensor var_1126_cast_fp16 = reduce_mean(axes = var_1125, keep_dims = var_1111, x = zero_mean_sq_5_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1127_to_fp16 = const()[name = tensor("op_1127_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1128_cast_fp16 = add(x = var_1126_cast_fp16, y = var_1127_to_fp16)[name = tensor("op_1128_cast_fp16")]; + tensor denom_5_epsilon_0_to_fp16 = const()[name = tensor("denom_5_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_1128_cast_fp16)[name = tensor("denom_5_cast_fp16")]; + tensor out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20393408)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(20395008)))]; + tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor var_1143 = const()[name = tensor("op_1143"), val = tensor([1, 1])]; + tensor var_1145 = const()[name = tensor("op_1145"), val = tensor([1, 1])]; + tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("custom")]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20396608)))]; + tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21576320)))]; + tensor query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_1145, groups = var_1110, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_1143, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_1149 = const()[name = tensor("op_1149"), val = tensor([1, 1])]; + tensor var_1151 = const()[name = tensor("op_1151"), val = tensor([1, 1])]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("custom")]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21577920)))]; + tensor key_3_cast_fp16 = conv(dilations = var_1151, groups = var_1110, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_1149, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_1156 = const()[name = tensor("op_1156"), val = tensor([1, 1])]; + tensor var_1158 = const()[name = tensor("op_1158"), val = tensor([1, 1])]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("custom")]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22757632)))]; + tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23937344)))]; + tensor value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_1158, groups = var_1110, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_1156, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_1165_begin_0 = const()[name = tensor("op_1165_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1165_end_0 = 
const()[name = tensor("op_1165_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1165_end_mask_0 = const()[name = tensor("op_1165_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1165_cast_fp16 = slice_by_index(begin = var_1165_begin_0, end = var_1165_end_0, end_mask = var_1165_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1165_cast_fp16")]; + tensor var_1169_begin_0 = const()[name = tensor("op_1169_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1169_end_0 = const()[name = tensor("op_1169_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_1169_end_mask_0 = const()[name = tensor("op_1169_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1169_cast_fp16 = slice_by_index(begin = var_1169_begin_0, end = var_1169_end_0, end_mask = var_1169_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1169_cast_fp16")]; + tensor var_1173_begin_0 = const()[name = tensor("op_1173_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1173_end_0 = const()[name = tensor("op_1173_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_1173_end_mask_0 = const()[name = tensor("op_1173_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1173_cast_fp16 = slice_by_index(begin = var_1173_begin_0, end = var_1173_end_0, end_mask = var_1173_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1173_cast_fp16")]; + tensor var_1177_begin_0 = const()[name = tensor("op_1177_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_1177_end_0 = const()[name = tensor("op_1177_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_1177_end_mask_0 = const()[name = tensor("op_1177_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1177_cast_fp16 = slice_by_index(begin = var_1177_begin_0, end = var_1177_end_0, end_mask = var_1177_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1177_cast_fp16")]; + tensor var_1181_begin_0 = const()[name = tensor("op_1181_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1181_end_0 = const()[name = tensor("op_1181_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_1181_end_mask_0 = const()[name = tensor("op_1181_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1181_cast_fp16 = slice_by_index(begin = var_1181_begin_0, end = var_1181_end_0, end_mask = var_1181_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1181_cast_fp16")]; + tensor var_1185_begin_0 = const()[name = tensor("op_1185_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1185_end_0 = const()[name = tensor("op_1185_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_1185_end_mask_0 = const()[name = tensor("op_1185_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1185_cast_fp16 = slice_by_index(begin = var_1185_begin_0, end = var_1185_end_0, end_mask = var_1185_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1185_cast_fp16")]; + tensor var_1189_begin_0 = const()[name = tensor("op_1189_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1189_end_0 = const()[name = tensor("op_1189_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_1189_end_mask_0 = const()[name = tensor("op_1189_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1189_cast_fp16 = slice_by_index(begin = var_1189_begin_0, end = var_1189_end_0, end_mask = var_1189_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1189_cast_fp16")]; + tensor var_1193_begin_0 = const()[name = tensor("op_1193_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1193_end_0 = const()[name = 
tensor("op_1193_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_1193_end_mask_0 = const()[name = tensor("op_1193_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1193_cast_fp16 = slice_by_index(begin = var_1193_begin_0, end = var_1193_end_0, end_mask = var_1193_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1193_cast_fp16")]; + tensor var_1197_begin_0 = const()[name = tensor("op_1197_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1197_end_0 = const()[name = tensor("op_1197_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_1197_end_mask_0 = const()[name = tensor("op_1197_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1197_cast_fp16 = slice_by_index(begin = var_1197_begin_0, end = var_1197_end_0, end_mask = var_1197_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1197_cast_fp16")]; + tensor var_1201_begin_0 = const()[name = tensor("op_1201_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_1201_end_0 = const()[name = tensor("op_1201_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_1201_end_mask_0 = const()[name = tensor("op_1201_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1201_cast_fp16 = slice_by_index(begin = var_1201_begin_0, end = var_1201_end_0, end_mask = var_1201_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1201_cast_fp16")]; + tensor var_1205_begin_0 = const()[name = tensor("op_1205_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1205_end_0 = const()[name = tensor("op_1205_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_1205_end_mask_0 = const()[name = tensor("op_1205_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1205_cast_fp16 = slice_by_index(begin = var_1205_begin_0, end = var_1205_end_0, end_mask = var_1205_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1205_cast_fp16")]; + tensor var_1209_begin_0 = const()[name = tensor("op_1209_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_1209_end_0 = const()[name = tensor("op_1209_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_1209_end_mask_0 = const()[name = tensor("op_1209_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1209_cast_fp16 = slice_by_index(begin = var_1209_begin_0, end = var_1209_end_0, end_mask = var_1209_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1209_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1225_begin_0 = const()[name = tensor("op_1225_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1225_end_0 = const()[name = tensor("op_1225_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1225_end_mask_0 = const()[name = tensor("op_1225_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1225_cast_fp16 = slice_by_index(begin = var_1225_begin_0, end = var_1225_end_0, end_mask = var_1225_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1225_cast_fp16")]; + tensor var_1232_begin_0 = const()[name = tensor("op_1232_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1232_end_0 = const()[name = tensor("op_1232_end_0"), val = 
tensor([1, 64, 1, 1125])]; + tensor var_1232_end_mask_0 = const()[name = tensor("op_1232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1232_cast_fp16 = slice_by_index(begin = var_1232_begin_0, end = var_1232_end_0, end_mask = var_1232_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1232_cast_fp16")]; + tensor var_1239_begin_0 = const()[name = tensor("op_1239_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1239_end_0 = const()[name = tensor("op_1239_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1239_end_mask_0 = const()[name = tensor("op_1239_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1239_cast_fp16 = slice_by_index(begin = var_1239_begin_0, end = var_1239_end_0, end_mask = var_1239_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1239_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1253_begin_0 = const()[name = tensor("op_1253_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1253_end_0 = const()[name = tensor("op_1253_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1253_end_mask_0 = const()[name = tensor("op_1253_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1253_cast_fp16 = slice_by_index(begin = var_1253_begin_0, end = var_1253_end_0, end_mask = var_1253_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1253_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1267_begin_0 = const()[name = tensor("op_1267_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1267_end_0 = const()[name = tensor("op_1267_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1267_end_mask_0 = const()[name = tensor("op_1267_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1267_cast_fp16 = slice_by_index(begin = var_1267_begin_0, end = var_1267_end_0, end_mask = var_1267_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1267_cast_fp16")]; + tensor var_1274_begin_0 = const()[name = tensor("op_1274_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1274_end_0 = const()[name = tensor("op_1274_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1274_end_mask_0 = const()[name = tensor("op_1274_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1274_cast_fp16 = slice_by_index(begin = var_1274_begin_0, end = var_1274_end_0, end_mask = var_1274_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1281_begin_0 = const()[name = tensor("op_1281_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1281_end_0 = const()[name = tensor("op_1281_end_0"), val = tensor([1, 64, 1, 750])]; + 
tensor var_1281_end_mask_0 = const()[name = tensor("op_1281_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1281_cast_fp16 = slice_by_index(begin = var_1281_begin_0, end = var_1281_end_0, end_mask = var_1281_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1281_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1295_begin_0 = const()[name = tensor("op_1295_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1295_end_0 = const()[name = tensor("op_1295_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1295_end_mask_0 = const()[name = tensor("op_1295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1295_cast_fp16 = slice_by_index(begin = var_1295_begin_0, end = var_1295_end_0, end_mask = var_1295_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1295_cast_fp16")]; + tensor var_1302_begin_0 = const()[name = tensor("op_1302_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1302_end_0 = const()[name = tensor("op_1302_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1302_end_mask_0 = const()[name = tensor("op_1302_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1302_cast_fp16 = slice_by_index(begin = var_1302_begin_0, end = var_1302_end_0, end_mask = var_1302_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor var_1309_begin_0 = const()[name = tensor("op_1309_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1309_end_0 = const()[name = tensor("op_1309_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1309_end_mask_0 = const()[name = tensor("op_1309_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1309_cast_fp16 = slice_by_index(begin = var_1309_begin_0, end = var_1309_end_0, end_mask = var_1309_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1309_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1323_begin_0 = const()[name = tensor("op_1323_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1323_end_0 = const()[name = tensor("op_1323_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1323_end_mask_0 = const()[name = tensor("op_1323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1323_cast_fp16 = slice_by_index(begin = var_1323_begin_0, end = var_1323_end_0, end_mask = var_1323_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1323_cast_fp16")]; + tensor var_1330_begin_0 = const()[name = tensor("op_1330_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1330_end_0 = const()[name = tensor("op_1330_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1330_end_mask_0 = 
const()[name = tensor("op_1330_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1330_cast_fp16 = slice_by_index(begin = var_1330_begin_0, end = var_1330_end_0, end_mask = var_1330_end_mask_0, x = var_1181_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1337_begin_0 = const()[name = tensor("op_1337_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1337_end_0 = const()[name = tensor("op_1337_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1337_end_mask_0 = const()[name = tensor("op_1337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1337_cast_fp16 = slice_by_index(begin = var_1337_begin_0, end = var_1337_end_0, end_mask = var_1337_end_mask_0, x = var_1181_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = var_1181_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1351_begin_0 = const()[name = tensor("op_1351_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1351_end_0 = const()[name = tensor("op_1351_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1351_end_mask_0 = const()[name = tensor("op_1351_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1351_cast_fp16 = slice_by_index(begin = var_1351_begin_0, end = var_1351_end_0, end_mask = var_1351_end_mask_0, x = var_1181_cast_fp16)[name = tensor("op_1351_cast_fp16")]; + tensor var_1358_begin_0 = const()[name = tensor("op_1358_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1358_end_0 = const()[name = tensor("op_1358_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1358_end_mask_0 = const()[name = tensor("op_1358_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1358_cast_fp16 = slice_by_index(begin = var_1358_begin_0, end = var_1358_end_0, end_mask = var_1358_end_mask_0, x = var_1185_cast_fp16)[name = tensor("op_1358_cast_fp16")]; + tensor var_1365_begin_0 = const()[name = tensor("op_1365_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1365_end_0 = const()[name = tensor("op_1365_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1365_end_mask_0 = const()[name = tensor("op_1365_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1365_cast_fp16 = slice_by_index(begin = var_1365_begin_0, end = var_1365_end_0, end_mask = var_1365_end_mask_0, x = var_1185_cast_fp16)[name = tensor("op_1365_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = var_1185_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = tensor("op_1379_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1379_end_0 = const()[name = tensor("op_1379_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1379_end_mask_0 = const()[name = 
tensor("op_1379_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = var_1185_cast_fp16)[name = tensor("op_1379_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = var_1189_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1393_begin_0 = const()[name = tensor("op_1393_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1393_end_0 = const()[name = tensor("op_1393_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1393_end_mask_0 = const()[name = tensor("op_1393_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1393_cast_fp16 = slice_by_index(begin = var_1393_begin_0, end = var_1393_end_0, end_mask = var_1393_end_mask_0, x = var_1189_cast_fp16)[name = tensor("op_1393_cast_fp16")]; + tensor var_1400_begin_0 = const()[name = tensor("op_1400_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1400_end_0 = const()[name = tensor("op_1400_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1400_end_mask_0 = const()[name = tensor("op_1400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1400_cast_fp16 = slice_by_index(begin = var_1400_begin_0, end = var_1400_end_0, end_mask = var_1400_end_mask_0, x = var_1189_cast_fp16)[name = tensor("op_1400_cast_fp16")]; + tensor var_1407_begin_0 = const()[name = tensor("op_1407_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1407_end_0 = const()[name = tensor("op_1407_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1407_end_mask_0 = const()[name = tensor("op_1407_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1407_cast_fp16 = slice_by_index(begin = var_1407_begin_0, end = var_1407_end_0, end_mask = var_1407_end_mask_0, x = var_1189_cast_fp16)[name = tensor("op_1407_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = var_1193_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1421_begin_0 = const()[name = tensor("op_1421_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1421_end_0 = const()[name = tensor("op_1421_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1421_end_mask_0 = const()[name = tensor("op_1421_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1421_cast_fp16 = slice_by_index(begin = var_1421_begin_0, end = var_1421_end_0, end_mask = var_1421_end_mask_0, x = var_1193_cast_fp16)[name = tensor("op_1421_cast_fp16")]; + tensor var_1428_begin_0 = const()[name = tensor("op_1428_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1428_end_0 = const()[name = tensor("op_1428_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1428_end_mask_0 = const()[name = tensor("op_1428_end_mask_0"), val 
= tensor([true, true, true, false])]; + tensor var_1428_cast_fp16 = slice_by_index(begin = var_1428_begin_0, end = var_1428_end_0, end_mask = var_1428_end_mask_0, x = var_1193_cast_fp16)[name = tensor("op_1428_cast_fp16")]; + tensor var_1435_begin_0 = const()[name = tensor("op_1435_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1435_end_0 = const()[name = tensor("op_1435_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1435_end_mask_0 = const()[name = tensor("op_1435_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1435_cast_fp16 = slice_by_index(begin = var_1435_begin_0, end = var_1435_end_0, end_mask = var_1435_end_mask_0, x = var_1193_cast_fp16)[name = tensor("op_1435_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = var_1197_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1449_begin_0 = const()[name = tensor("op_1449_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1449_end_0 = const()[name = tensor("op_1449_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1449_end_mask_0 = const()[name = tensor("op_1449_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1449_cast_fp16 = slice_by_index(begin = var_1449_begin_0, end = var_1449_end_0, end_mask = var_1449_end_mask_0, x = var_1197_cast_fp16)[name = tensor("op_1449_cast_fp16")]; + tensor var_1456_begin_0 = const()[name = tensor("op_1456_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1456_end_0 = const()[name = tensor("op_1456_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1456_end_mask_0 = const()[name = tensor("op_1456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1456_cast_fp16 = slice_by_index(begin = var_1456_begin_0, end = var_1456_end_0, end_mask = var_1456_end_mask_0, x = var_1197_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor var_1463_begin_0 = const()[name = tensor("op_1463_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1463_end_0 = const()[name = tensor("op_1463_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1463_end_mask_0 = const()[name = tensor("op_1463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1463_cast_fp16 = slice_by_index(begin = var_1463_begin_0, end = var_1463_end_0, end_mask = var_1463_end_mask_0, x = var_1197_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = var_1201_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1477_begin_0 = const()[name = tensor("op_1477_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1477_end_0 = const()[name = tensor("op_1477_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1477_end_mask_0 = const()[name = tensor("op_1477_end_mask_0"), val = tensor([true, true, true, 
false])]; + tensor var_1477_cast_fp16 = slice_by_index(begin = var_1477_begin_0, end = var_1477_end_0, end_mask = var_1477_end_mask_0, x = var_1201_cast_fp16)[name = tensor("op_1477_cast_fp16")]; + tensor var_1484_begin_0 = const()[name = tensor("op_1484_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1484_end_0 = const()[name = tensor("op_1484_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1484_end_mask_0 = const()[name = tensor("op_1484_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1484_cast_fp16 = slice_by_index(begin = var_1484_begin_0, end = var_1484_end_0, end_mask = var_1484_end_mask_0, x = var_1201_cast_fp16)[name = tensor("op_1484_cast_fp16")]; + tensor var_1491_begin_0 = const()[name = tensor("op_1491_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1491_end_0 = const()[name = tensor("op_1491_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1491_end_mask_0 = const()[name = tensor("op_1491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1491_cast_fp16 = slice_by_index(begin = var_1491_begin_0, end = var_1491_end_0, end_mask = var_1491_end_mask_0, x = var_1201_cast_fp16)[name = tensor("op_1491_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = var_1205_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1505_begin_0 = const()[name = tensor("op_1505_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1505_end_0 = const()[name = tensor("op_1505_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1505_end_mask_0 = const()[name = tensor("op_1505_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1505_cast_fp16 = slice_by_index(begin = var_1505_begin_0, end = var_1505_end_0, end_mask = var_1505_end_mask_0, x = var_1205_cast_fp16)[name = tensor("op_1505_cast_fp16")]; + tensor var_1512_begin_0 = const()[name = tensor("op_1512_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1512_end_0 = const()[name = tensor("op_1512_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1512_end_mask_0 = const()[name = tensor("op_1512_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1512_cast_fp16 = slice_by_index(begin = var_1512_begin_0, end = var_1512_end_0, end_mask = var_1512_end_mask_0, x = var_1205_cast_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1519_begin_0 = const()[name = tensor("op_1519_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1519_end_0 = const()[name = tensor("op_1519_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1519_end_mask_0 = const()[name = tensor("op_1519_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1519_cast_fp16 = slice_by_index(begin = var_1519_begin_0, end = var_1519_end_0, end_mask = var_1519_end_mask_0, x = var_1205_cast_fp16)[name = tensor("op_1519_cast_fp16")]; + tensor var_1526_begin_0 = const()[name = tensor("op_1526_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1526_end_0 = const()[name = tensor("op_1526_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1526_end_mask_0 = const()[name = tensor("op_1526_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_1526_cast_fp16 = slice_by_index(begin = var_1526_begin_0, end = var_1526_end_0, end_mask = var_1526_end_mask_0, x = var_1209_cast_fp16)[name = tensor("op_1526_cast_fp16")]; + tensor var_1533_begin_0 = const()[name = tensor("op_1533_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1533_end_0 = const()[name = tensor("op_1533_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1533_end_mask_0 = const()[name = tensor("op_1533_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = var_1533_end_0, end_mask = var_1533_end_mask_0, x = var_1209_cast_fp16)[name = tensor("op_1533_cast_fp16")]; + tensor var_1540_begin_0 = const()[name = tensor("op_1540_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1540_end_0 = const()[name = tensor("op_1540_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1540_end_mask_0 = const()[name = tensor("op_1540_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1540_cast_fp16 = slice_by_index(begin = var_1540_begin_0, end = var_1540_end_0, end_mask = var_1540_end_mask_0, x = var_1209_cast_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1547_begin_0 = const()[name = tensor("op_1547_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1547_end_0 = const()[name = tensor("op_1547_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1547_end_mask_0 = const()[name = tensor("op_1547_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1547_cast_fp16 = slice_by_index(begin = var_1547_begin_0, end = var_1547_end_0, end_mask = var_1547_end_mask_0, x = var_1209_cast_fp16)[name = tensor("op_1547_cast_fp16")]; + tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1552_begin_0 = const()[name = tensor("op_1552_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1552_end_0 = const()[name = tensor("op_1552_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_1552_end_mask_0 = const()[name = tensor("op_1552_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_10 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor("transpose_10")]; + tensor var_1552_cast_fp16 = slice_by_index(begin = var_1552_begin_0, end = var_1552_end_0, end_mask = var_1552_end_mask_0, x = transpose_10)[name = tensor("op_1552_cast_fp16")]; + tensor var_1556_begin_0 = const()[name = tensor("op_1556_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_1556_end_0 = const()[name = tensor("op_1556_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_1556_end_mask_0 = const()[name = tensor("op_1556_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1556_cast_fp16 = slice_by_index(begin = var_1556_begin_0, end = var_1556_end_0, end_mask = var_1556_end_mask_0, x = transpose_10)[name = tensor("op_1556_cast_fp16")]; + tensor var_1560_begin_0 = const()[name = tensor("op_1560_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1560_end_0 = const()[name = tensor("op_1560_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_1560_end_mask_0 = const()[name = tensor("op_1560_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1560_cast_fp16 = slice_by_index(begin = var_1560_begin_0, end = var_1560_end_0, end_mask = var_1560_end_mask_0, x = transpose_10)[name = tensor("op_1560_cast_fp16")]; + tensor var_1564_begin_0 = const()[name = tensor("op_1564_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_1564_end_0 = const()[name = tensor("op_1564_end_0"), val = 
tensor([1, 1500, 1, 256])]; + tensor var_1564_end_mask_0 = const()[name = tensor("op_1564_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1564_cast_fp16 = slice_by_index(begin = var_1564_begin_0, end = var_1564_end_0, end_mask = var_1564_end_mask_0, x = transpose_10)[name = tensor("op_1564_cast_fp16")]; + tensor var_1568_begin_0 = const()[name = tensor("op_1568_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1568_end_0 = const()[name = tensor("op_1568_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_1568_end_mask_0 = const()[name = tensor("op_1568_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1568_cast_fp16 = slice_by_index(begin = var_1568_begin_0, end = var_1568_end_0, end_mask = var_1568_end_mask_0, x = transpose_10)[name = tensor("op_1568_cast_fp16")]; + tensor var_1572_begin_0 = const()[name = tensor("op_1572_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_1572_end_0 = const()[name = tensor("op_1572_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_1572_end_mask_0 = const()[name = tensor("op_1572_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1572_cast_fp16 = slice_by_index(begin = var_1572_begin_0, end = var_1572_end_0, end_mask = var_1572_end_mask_0, x = transpose_10)[name = tensor("op_1572_cast_fp16")]; + tensor var_1576_begin_0 = const()[name = tensor("op_1576_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1576_end_0 = const()[name = tensor("op_1576_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_1576_end_mask_0 = const()[name = tensor("op_1576_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1576_cast_fp16 = slice_by_index(begin = var_1576_begin_0, end = var_1576_end_0, end_mask = var_1576_end_mask_0, x = transpose_10)[name = tensor("op_1576_cast_fp16")]; + tensor var_1580_begin_0 = const()[name = tensor("op_1580_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_1580_end_0 = const()[name = tensor("op_1580_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_1580_end_mask_0 = const()[name = tensor("op_1580_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1580_cast_fp16 = slice_by_index(begin = var_1580_begin_0, end = var_1580_end_0, end_mask = var_1580_end_mask_0, x = transpose_10)[name = tensor("op_1580_cast_fp16")]; + tensor var_1584_begin_0 = const()[name = tensor("op_1584_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1584_end_0 = const()[name = tensor("op_1584_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_1584_end_mask_0 = const()[name = tensor("op_1584_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1584_cast_fp16 = slice_by_index(begin = var_1584_begin_0, end = var_1584_end_0, end_mask = var_1584_end_mask_0, x = transpose_10)[name = tensor("op_1584_cast_fp16")]; + tensor var_1588_begin_0 = const()[name = tensor("op_1588_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_1588_end_0 = const()[name = tensor("op_1588_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_1588_end_mask_0 = const()[name = tensor("op_1588_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1588_cast_fp16 = slice_by_index(begin = var_1588_begin_0, end = var_1588_end_0, end_mask = var_1588_end_mask_0, x = transpose_10)[name = tensor("op_1588_cast_fp16")]; + tensor var_1592_begin_0 = const()[name = tensor("op_1592_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1592_end_0 = const()[name = tensor("op_1592_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_1592_end_mask_0 = 
const()[name = tensor("op_1592_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1592_cast_fp16 = slice_by_index(begin = var_1592_begin_0, end = var_1592_end_0, end_mask = var_1592_end_mask_0, x = transpose_10)[name = tensor("op_1592_cast_fp16")]; + tensor var_1596_begin_0 = const()[name = tensor("op_1596_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_1596_end_0 = const()[name = tensor("op_1596_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_1596_end_mask_0 = const()[name = tensor("op_1596_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1596_cast_fp16 = slice_by_index(begin = var_1596_begin_0, end = var_1596_end_0, end_mask = var_1596_end_mask_0, x = transpose_10)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_begin_0 = const()[name = tensor("op_1598_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1598_end_0 = const()[name = tensor("op_1598_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1598_end_mask_0 = const()[name = tensor("op_1598_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1598_cast_fp16 = slice_by_index(begin = var_1598_begin_0, end = var_1598_end_0, end_mask = var_1598_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1598_cast_fp16")]; + tensor var_1602_begin_0 = const()[name = tensor("op_1602_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1602_end_0 = const()[name = tensor("op_1602_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_1602_end_mask_0 = const()[name = tensor("op_1602_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1602_cast_fp16 = slice_by_index(begin = var_1602_begin_0, end = var_1602_end_0, end_mask = var_1602_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1602_cast_fp16")]; + tensor var_1606_begin_0 = const()[name = tensor("op_1606_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1606_end_0 = const()[name = tensor("op_1606_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_1606_end_mask_0 = const()[name = tensor("op_1606_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1606_cast_fp16 = slice_by_index(begin = var_1606_begin_0, end = var_1606_end_0, end_mask = var_1606_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1606_cast_fp16")]; + tensor var_1610_begin_0 = const()[name = tensor("op_1610_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_1610_end_0 = const()[name = tensor("op_1610_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_1610_end_mask_0 = const()[name = tensor("op_1610_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1610_cast_fp16 = slice_by_index(begin = var_1610_begin_0, end = var_1610_end_0, end_mask = var_1610_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1610_cast_fp16")]; + tensor var_1614_begin_0 = const()[name = tensor("op_1614_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1614_end_0 = const()[name = tensor("op_1614_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_1614_end_mask_0 = const()[name = tensor("op_1614_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1614_cast_fp16 = slice_by_index(begin = var_1614_begin_0, end = var_1614_end_0, end_mask = var_1614_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1614_cast_fp16")]; + tensor var_1618_begin_0 = const()[name = tensor("op_1618_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1618_end_0 = const()[name = tensor("op_1618_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_1618_end_mask_0 = const()[name = 
tensor("op_1618_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1618_cast_fp16 = slice_by_index(begin = var_1618_begin_0, end = var_1618_end_0, end_mask = var_1618_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1618_cast_fp16")]; + tensor var_1622_begin_0 = const()[name = tensor("op_1622_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1622_end_0 = const()[name = tensor("op_1622_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_1622_end_mask_0 = const()[name = tensor("op_1622_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1622_cast_fp16 = slice_by_index(begin = var_1622_begin_0, end = var_1622_end_0, end_mask = var_1622_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1622_cast_fp16")]; + tensor var_1626_begin_0 = const()[name = tensor("op_1626_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1626_end_0 = const()[name = tensor("op_1626_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_1626_end_mask_0 = const()[name = tensor("op_1626_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1626_cast_fp16 = slice_by_index(begin = var_1626_begin_0, end = var_1626_end_0, end_mask = var_1626_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1626_cast_fp16")]; + tensor var_1630_begin_0 = const()[name = tensor("op_1630_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1630_end_0 = const()[name = tensor("op_1630_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_1630_end_mask_0 = const()[name = tensor("op_1630_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1630_cast_fp16 = slice_by_index(begin = var_1630_begin_0, end = var_1630_end_0, end_mask = var_1630_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1630_cast_fp16")]; + tensor var_1634_begin_0 = const()[name = tensor("op_1634_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_1634_end_0 = const()[name = tensor("op_1634_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_1634_end_mask_0 = const()[name = tensor("op_1634_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1634_cast_fp16 = slice_by_index(begin = var_1634_begin_0, end = var_1634_end_0, end_mask = var_1634_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1634_cast_fp16")]; + tensor var_1638_begin_0 = const()[name = tensor("op_1638_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1638_end_0 = const()[name = tensor("op_1638_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_1638_end_mask_0 = const()[name = tensor("op_1638_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1638_cast_fp16 = slice_by_index(begin = var_1638_begin_0, end = var_1638_end_0, end_mask = var_1638_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1638_cast_fp16")]; + tensor var_1642_begin_0 = const()[name = tensor("op_1642_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_1642_end_0 = const()[name = tensor("op_1642_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_1642_end_mask_0 = const()[name = tensor("op_1642_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1642_cast_fp16 = slice_by_index(begin = var_1642_begin_0, end = var_1642_end_0, end_mask = var_1642_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1642_cast_fp16")]; + tensor var_1646_equation_0 = const()[name = tensor("op_1646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1646_cast_fp16 = einsum(equation = var_1646_equation_0, values = (var_1552_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1646_cast_fp16")]; + 
tensor var_1647_to_fp16 = const()[name = tensor("op_1647_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_97_cast_fp16 = mul(x = var_1646_cast_fp16, y = var_1647_to_fp16)[name = tensor("aw_chunk_97_cast_fp16")]; + tensor var_1650_equation_0 = const()[name = tensor("op_1650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1552_cast_fp16, var_1225_cast_fp16))[name = tensor("op_1650_cast_fp16")]; + tensor var_1651_to_fp16 = const()[name = tensor("op_1651_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_99_cast_fp16 = mul(x = var_1650_cast_fp16, y = var_1651_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; + tensor var_1654_equation_0 = const()[name = tensor("op_1654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1552_cast_fp16, var_1232_cast_fp16))[name = tensor("op_1654_cast_fp16")]; + tensor var_1655_to_fp16 = const()[name = tensor("op_1655_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_101_cast_fp16 = mul(x = var_1654_cast_fp16, y = var_1655_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; + tensor var_1658_equation_0 = const()[name = tensor("op_1658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1552_cast_fp16, var_1239_cast_fp16))[name = tensor("op_1658_cast_fp16")]; + tensor var_1659_to_fp16 = const()[name = tensor("op_1659_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_103_cast_fp16 = mul(x = var_1658_cast_fp16, y = var_1659_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; + tensor var_1662_equation_0 = const()[name = tensor("op_1662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1556_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1662_cast_fp16")]; + tensor var_1663_to_fp16 = const()[name = tensor("op_1663_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_105_cast_fp16 = mul(x = var_1662_cast_fp16, y = var_1663_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; + tensor var_1666_equation_0 = const()[name = tensor("op_1666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1556_cast_fp16, var_1253_cast_fp16))[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_to_fp16 = const()[name = tensor("op_1667_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_107_cast_fp16 = mul(x = var_1666_cast_fp16, y = var_1667_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; + tensor var_1670_equation_0 = const()[name = tensor("op_1670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1556_cast_fp16, var_1260_cast_fp16))[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_to_fp16 = const()[name = tensor("op_1671_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_109_cast_fp16 = mul(x = var_1670_cast_fp16, y = var_1671_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; + tensor var_1674_equation_0 = const()[name = tensor("op_1674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1674_cast_fp16 = einsum(equation = var_1674_equation_0, values = (var_1556_cast_fp16, var_1267_cast_fp16))[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_to_fp16 = const()[name = tensor("op_1675_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_111_cast_fp16 = mul(x = var_1674_cast_fp16, y = 
var_1675_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; + tensor var_1678_equation_0 = const()[name = tensor("op_1678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1678_cast_fp16 = einsum(equation = var_1678_equation_0, values = (var_1560_cast_fp16, var_1274_cast_fp16))[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_to_fp16 = const()[name = tensor("op_1679_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_113_cast_fp16 = mul(x = var_1678_cast_fp16, y = var_1679_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; + tensor var_1682_equation_0 = const()[name = tensor("op_1682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1682_cast_fp16 = einsum(equation = var_1682_equation_0, values = (var_1560_cast_fp16, var_1281_cast_fp16))[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_to_fp16 = const()[name = tensor("op_1683_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_115_cast_fp16 = mul(x = var_1682_cast_fp16, y = var_1683_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; + tensor var_1686_equation_0 = const()[name = tensor("op_1686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1686_cast_fp16 = einsum(equation = var_1686_equation_0, values = (var_1560_cast_fp16, var_1288_cast_fp16))[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_to_fp16 = const()[name = tensor("op_1687_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_117_cast_fp16 = mul(x = var_1686_cast_fp16, y = var_1687_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; + tensor var_1690_equation_0 = const()[name = tensor("op_1690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1690_cast_fp16 = einsum(equation = var_1690_equation_0, values = (var_1560_cast_fp16, var_1295_cast_fp16))[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_to_fp16 = const()[name = tensor("op_1691_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_119_cast_fp16 = mul(x = var_1690_cast_fp16, y = var_1691_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; + tensor var_1694_equation_0 = const()[name = tensor("op_1694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1694_cast_fp16 = einsum(equation = var_1694_equation_0, values = (var_1564_cast_fp16, var_1302_cast_fp16))[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_to_fp16 = const()[name = tensor("op_1695_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_121_cast_fp16 = mul(x = var_1694_cast_fp16, y = var_1695_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1564_cast_fp16, var_1309_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1699_to_fp16 = const()[name = tensor("op_1699_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_123_cast_fp16 = mul(x = var_1698_cast_fp16, y = var_1699_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1564_cast_fp16, var_1316_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1703_to_fp16 = const()[name = tensor("op_1703_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_125_cast_fp16 = mul(x = var_1702_cast_fp16, y = var_1703_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1564_cast_fp16, var_1323_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1707_to_fp16 = const()[name = tensor("op_1707_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_127_cast_fp16 = mul(x = var_1706_cast_fp16, y = var_1707_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1568_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1711_to_fp16 = const()[name = tensor("op_1711_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_129_cast_fp16 = mul(x = var_1710_cast_fp16, y = var_1711_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1568_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1715_to_fp16 = const()[name = tensor("op_1715_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_131_cast_fp16 = mul(x = var_1714_cast_fp16, y = var_1715_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1568_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1719_to_fp16 = const()[name = tensor("op_1719_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_133_cast_fp16 = mul(x = var_1718_cast_fp16, y = var_1719_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1568_cast_fp16, var_1351_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1723_to_fp16 = const()[name = tensor("op_1723_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_135_cast_fp16 = mul(x = var_1722_cast_fp16, y = var_1723_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1572_cast_fp16, var_1358_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1727_to_fp16 = const()[name = tensor("op_1727_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_137_cast_fp16 = mul(x = var_1726_cast_fp16, y = var_1727_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1572_cast_fp16, var_1365_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1731_to_fp16 = const()[name = tensor("op_1731_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_139_cast_fp16 = mul(x = var_1730_cast_fp16, y = var_1731_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1572_cast_fp16, var_1372_cast_fp16))[name 
= tensor("op_1734_cast_fp16")]; + tensor var_1735_to_fp16 = const()[name = tensor("op_1735_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_141_cast_fp16 = mul(x = var_1734_cast_fp16, y = var_1735_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1572_cast_fp16, var_1379_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1739_to_fp16 = const()[name = tensor("op_1739_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_143_cast_fp16 = mul(x = var_1738_cast_fp16, y = var_1739_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1576_cast_fp16, var_1386_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1743_to_fp16 = const()[name = tensor("op_1743_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_145_cast_fp16 = mul(x = var_1742_cast_fp16, y = var_1743_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1576_cast_fp16, var_1393_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1747_to_fp16 = const()[name = tensor("op_1747_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_147_cast_fp16 = mul(x = var_1746_cast_fp16, y = var_1747_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1576_cast_fp16, var_1400_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1751_to_fp16 = const()[name = tensor("op_1751_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_149_cast_fp16 = mul(x = var_1750_cast_fp16, y = var_1751_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1576_cast_fp16, var_1407_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1755_to_fp16 = const()[name = tensor("op_1755_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_151_cast_fp16 = mul(x = var_1754_cast_fp16, y = var_1755_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1580_cast_fp16, var_1414_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1759_to_fp16 = const()[name = tensor("op_1759_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_153_cast_fp16 = mul(x = var_1758_cast_fp16, y = var_1759_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; + tensor var_1762_equation_0 = const()[name = tensor("op_1762_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1762_cast_fp16 = einsum(equation = var_1762_equation_0, values = (var_1580_cast_fp16, var_1421_cast_fp16))[name = tensor("op_1762_cast_fp16")]; + tensor var_1763_to_fp16 = const()[name = tensor("op_1763_to_fp16"), val = tensor(0x1p-3)]; + tensor 
aw_chunk_155_cast_fp16 = mul(x = var_1762_cast_fp16, y = var_1763_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; + tensor var_1766_equation_0 = const()[name = tensor("op_1766_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1766_cast_fp16 = einsum(equation = var_1766_equation_0, values = (var_1580_cast_fp16, var_1428_cast_fp16))[name = tensor("op_1766_cast_fp16")]; + tensor var_1767_to_fp16 = const()[name = tensor("op_1767_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_157_cast_fp16 = mul(x = var_1766_cast_fp16, y = var_1767_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; + tensor var_1770_equation_0 = const()[name = tensor("op_1770_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1770_cast_fp16 = einsum(equation = var_1770_equation_0, values = (var_1580_cast_fp16, var_1435_cast_fp16))[name = tensor("op_1770_cast_fp16")]; + tensor var_1771_to_fp16 = const()[name = tensor("op_1771_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_159_cast_fp16 = mul(x = var_1770_cast_fp16, y = var_1771_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; + tensor var_1774_equation_0 = const()[name = tensor("op_1774_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1774_cast_fp16 = einsum(equation = var_1774_equation_0, values = (var_1584_cast_fp16, var_1442_cast_fp16))[name = tensor("op_1774_cast_fp16")]; + tensor var_1775_to_fp16 = const()[name = tensor("op_1775_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_161_cast_fp16 = mul(x = var_1774_cast_fp16, y = var_1775_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; + tensor var_1778_equation_0 = const()[name = tensor("op_1778_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1778_cast_fp16 = einsum(equation = var_1778_equation_0, values = (var_1584_cast_fp16, var_1449_cast_fp16))[name = tensor("op_1778_cast_fp16")]; + tensor var_1779_to_fp16 = const()[name = tensor("op_1779_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_163_cast_fp16 = mul(x = var_1778_cast_fp16, y = var_1779_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; + tensor var_1782_equation_0 = const()[name = tensor("op_1782_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1782_cast_fp16 = einsum(equation = var_1782_equation_0, values = (var_1584_cast_fp16, var_1456_cast_fp16))[name = tensor("op_1782_cast_fp16")]; + tensor var_1783_to_fp16 = const()[name = tensor("op_1783_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_165_cast_fp16 = mul(x = var_1782_cast_fp16, y = var_1783_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; + tensor var_1786_equation_0 = const()[name = tensor("op_1786_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1786_cast_fp16 = einsum(equation = var_1786_equation_0, values = (var_1584_cast_fp16, var_1463_cast_fp16))[name = tensor("op_1786_cast_fp16")]; + tensor var_1787_to_fp16 = const()[name = tensor("op_1787_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_167_cast_fp16 = mul(x = var_1786_cast_fp16, y = var_1787_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; + tensor var_1790_equation_0 = const()[name = tensor("op_1790_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1790_cast_fp16 = einsum(equation = var_1790_equation_0, values = (var_1588_cast_fp16, var_1470_cast_fp16))[name = tensor("op_1790_cast_fp16")]; + tensor var_1791_to_fp16 = const()[name = tensor("op_1791_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_169_cast_fp16 = mul(x = var_1790_cast_fp16, y = var_1791_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; + tensor var_1794_equation_0 = 
const()[name = tensor("op_1794_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1794_cast_fp16 = einsum(equation = var_1794_equation_0, values = (var_1588_cast_fp16, var_1477_cast_fp16))[name = tensor("op_1794_cast_fp16")]; + tensor var_1795_to_fp16 = const()[name = tensor("op_1795_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_171_cast_fp16 = mul(x = var_1794_cast_fp16, y = var_1795_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; + tensor var_1798_equation_0 = const()[name = tensor("op_1798_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1798_cast_fp16 = einsum(equation = var_1798_equation_0, values = (var_1588_cast_fp16, var_1484_cast_fp16))[name = tensor("op_1798_cast_fp16")]; + tensor var_1799_to_fp16 = const()[name = tensor("op_1799_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_173_cast_fp16 = mul(x = var_1798_cast_fp16, y = var_1799_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; + tensor var_1802_equation_0 = const()[name = tensor("op_1802_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1802_cast_fp16 = einsum(equation = var_1802_equation_0, values = (var_1588_cast_fp16, var_1491_cast_fp16))[name = tensor("op_1802_cast_fp16")]; + tensor var_1803_to_fp16 = const()[name = tensor("op_1803_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_175_cast_fp16 = mul(x = var_1802_cast_fp16, y = var_1803_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; + tensor var_1806_equation_0 = const()[name = tensor("op_1806_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1806_cast_fp16 = einsum(equation = var_1806_equation_0, values = (var_1592_cast_fp16, var_1498_cast_fp16))[name = tensor("op_1806_cast_fp16")]; + tensor var_1807_to_fp16 = const()[name = tensor("op_1807_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_177_cast_fp16 = mul(x = var_1806_cast_fp16, y = var_1807_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; + tensor var_1810_equation_0 = const()[name = tensor("op_1810_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1810_cast_fp16 = einsum(equation = var_1810_equation_0, values = (var_1592_cast_fp16, var_1505_cast_fp16))[name = tensor("op_1810_cast_fp16")]; + tensor var_1811_to_fp16 = const()[name = tensor("op_1811_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_179_cast_fp16 = mul(x = var_1810_cast_fp16, y = var_1811_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; + tensor var_1814_equation_0 = const()[name = tensor("op_1814_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1814_cast_fp16 = einsum(equation = var_1814_equation_0, values = (var_1592_cast_fp16, var_1512_cast_fp16))[name = tensor("op_1814_cast_fp16")]; + tensor var_1815_to_fp16 = const()[name = tensor("op_1815_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_181_cast_fp16 = mul(x = var_1814_cast_fp16, y = var_1815_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; + tensor var_1818_equation_0 = const()[name = tensor("op_1818_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1818_cast_fp16 = einsum(equation = var_1818_equation_0, values = (var_1592_cast_fp16, var_1519_cast_fp16))[name = tensor("op_1818_cast_fp16")]; + tensor var_1819_to_fp16 = const()[name = tensor("op_1819_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_183_cast_fp16 = mul(x = var_1818_cast_fp16, y = var_1819_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; + tensor var_1822_equation_0 = const()[name = tensor("op_1822_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1822_cast_fp16 = einsum(equation = var_1822_equation_0, 
values = (var_1596_cast_fp16, var_1526_cast_fp16))[name = tensor("op_1822_cast_fp16")]; + tensor var_1823_to_fp16 = const()[name = tensor("op_1823_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_185_cast_fp16 = mul(x = var_1822_cast_fp16, y = var_1823_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; + tensor var_1826_equation_0 = const()[name = tensor("op_1826_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1826_cast_fp16 = einsum(equation = var_1826_equation_0, values = (var_1596_cast_fp16, var_1533_cast_fp16))[name = tensor("op_1826_cast_fp16")]; + tensor var_1827_to_fp16 = const()[name = tensor("op_1827_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_187_cast_fp16 = mul(x = var_1826_cast_fp16, y = var_1827_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; + tensor var_1830_equation_0 = const()[name = tensor("op_1830_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1830_cast_fp16 = einsum(equation = var_1830_equation_0, values = (var_1596_cast_fp16, var_1540_cast_fp16))[name = tensor("op_1830_cast_fp16")]; + tensor var_1831_to_fp16 = const()[name = tensor("op_1831_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_189_cast_fp16 = mul(x = var_1830_cast_fp16, y = var_1831_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; + tensor var_1834_equation_0 = const()[name = tensor("op_1834_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1834_cast_fp16 = einsum(equation = var_1834_equation_0, values = (var_1596_cast_fp16, var_1547_cast_fp16))[name = tensor("op_1834_cast_fp16")]; + tensor var_1835_to_fp16 = const()[name = tensor("op_1835_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_191_cast_fp16 = mul(x = var_1834_cast_fp16, y = var_1835_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; + tensor var_1837_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_97_cast_fp16)[name = tensor("op_1837_cast_fp16")]; + tensor var_1838_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_99_cast_fp16)[name = tensor("op_1838_cast_fp16")]; + tensor var_1839_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_101_cast_fp16)[name = tensor("op_1839_cast_fp16")]; + tensor var_1840_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_103_cast_fp16)[name = tensor("op_1840_cast_fp16")]; + tensor var_1841_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_105_cast_fp16)[name = tensor("op_1841_cast_fp16")]; + tensor var_1842_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_107_cast_fp16)[name = tensor("op_1842_cast_fp16")]; + tensor var_1843_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_109_cast_fp16)[name = tensor("op_1843_cast_fp16")]; + tensor var_1844_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_111_cast_fp16)[name = tensor("op_1844_cast_fp16")]; + tensor var_1845_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_113_cast_fp16)[name = tensor("op_1845_cast_fp16")]; + tensor var_1846_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_115_cast_fp16)[name = tensor("op_1846_cast_fp16")]; + tensor var_1847_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_117_cast_fp16)[name = tensor("op_1847_cast_fp16")]; + tensor var_1848_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_119_cast_fp16)[name = tensor("op_1848_cast_fp16")]; + tensor var_1849_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_121_cast_fp16)[name = tensor("op_1849_cast_fp16")]; + tensor var_1850_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_123_cast_fp16)[name = tensor("op_1850_cast_fp16")]; + tensor var_1851_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_125_cast_fp16)[name = 
tensor("op_1851_cast_fp16")]; + tensor var_1852_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_127_cast_fp16)[name = tensor("op_1852_cast_fp16")]; + tensor var_1853_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_129_cast_fp16)[name = tensor("op_1853_cast_fp16")]; + tensor var_1854_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_131_cast_fp16)[name = tensor("op_1854_cast_fp16")]; + tensor var_1855_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_133_cast_fp16)[name = tensor("op_1855_cast_fp16")]; + tensor var_1856_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_135_cast_fp16)[name = tensor("op_1856_cast_fp16")]; + tensor var_1857_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_137_cast_fp16)[name = tensor("op_1857_cast_fp16")]; + tensor var_1858_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_139_cast_fp16)[name = tensor("op_1858_cast_fp16")]; + tensor var_1859_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_141_cast_fp16)[name = tensor("op_1859_cast_fp16")]; + tensor var_1860_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_143_cast_fp16)[name = tensor("op_1860_cast_fp16")]; + tensor var_1861_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_145_cast_fp16)[name = tensor("op_1861_cast_fp16")]; + tensor var_1862_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_147_cast_fp16)[name = tensor("op_1862_cast_fp16")]; + tensor var_1863_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_149_cast_fp16)[name = tensor("op_1863_cast_fp16")]; + tensor var_1864_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_151_cast_fp16)[name = tensor("op_1864_cast_fp16")]; + tensor var_1865_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_153_cast_fp16)[name = tensor("op_1865_cast_fp16")]; + tensor var_1866_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_155_cast_fp16)[name = tensor("op_1866_cast_fp16")]; + tensor var_1867_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_157_cast_fp16)[name = tensor("op_1867_cast_fp16")]; + tensor var_1868_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_159_cast_fp16)[name = tensor("op_1868_cast_fp16")]; + tensor var_1869_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_161_cast_fp16)[name = tensor("op_1869_cast_fp16")]; + tensor var_1870_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_163_cast_fp16)[name = tensor("op_1870_cast_fp16")]; + tensor var_1871_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_165_cast_fp16)[name = tensor("op_1871_cast_fp16")]; + tensor var_1872_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_167_cast_fp16)[name = tensor("op_1872_cast_fp16")]; + tensor var_1873_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_169_cast_fp16)[name = tensor("op_1873_cast_fp16")]; + tensor var_1874_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_171_cast_fp16)[name = tensor("op_1874_cast_fp16")]; + tensor var_1875_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_173_cast_fp16)[name = tensor("op_1875_cast_fp16")]; + tensor var_1876_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_175_cast_fp16)[name = tensor("op_1876_cast_fp16")]; + tensor var_1877_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_177_cast_fp16)[name = tensor("op_1877_cast_fp16")]; + tensor var_1878_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_179_cast_fp16)[name = tensor("op_1878_cast_fp16")]; + tensor var_1879_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_181_cast_fp16)[name = tensor("op_1879_cast_fp16")]; + tensor var_1880_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_183_cast_fp16)[name = tensor("op_1880_cast_fp16")]; + tensor var_1881_cast_fp16 = softmax(axis = 
var_1110, x = aw_chunk_185_cast_fp16)[name = tensor("op_1881_cast_fp16")]; + tensor var_1882_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_187_cast_fp16)[name = tensor("op_1882_cast_fp16")]; + tensor var_1883_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_189_cast_fp16)[name = tensor("op_1883_cast_fp16")]; + tensor var_1884_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_191_cast_fp16)[name = tensor("op_1884_cast_fp16")]; + tensor var_1886_equation_0 = const()[name = tensor("op_1886_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1886_cast_fp16 = einsum(equation = var_1886_equation_0, values = (var_1598_cast_fp16, var_1837_cast_fp16))[name = tensor("op_1886_cast_fp16")]; + tensor var_1888_equation_0 = const()[name = tensor("op_1888_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1888_cast_fp16 = einsum(equation = var_1888_equation_0, values = (var_1598_cast_fp16, var_1838_cast_fp16))[name = tensor("op_1888_cast_fp16")]; + tensor var_1890_equation_0 = const()[name = tensor("op_1890_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1890_cast_fp16 = einsum(equation = var_1890_equation_0, values = (var_1598_cast_fp16, var_1839_cast_fp16))[name = tensor("op_1890_cast_fp16")]; + tensor var_1892_equation_0 = const()[name = tensor("op_1892_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1892_cast_fp16 = einsum(equation = var_1892_equation_0, values = (var_1598_cast_fp16, var_1840_cast_fp16))[name = tensor("op_1892_cast_fp16")]; + tensor var_1894_equation_0 = const()[name = tensor("op_1894_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1894_cast_fp16 = einsum(equation = var_1894_equation_0, values = (var_1602_cast_fp16, var_1841_cast_fp16))[name = tensor("op_1894_cast_fp16")]; + tensor var_1896_equation_0 = const()[name = tensor("op_1896_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1896_cast_fp16 = einsum(equation = var_1896_equation_0, values = (var_1602_cast_fp16, var_1842_cast_fp16))[name = tensor("op_1896_cast_fp16")]; + tensor var_1898_equation_0 = const()[name = tensor("op_1898_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1898_cast_fp16 = einsum(equation = var_1898_equation_0, values = (var_1602_cast_fp16, var_1843_cast_fp16))[name = tensor("op_1898_cast_fp16")]; + tensor var_1900_equation_0 = const()[name = tensor("op_1900_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1900_cast_fp16 = einsum(equation = var_1900_equation_0, values = (var_1602_cast_fp16, var_1844_cast_fp16))[name = tensor("op_1900_cast_fp16")]; + tensor var_1902_equation_0 = const()[name = tensor("op_1902_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1902_cast_fp16 = einsum(equation = var_1902_equation_0, values = (var_1606_cast_fp16, var_1845_cast_fp16))[name = tensor("op_1902_cast_fp16")]; + tensor var_1904_equation_0 = const()[name = tensor("op_1904_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1904_cast_fp16 = einsum(equation = var_1904_equation_0, values = (var_1606_cast_fp16, var_1846_cast_fp16))[name = tensor("op_1904_cast_fp16")]; + tensor var_1906_equation_0 = const()[name = tensor("op_1906_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1906_cast_fp16 = einsum(equation = var_1906_equation_0, values = (var_1606_cast_fp16, var_1847_cast_fp16))[name = tensor("op_1906_cast_fp16")]; + tensor var_1908_equation_0 = const()[name = tensor("op_1908_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1908_cast_fp16 = einsum(equation = var_1908_equation_0, 
values = (var_1606_cast_fp16, var_1848_cast_fp16))[name = tensor("op_1908_cast_fp16")]; + tensor var_1910_equation_0 = const()[name = tensor("op_1910_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1910_cast_fp16 = einsum(equation = var_1910_equation_0, values = (var_1610_cast_fp16, var_1849_cast_fp16))[name = tensor("op_1910_cast_fp16")]; + tensor var_1912_equation_0 = const()[name = tensor("op_1912_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1912_cast_fp16 = einsum(equation = var_1912_equation_0, values = (var_1610_cast_fp16, var_1850_cast_fp16))[name = tensor("op_1912_cast_fp16")]; + tensor var_1914_equation_0 = const()[name = tensor("op_1914_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1914_cast_fp16 = einsum(equation = var_1914_equation_0, values = (var_1610_cast_fp16, var_1851_cast_fp16))[name = tensor("op_1914_cast_fp16")]; + tensor var_1916_equation_0 = const()[name = tensor("op_1916_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1916_cast_fp16 = einsum(equation = var_1916_equation_0, values = (var_1610_cast_fp16, var_1852_cast_fp16))[name = tensor("op_1916_cast_fp16")]; + tensor var_1918_equation_0 = const()[name = tensor("op_1918_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1918_cast_fp16 = einsum(equation = var_1918_equation_0, values = (var_1614_cast_fp16, var_1853_cast_fp16))[name = tensor("op_1918_cast_fp16")]; + tensor var_1920_equation_0 = const()[name = tensor("op_1920_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1920_cast_fp16 = einsum(equation = var_1920_equation_0, values = (var_1614_cast_fp16, var_1854_cast_fp16))[name = tensor("op_1920_cast_fp16")]; + tensor var_1922_equation_0 = const()[name = tensor("op_1922_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1922_cast_fp16 = einsum(equation = var_1922_equation_0, values = (var_1614_cast_fp16, var_1855_cast_fp16))[name = tensor("op_1922_cast_fp16")]; + tensor var_1924_equation_0 = const()[name = tensor("op_1924_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1924_cast_fp16 = einsum(equation = var_1924_equation_0, values = (var_1614_cast_fp16, var_1856_cast_fp16))[name = tensor("op_1924_cast_fp16")]; + tensor var_1926_equation_0 = const()[name = tensor("op_1926_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1926_cast_fp16 = einsum(equation = var_1926_equation_0, values = (var_1618_cast_fp16, var_1857_cast_fp16))[name = tensor("op_1926_cast_fp16")]; + tensor var_1928_equation_0 = const()[name = tensor("op_1928_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1928_cast_fp16 = einsum(equation = var_1928_equation_0, values = (var_1618_cast_fp16, var_1858_cast_fp16))[name = tensor("op_1928_cast_fp16")]; + tensor var_1930_equation_0 = const()[name = tensor("op_1930_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1930_cast_fp16 = einsum(equation = var_1930_equation_0, values = (var_1618_cast_fp16, var_1859_cast_fp16))[name = tensor("op_1930_cast_fp16")]; + tensor var_1932_equation_0 = const()[name = tensor("op_1932_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1932_cast_fp16 = einsum(equation = var_1932_equation_0, values = (var_1618_cast_fp16, var_1860_cast_fp16))[name = tensor("op_1932_cast_fp16")]; + tensor var_1934_equation_0 = const()[name = tensor("op_1934_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1934_cast_fp16 = einsum(equation = var_1934_equation_0, values = (var_1622_cast_fp16, var_1861_cast_fp16))[name = 
tensor("op_1934_cast_fp16")]; + tensor var_1936_equation_0 = const()[name = tensor("op_1936_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1936_cast_fp16 = einsum(equation = var_1936_equation_0, values = (var_1622_cast_fp16, var_1862_cast_fp16))[name = tensor("op_1936_cast_fp16")]; + tensor var_1938_equation_0 = const()[name = tensor("op_1938_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1938_cast_fp16 = einsum(equation = var_1938_equation_0, values = (var_1622_cast_fp16, var_1863_cast_fp16))[name = tensor("op_1938_cast_fp16")]; + tensor var_1940_equation_0 = const()[name = tensor("op_1940_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1940_cast_fp16 = einsum(equation = var_1940_equation_0, values = (var_1622_cast_fp16, var_1864_cast_fp16))[name = tensor("op_1940_cast_fp16")]; + tensor var_1942_equation_0 = const()[name = tensor("op_1942_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1942_cast_fp16 = einsum(equation = var_1942_equation_0, values = (var_1626_cast_fp16, var_1865_cast_fp16))[name = tensor("op_1942_cast_fp16")]; + tensor var_1944_equation_0 = const()[name = tensor("op_1944_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1944_cast_fp16 = einsum(equation = var_1944_equation_0, values = (var_1626_cast_fp16, var_1866_cast_fp16))[name = tensor("op_1944_cast_fp16")]; + tensor var_1946_equation_0 = const()[name = tensor("op_1946_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1946_cast_fp16 = einsum(equation = var_1946_equation_0, values = (var_1626_cast_fp16, var_1867_cast_fp16))[name = tensor("op_1946_cast_fp16")]; + tensor var_1948_equation_0 = const()[name = tensor("op_1948_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1948_cast_fp16 = einsum(equation = var_1948_equation_0, values = (var_1626_cast_fp16, var_1868_cast_fp16))[name = tensor("op_1948_cast_fp16")]; + tensor var_1950_equation_0 = const()[name = tensor("op_1950_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1950_cast_fp16 = einsum(equation = var_1950_equation_0, values = (var_1630_cast_fp16, var_1869_cast_fp16))[name = tensor("op_1950_cast_fp16")]; + tensor var_1952_equation_0 = const()[name = tensor("op_1952_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1952_cast_fp16 = einsum(equation = var_1952_equation_0, values = (var_1630_cast_fp16, var_1870_cast_fp16))[name = tensor("op_1952_cast_fp16")]; + tensor var_1954_equation_0 = const()[name = tensor("op_1954_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1954_cast_fp16 = einsum(equation = var_1954_equation_0, values = (var_1630_cast_fp16, var_1871_cast_fp16))[name = tensor("op_1954_cast_fp16")]; + tensor var_1956_equation_0 = const()[name = tensor("op_1956_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1956_cast_fp16 = einsum(equation = var_1956_equation_0, values = (var_1630_cast_fp16, var_1872_cast_fp16))[name = tensor("op_1956_cast_fp16")]; + tensor var_1958_equation_0 = const()[name = tensor("op_1958_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1958_cast_fp16 = einsum(equation = var_1958_equation_0, values = (var_1634_cast_fp16, var_1873_cast_fp16))[name = tensor("op_1958_cast_fp16")]; + tensor var_1960_equation_0 = const()[name = tensor("op_1960_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1960_cast_fp16 = einsum(equation = var_1960_equation_0, values = (var_1634_cast_fp16, var_1874_cast_fp16))[name = tensor("op_1960_cast_fp16")]; + tensor var_1962_equation_0 = const()[name = 
tensor("op_1962_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1962_cast_fp16 = einsum(equation = var_1962_equation_0, values = (var_1634_cast_fp16, var_1875_cast_fp16))[name = tensor("op_1962_cast_fp16")]; + tensor var_1964_equation_0 = const()[name = tensor("op_1964_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1964_cast_fp16 = einsum(equation = var_1964_equation_0, values = (var_1634_cast_fp16, var_1876_cast_fp16))[name = tensor("op_1964_cast_fp16")]; + tensor var_1966_equation_0 = const()[name = tensor("op_1966_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1966_cast_fp16 = einsum(equation = var_1966_equation_0, values = (var_1638_cast_fp16, var_1877_cast_fp16))[name = tensor("op_1966_cast_fp16")]; + tensor var_1968_equation_0 = const()[name = tensor("op_1968_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1968_cast_fp16 = einsum(equation = var_1968_equation_0, values = (var_1638_cast_fp16, var_1878_cast_fp16))[name = tensor("op_1968_cast_fp16")]; + tensor var_1970_equation_0 = const()[name = tensor("op_1970_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1970_cast_fp16 = einsum(equation = var_1970_equation_0, values = (var_1638_cast_fp16, var_1879_cast_fp16))[name = tensor("op_1970_cast_fp16")]; + tensor var_1972_equation_0 = const()[name = tensor("op_1972_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1972_cast_fp16 = einsum(equation = var_1972_equation_0, values = (var_1638_cast_fp16, var_1880_cast_fp16))[name = tensor("op_1972_cast_fp16")]; + tensor var_1974_equation_0 = const()[name = tensor("op_1974_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1974_cast_fp16 = einsum(equation = var_1974_equation_0, values = (var_1642_cast_fp16, var_1881_cast_fp16))[name = tensor("op_1974_cast_fp16")]; + tensor var_1976_equation_0 = const()[name = tensor("op_1976_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1976_cast_fp16 = einsum(equation = var_1976_equation_0, values = (var_1642_cast_fp16, var_1882_cast_fp16))[name = tensor("op_1976_cast_fp16")]; + tensor var_1978_equation_0 = const()[name = tensor("op_1978_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1978_cast_fp16 = einsum(equation = var_1978_equation_0, values = (var_1642_cast_fp16, var_1883_cast_fp16))[name = tensor("op_1978_cast_fp16")]; + tensor var_1980_equation_0 = const()[name = tensor("op_1980_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1980_cast_fp16 = einsum(equation = var_1980_equation_0, values = (var_1642_cast_fp16, var_1884_cast_fp16))[name = tensor("op_1980_cast_fp16")]; + tensor var_1982_interleave_0 = const()[name = tensor("op_1982_interleave_0"), val = tensor(false)]; + tensor var_1982_cast_fp16 = concat(axis = var_1093, interleave = var_1982_interleave_0, values = (var_1886_cast_fp16, var_1888_cast_fp16, var_1890_cast_fp16, var_1892_cast_fp16))[name = tensor("op_1982_cast_fp16")]; + tensor var_1984_interleave_0 = const()[name = tensor("op_1984_interleave_0"), val = tensor(false)]; + tensor var_1984_cast_fp16 = concat(axis = var_1093, interleave = var_1984_interleave_0, values = (var_1894_cast_fp16, var_1896_cast_fp16, var_1898_cast_fp16, var_1900_cast_fp16))[name = tensor("op_1984_cast_fp16")]; + tensor var_1986_interleave_0 = const()[name = tensor("op_1986_interleave_0"), val = tensor(false)]; + tensor var_1986_cast_fp16 = concat(axis = var_1093, interleave = var_1986_interleave_0, values = (var_1902_cast_fp16, var_1904_cast_fp16, var_1906_cast_fp16, var_1908_cast_fp16))[name = 
tensor("op_1986_cast_fp16")]; + tensor var_1988_interleave_0 = const()[name = tensor("op_1988_interleave_0"), val = tensor(false)]; + tensor var_1988_cast_fp16 = concat(axis = var_1093, interleave = var_1988_interleave_0, values = (var_1910_cast_fp16, var_1912_cast_fp16, var_1914_cast_fp16, var_1916_cast_fp16))[name = tensor("op_1988_cast_fp16")]; + tensor var_1990_interleave_0 = const()[name = tensor("op_1990_interleave_0"), val = tensor(false)]; + tensor var_1990_cast_fp16 = concat(axis = var_1093, interleave = var_1990_interleave_0, values = (var_1918_cast_fp16, var_1920_cast_fp16, var_1922_cast_fp16, var_1924_cast_fp16))[name = tensor("op_1990_cast_fp16")]; + tensor var_1992_interleave_0 = const()[name = tensor("op_1992_interleave_0"), val = tensor(false)]; + tensor var_1992_cast_fp16 = concat(axis = var_1093, interleave = var_1992_interleave_0, values = (var_1926_cast_fp16, var_1928_cast_fp16, var_1930_cast_fp16, var_1932_cast_fp16))[name = tensor("op_1992_cast_fp16")]; + tensor var_1994_interleave_0 = const()[name = tensor("op_1994_interleave_0"), val = tensor(false)]; + tensor var_1994_cast_fp16 = concat(axis = var_1093, interleave = var_1994_interleave_0, values = (var_1934_cast_fp16, var_1936_cast_fp16, var_1938_cast_fp16, var_1940_cast_fp16))[name = tensor("op_1994_cast_fp16")]; + tensor var_1996_interleave_0 = const()[name = tensor("op_1996_interleave_0"), val = tensor(false)]; + tensor var_1996_cast_fp16 = concat(axis = var_1093, interleave = var_1996_interleave_0, values = (var_1942_cast_fp16, var_1944_cast_fp16, var_1946_cast_fp16, var_1948_cast_fp16))[name = tensor("op_1996_cast_fp16")]; + tensor var_1998_interleave_0 = const()[name = tensor("op_1998_interleave_0"), val = tensor(false)]; + tensor var_1998_cast_fp16 = concat(axis = var_1093, interleave = var_1998_interleave_0, values = (var_1950_cast_fp16, var_1952_cast_fp16, var_1954_cast_fp16, var_1956_cast_fp16))[name = tensor("op_1998_cast_fp16")]; + tensor var_2000_interleave_0 = const()[name = tensor("op_2000_interleave_0"), val = tensor(false)]; + tensor var_2000_cast_fp16 = concat(axis = var_1093, interleave = var_2000_interleave_0, values = (var_1958_cast_fp16, var_1960_cast_fp16, var_1962_cast_fp16, var_1964_cast_fp16))[name = tensor("op_2000_cast_fp16")]; + tensor var_2002_interleave_0 = const()[name = tensor("op_2002_interleave_0"), val = tensor(false)]; + tensor var_2002_cast_fp16 = concat(axis = var_1093, interleave = var_2002_interleave_0, values = (var_1966_cast_fp16, var_1968_cast_fp16, var_1970_cast_fp16, var_1972_cast_fp16))[name = tensor("op_2002_cast_fp16")]; + tensor var_2004_interleave_0 = const()[name = tensor("op_2004_interleave_0"), val = tensor(false)]; + tensor var_2004_cast_fp16 = concat(axis = var_1093, interleave = var_2004_interleave_0, values = (var_1974_cast_fp16, var_1976_cast_fp16, var_1978_cast_fp16, var_1980_cast_fp16))[name = tensor("op_2004_cast_fp16")]; + tensor input_9_interleave_0 = const()[name = tensor("input_9_interleave_0"), val = tensor(false)]; + tensor input_9_cast_fp16 = concat(axis = var_1110, interleave = input_9_interleave_0, values = (var_1982_cast_fp16, var_1984_cast_fp16, var_1986_cast_fp16, var_1988_cast_fp16, var_1990_cast_fp16, var_1992_cast_fp16, var_1994_cast_fp16, var_1996_cast_fp16, var_1998_cast_fp16, var_2000_cast_fp16, var_2002_cast_fp16, var_2004_cast_fp16))[name = tensor("input_9_cast_fp16")]; + tensor var_2009 = const()[name = tensor("op_2009"), val = tensor([1, 1])]; + tensor var_2011 = const()[name = tensor("op_2011"), val = tensor([1, 1])]; + tensor 
obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("custom")]; + tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23938944)))]; + tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25118656)))]; + tensor obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_2011, groups = var_1110, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_2009, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_2017 = const()[name = tensor("op_2017"), val = tensor([1])]; + tensor channels_mean_7_cast_fp16 = reduce_mean(axes = var_2017, keep_dims = var_1111, x = inputs_7_cast_fp16)[name = tensor("channels_mean_7_cast_fp16")]; + tensor zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor("zero_mean_7_cast_fp16")]; + tensor zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor("zero_mean_sq_7_cast_fp16")]; + tensor var_2021 = const()[name = tensor("op_2021"), val = tensor([1])]; + tensor var_2022_cast_fp16 = reduce_mean(axes = var_2021, keep_dims = var_1111, x = zero_mean_sq_7_cast_fp16)[name = tensor("op_2022_cast_fp16")]; + tensor var_2023_to_fp16 = const()[name = tensor("op_2023_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2024_cast_fp16 = add(x = var_2022_cast_fp16, y = var_2023_to_fp16)[name = tensor("op_2024_cast_fp16")]; + tensor denom_7_epsilon_0_to_fp16 = const()[name = tensor("denom_7_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_2024_cast_fp16)[name = tensor("denom_7_cast_fp16")]; + tensor out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25120256)))]; + tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25121856)))]; + tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_2035 = const()[name = tensor("op_2035"), val = tensor([1, 1])]; + tensor var_2037 = const()[name = tensor("op_2037"), val = tensor([1, 1])]; + tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("custom")]; + tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25123456)))]; + tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29842112)))]; + tensor input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_2037, groups = var_1110, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = var_2035, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_2043 = const()[name = tensor("op_2043"), val = tensor([1, 1])]; + tensor var_2045 = const()[name = tensor("op_2045"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29848320)))]; + tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34566976)))]; + tensor hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_2045, groups = var_1110, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_2043, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_2052 = const()[name = tensor("op_2052"), val = tensor(3)]; + tensor var_2069 = const()[name = tensor("op_2069"), val = tensor(1)]; + tensor var_2070 = const()[name = tensor("op_2070"), val = tensor(true)]; + tensor var_2080 = const()[name = tensor("op_2080"), val = tensor([1])]; + tensor channels_mean_9_cast_fp16 = reduce_mean(axes = var_2080, keep_dims = var_2070, x = inputs_9_cast_fp16)[name = tensor("channels_mean_9_cast_fp16")]; + tensor zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor("zero_mean_9_cast_fp16")]; + tensor zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor("zero_mean_sq_9_cast_fp16")]; + tensor var_2084 = const()[name = tensor("op_2084"), val = tensor([1])]; + tensor var_2085_cast_fp16 = reduce_mean(axes = var_2084, keep_dims = var_2070, x = zero_mean_sq_9_cast_fp16)[name = tensor("op_2085_cast_fp16")]; + tensor var_2086_to_fp16 = const()[name = tensor("op_2086_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2087_cast_fp16 = add(x = var_2085_cast_fp16, y = var_2086_to_fp16)[name = tensor("op_2087_cast_fp16")]; + tensor denom_9_epsilon_0_to_fp16 = const()[name = tensor("denom_9_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_2087_cast_fp16)[name = tensor("denom_9_cast_fp16")]; + tensor out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = 
tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34568576)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34570176)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_2102 = const()[name = tensor("op_2102"), val = tensor([1, 1])]; + tensor var_2104 = const()[name = tensor("op_2104"), val = tensor([1, 1])]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("custom")]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34571776)))]; + tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35751488)))]; + tensor query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_2104, groups = var_2069, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_2102, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_2108 = const()[name = tensor("op_2108"), val = tensor([1, 1])]; + tensor var_2110 = const()[name = tensor("op_2110"), val = tensor([1, 1])]; + tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("custom")]; + tensor key_5_pad_0 = const()[name = tensor("key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35753088)))]; + tensor key_5_cast_fp16 = conv(dilations = var_2110, groups = var_2069, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = var_2108, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_2115 = const()[name = tensor("op_2115"), val = tensor([1, 1])]; + tensor var_2117 = const()[name = tensor("op_2117"), val = tensor([1, 1])]; + tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("custom")]; + tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36932800)))]; + tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38112512)))]; + tensor value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_2117, groups = var_2069, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = var_2115, 
weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_2124_begin_0 = const()[name = tensor("op_2124_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2124_end_0 = const()[name = tensor("op_2124_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2124_end_mask_0 = const()[name = tensor("op_2124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2124_cast_fp16 = slice_by_index(begin = var_2124_begin_0, end = var_2124_end_0, end_mask = var_2124_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2124_cast_fp16")]; + tensor var_2128_begin_0 = const()[name = tensor("op_2128_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2128_end_0 = const()[name = tensor("op_2128_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_2128_end_mask_0 = const()[name = tensor("op_2128_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2128_cast_fp16 = slice_by_index(begin = var_2128_begin_0, end = var_2128_end_0, end_mask = var_2128_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2128_cast_fp16")]; + tensor var_2132_begin_0 = const()[name = tensor("op_2132_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2132_end_0 = const()[name = tensor("op_2132_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_2132_end_mask_0 = const()[name = tensor("op_2132_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2132_cast_fp16 = slice_by_index(begin = var_2132_begin_0, end = var_2132_end_0, end_mask = var_2132_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2132_cast_fp16")]; + tensor var_2136_begin_0 = const()[name = tensor("op_2136_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2136_end_0 = const()[name = tensor("op_2136_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_2136_end_mask_0 = const()[name = tensor("op_2136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2136_cast_fp16 = slice_by_index(begin = var_2136_begin_0, end = var_2136_end_0, end_mask = var_2136_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2136_cast_fp16")]; + tensor var_2140_begin_0 = const()[name = tensor("op_2140_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2140_end_0 = const()[name = tensor("op_2140_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_2140_end_mask_0 = const()[name = tensor("op_2140_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2140_cast_fp16 = slice_by_index(begin = var_2140_begin_0, end = var_2140_end_0, end_mask = var_2140_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2140_cast_fp16")]; + tensor var_2144_begin_0 = const()[name = tensor("op_2144_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2144_end_0 = const()[name = tensor("op_2144_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_2144_end_mask_0 = const()[name = tensor("op_2144_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2144_cast_fp16 = slice_by_index(begin = var_2144_begin_0, end = var_2144_end_0, end_mask = var_2144_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2144_cast_fp16")]; + tensor var_2148_begin_0 = const()[name = tensor("op_2148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2148_end_0 = const()[name = tensor("op_2148_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_2148_end_mask_0 = const()[name = tensor("op_2148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2148_cast_fp16 = slice_by_index(begin = var_2148_begin_0, end = var_2148_end_0, end_mask = 
var_2148_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2148_cast_fp16")]; + tensor var_2152_begin_0 = const()[name = tensor("op_2152_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2152_end_0 = const()[name = tensor("op_2152_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_2152_end_mask_0 = const()[name = tensor("op_2152_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2152_cast_fp16 = slice_by_index(begin = var_2152_begin_0, end = var_2152_end_0, end_mask = var_2152_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2152_cast_fp16")]; + tensor var_2156_begin_0 = const()[name = tensor("op_2156_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2156_end_0 = const()[name = tensor("op_2156_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_2156_end_mask_0 = const()[name = tensor("op_2156_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2156_cast_fp16 = slice_by_index(begin = var_2156_begin_0, end = var_2156_end_0, end_mask = var_2156_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2156_cast_fp16")]; + tensor var_2160_begin_0 = const()[name = tensor("op_2160_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_2160_end_0 = const()[name = tensor("op_2160_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_2160_end_mask_0 = const()[name = tensor("op_2160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2160_cast_fp16 = slice_by_index(begin = var_2160_begin_0, end = var_2160_end_0, end_mask = var_2160_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2160_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2177_begin_0 = const()[name = tensor("op_2177_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2177_end_0 = const()[name = tensor("op_2177_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2177_end_mask_0 = const()[name = tensor("op_2177_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2177_cast_fp16 = slice_by_index(begin = var_2177_begin_0, end = var_2177_end_0, end_mask = var_2177_end_mask_0, x = var_2124_cast_fp16)[name = tensor("op_2177_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = 
var_2124_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2191_begin_0 = const()[name = tensor("op_2191_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2191_end_0 = const()[name = tensor("op_2191_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2191_end_mask_0 = const()[name = tensor("op_2191_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2191_cast_fp16 = slice_by_index(begin = var_2191_begin_0, end = var_2191_end_0, end_mask = var_2191_end_mask_0, x = var_2124_cast_fp16)[name = tensor("op_2191_cast_fp16")]; + tensor var_2198_begin_0 = const()[name = tensor("op_2198_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2198_end_0 = const()[name = tensor("op_2198_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2198_end_mask_0 = const()[name = tensor("op_2198_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2198_cast_fp16 = slice_by_index(begin = var_2198_begin_0, end = var_2198_end_0, end_mask = var_2198_end_mask_0, x = var_2124_cast_fp16)[name = tensor("op_2198_cast_fp16")]; + tensor var_2205_begin_0 = const()[name = tensor("op_2205_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2205_end_0 = const()[name = tensor("op_2205_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2205_end_mask_0 = const()[name = tensor("op_2205_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2205_cast_fp16 = slice_by_index(begin = var_2205_begin_0, end = var_2205_end_0, end_mask = var_2205_end_mask_0, x = var_2128_cast_fp16)[name = tensor("op_2205_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = var_2128_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2219_begin_0 = const()[name = tensor("op_2219_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2219_end_0 = const()[name = tensor("op_2219_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2219_end_mask_0 = const()[name = tensor("op_2219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2219_cast_fp16 = slice_by_index(begin = var_2219_begin_0, end = var_2219_end_0, end_mask = var_2219_end_mask_0, x = var_2128_cast_fp16)[name = tensor("op_2219_cast_fp16")]; + tensor var_2226_begin_0 = const()[name = tensor("op_2226_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2226_end_0 = const()[name = tensor("op_2226_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2226_end_mask_0 = const()[name = tensor("op_2226_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2226_cast_fp16 = slice_by_index(begin = var_2226_begin_0, end = var_2226_end_0, end_mask = var_2226_end_mask_0, x = var_2128_cast_fp16)[name = tensor("op_2226_cast_fp16")]; + tensor var_2233_begin_0 = const()[name = tensor("op_2233_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2233_end_0 = const()[name = tensor("op_2233_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2233_end_mask_0 = const()[name = tensor("op_2233_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2233_cast_fp16 = slice_by_index(begin = var_2233_begin_0, end = var_2233_end_0, end_mask = var_2233_end_mask_0, x = var_2132_cast_fp16)[name = 
tensor("op_2233_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = var_2132_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2247_begin_0 = const()[name = tensor("op_2247_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2247_end_0 = const()[name = tensor("op_2247_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2247_end_mask_0 = const()[name = tensor("op_2247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2247_cast_fp16 = slice_by_index(begin = var_2247_begin_0, end = var_2247_end_0, end_mask = var_2247_end_mask_0, x = var_2132_cast_fp16)[name = tensor("op_2247_cast_fp16")]; + tensor var_2254_begin_0 = const()[name = tensor("op_2254_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2254_end_0 = const()[name = tensor("op_2254_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2254_end_mask_0 = const()[name = tensor("op_2254_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2254_cast_fp16 = slice_by_index(begin = var_2254_begin_0, end = var_2254_end_0, end_mask = var_2254_end_mask_0, x = var_2132_cast_fp16)[name = tensor("op_2254_cast_fp16")]; + tensor var_2261_begin_0 = const()[name = tensor("op_2261_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2261_end_0 = const()[name = tensor("op_2261_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2261_end_mask_0 = const()[name = tensor("op_2261_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2261_cast_fp16 = slice_by_index(begin = var_2261_begin_0, end = var_2261_end_0, end_mask = var_2261_end_mask_0, x = var_2136_cast_fp16)[name = tensor("op_2261_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = var_2136_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2275_begin_0 = const()[name = tensor("op_2275_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2275_end_0 = const()[name = tensor("op_2275_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2275_end_mask_0 = const()[name = tensor("op_2275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2275_cast_fp16 = slice_by_index(begin = var_2275_begin_0, end = var_2275_end_0, end_mask = var_2275_end_mask_0, x = var_2136_cast_fp16)[name = tensor("op_2275_cast_fp16")]; + tensor var_2282_begin_0 = const()[name = tensor("op_2282_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2282_end_0 = const()[name = tensor("op_2282_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2282_end_mask_0 = const()[name = tensor("op_2282_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2282_cast_fp16 = slice_by_index(begin = var_2282_begin_0, end = var_2282_end_0, end_mask = var_2282_end_mask_0, x = var_2136_cast_fp16)[name = tensor("op_2282_cast_fp16")]; 
+ tensor var_2289_begin_0 = const()[name = tensor("op_2289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2289_end_0 = const()[name = tensor("op_2289_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2289_end_mask_0 = const()[name = tensor("op_2289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2289_cast_fp16 = slice_by_index(begin = var_2289_begin_0, end = var_2289_end_0, end_mask = var_2289_end_mask_0, x = var_2140_cast_fp16)[name = tensor("op_2289_cast_fp16")]; + tensor var_2296_begin_0 = const()[name = tensor("op_2296_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2296_end_0 = const()[name = tensor("op_2296_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2296_end_mask_0 = const()[name = tensor("op_2296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2296_cast_fp16 = slice_by_index(begin = var_2296_begin_0, end = var_2296_end_0, end_mask = var_2296_end_mask_0, x = var_2140_cast_fp16)[name = tensor("op_2296_cast_fp16")]; + tensor var_2303_begin_0 = const()[name = tensor("op_2303_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2303_end_0 = const()[name = tensor("op_2303_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2303_end_mask_0 = const()[name = tensor("op_2303_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2303_cast_fp16 = slice_by_index(begin = var_2303_begin_0, end = var_2303_end_0, end_mask = var_2303_end_mask_0, x = var_2140_cast_fp16)[name = tensor("op_2303_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = var_2140_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2317_begin_0 = const()[name = tensor("op_2317_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2317_end_0 = const()[name = tensor("op_2317_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2317_end_mask_0 = const()[name = tensor("op_2317_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2317_cast_fp16 = slice_by_index(begin = var_2317_begin_0, end = var_2317_end_0, end_mask = var_2317_end_mask_0, x = var_2144_cast_fp16)[name = tensor("op_2317_cast_fp16")]; + tensor var_2324_begin_0 = const()[name = tensor("op_2324_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2324_end_0 = const()[name = tensor("op_2324_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2324_end_mask_0 = const()[name = tensor("op_2324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2324_cast_fp16 = slice_by_index(begin = var_2324_begin_0, end = var_2324_end_0, end_mask = var_2324_end_mask_0, x = var_2144_cast_fp16)[name = tensor("op_2324_cast_fp16")]; + tensor var_2331_begin_0 = const()[name = tensor("op_2331_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2331_end_0 = const()[name = tensor("op_2331_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2331_end_mask_0 = const()[name = tensor("op_2331_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2331_cast_fp16 = slice_by_index(begin = var_2331_begin_0, end = var_2331_end_0, end_mask = var_2331_end_mask_0, x = var_2144_cast_fp16)[name = tensor("op_2331_cast_fp16")]; + tensor var_2338_begin_0 = 
const()[name = tensor("op_2338_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = var_2144_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2345_begin_0 = const()[name = tensor("op_2345_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2345_end_0 = const()[name = tensor("op_2345_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2345_end_mask_0 = const()[name = tensor("op_2345_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2345_cast_fp16 = slice_by_index(begin = var_2345_begin_0, end = var_2345_end_0, end_mask = var_2345_end_mask_0, x = var_2148_cast_fp16)[name = tensor("op_2345_cast_fp16")]; + tensor var_2352_begin_0 = const()[name = tensor("op_2352_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2352_end_0 = const()[name = tensor("op_2352_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2352_end_mask_0 = const()[name = tensor("op_2352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2352_cast_fp16 = slice_by_index(begin = var_2352_begin_0, end = var_2352_end_0, end_mask = var_2352_end_mask_0, x = var_2148_cast_fp16)[name = tensor("op_2352_cast_fp16")]; + tensor var_2359_begin_0 = const()[name = tensor("op_2359_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2359_end_0 = const()[name = tensor("op_2359_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2359_end_mask_0 = const()[name = tensor("op_2359_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2359_cast_fp16 = slice_by_index(begin = var_2359_begin_0, end = var_2359_end_0, end_mask = var_2359_end_mask_0, x = var_2148_cast_fp16)[name = tensor("op_2359_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = var_2148_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2373_begin_0 = const()[name = tensor("op_2373_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2373_end_0 = const()[name = tensor("op_2373_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2373_end_mask_0 = const()[name = tensor("op_2373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2373_cast_fp16 = slice_by_index(begin = var_2373_begin_0, end = var_2373_end_0, end_mask = var_2373_end_mask_0, x = var_2152_cast_fp16)[name = tensor("op_2373_cast_fp16")]; + tensor var_2380_begin_0 = const()[name = tensor("op_2380_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2380_end_0 = const()[name = tensor("op_2380_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2380_end_mask_0 = const()[name = tensor("op_2380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2380_cast_fp16 = slice_by_index(begin = var_2380_begin_0, end = var_2380_end_0, end_mask = var_2380_end_mask_0, x = var_2152_cast_fp16)[name = tensor("op_2380_cast_fp16")]; + tensor var_2387_begin_0 = const()[name = 
tensor("op_2387_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2387_end_0 = const()[name = tensor("op_2387_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2387_end_mask_0 = const()[name = tensor("op_2387_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2387_cast_fp16 = slice_by_index(begin = var_2387_begin_0, end = var_2387_end_0, end_mask = var_2387_end_mask_0, x = var_2152_cast_fp16)[name = tensor("op_2387_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = var_2152_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2401_begin_0 = const()[name = tensor("op_2401_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2401_end_0 = const()[name = tensor("op_2401_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2401_end_mask_0 = const()[name = tensor("op_2401_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2401_cast_fp16 = slice_by_index(begin = var_2401_begin_0, end = var_2401_end_0, end_mask = var_2401_end_mask_0, x = var_2156_cast_fp16)[name = tensor("op_2401_cast_fp16")]; + tensor var_2408_begin_0 = const()[name = tensor("op_2408_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2408_end_0 = const()[name = tensor("op_2408_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2408_end_mask_0 = const()[name = tensor("op_2408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2408_cast_fp16 = slice_by_index(begin = var_2408_begin_0, end = var_2408_end_0, end_mask = var_2408_end_mask_0, x = var_2156_cast_fp16)[name = tensor("op_2408_cast_fp16")]; + tensor var_2415_begin_0 = const()[name = tensor("op_2415_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2415_end_0 = const()[name = tensor("op_2415_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2415_end_mask_0 = const()[name = tensor("op_2415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2415_cast_fp16 = slice_by_index(begin = var_2415_begin_0, end = var_2415_end_0, end_mask = var_2415_end_mask_0, x = var_2156_cast_fp16)[name = tensor("op_2415_cast_fp16")]; + tensor var_2422_begin_0 = const()[name = tensor("op_2422_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2422_end_0 = const()[name = tensor("op_2422_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2422_end_mask_0 = const()[name = tensor("op_2422_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2422_cast_fp16 = slice_by_index(begin = var_2422_begin_0, end = var_2422_end_0, end_mask = var_2422_end_mask_0, x = var_2156_cast_fp16)[name = tensor("op_2422_cast_fp16")]; + tensor var_2429_begin_0 = const()[name = tensor("op_2429_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2429_end_0 = const()[name = tensor("op_2429_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2429_end_mask_0 = const()[name = tensor("op_2429_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2429_cast_fp16 = slice_by_index(begin = var_2429_begin_0, end = var_2429_end_0, end_mask = var_2429_end_mask_0, x = var_2160_cast_fp16)[name = tensor("op_2429_cast_fp16")]; + tensor var_2436_begin_0 = const()[name = tensor("op_2436_begin_0"), val 
= tensor([0, 0, 0, 375])]; + tensor var_2436_end_0 = const()[name = tensor("op_2436_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2436_end_mask_0 = const()[name = tensor("op_2436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2436_cast_fp16 = slice_by_index(begin = var_2436_begin_0, end = var_2436_end_0, end_mask = var_2436_end_mask_0, x = var_2160_cast_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2443_begin_0 = const()[name = tensor("op_2443_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2443_end_0 = const()[name = tensor("op_2443_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2443_end_mask_0 = const()[name = tensor("op_2443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2443_cast_fp16 = slice_by_index(begin = var_2443_begin_0, end = var_2443_end_0, end_mask = var_2443_end_mask_0, x = var_2160_cast_fp16)[name = tensor("op_2443_cast_fp16")]; + tensor var_2450_begin_0 = const()[name = tensor("op_2450_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2450_end_0 = const()[name = tensor("op_2450_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2450_end_mask_0 = const()[name = tensor("op_2450_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2450_cast_fp16 = slice_by_index(begin = var_2450_begin_0, end = var_2450_end_0, end_mask = var_2450_end_mask_0, x = var_2160_cast_fp16)[name = tensor("op_2450_cast_fp16")]; + tensor var_2457_begin_0 = const()[name = tensor("op_2457_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2457_end_0 = const()[name = tensor("op_2457_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2457_end_mask_0 = const()[name = tensor("op_2457_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2457_cast_fp16 = slice_by_index(begin = var_2457_begin_0, end = var_2457_end_0, end_mask = var_2457_end_mask_0, x = var_2164_cast_fp16)[name = tensor("op_2457_cast_fp16")]; + tensor var_2464_begin_0 = const()[name = tensor("op_2464_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2464_end_0 = const()[name = tensor("op_2464_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2464_end_mask_0 = const()[name = tensor("op_2464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2464_cast_fp16 = slice_by_index(begin = var_2464_begin_0, end = var_2464_end_0, end_mask = var_2464_end_mask_0, x = var_2164_cast_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2471_begin_0 = const()[name = tensor("op_2471_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2471_end_0 = const()[name = tensor("op_2471_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2471_end_mask_0 = const()[name = tensor("op_2471_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2471_cast_fp16 = slice_by_index(begin = var_2471_begin_0, end = var_2471_end_0, end_mask = var_2471_end_mask_0, x = var_2164_cast_fp16)[name = tensor("op_2471_cast_fp16")]; + tensor var_2478_begin_0 = const()[name = tensor("op_2478_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2478_end_0 = const()[name = tensor("op_2478_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2478_end_mask_0 = const()[name = tensor("op_2478_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2478_cast_fp16 = slice_by_index(begin = var_2478_begin_0, end = var_2478_end_0, end_mask = var_2478_end_mask_0, x = var_2164_cast_fp16)[name = tensor("op_2478_cast_fp16")]; + tensor var_2485_begin_0 = const()[name = tensor("op_2485_begin_0"), val = tensor([0, 0, 0, 0])]; + 
tensor var_2485_end_0 = const()[name = tensor("op_2485_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2485_end_mask_0 = const()[name = tensor("op_2485_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2485_cast_fp16 = slice_by_index(begin = var_2485_begin_0, end = var_2485_end_0, end_mask = var_2485_end_mask_0, x = var_2168_cast_fp16)[name = tensor("op_2485_cast_fp16")]; + tensor var_2492_begin_0 = const()[name = tensor("op_2492_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2492_end_0 = const()[name = tensor("op_2492_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2492_end_mask_0 = const()[name = tensor("op_2492_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2492_cast_fp16 = slice_by_index(begin = var_2492_begin_0, end = var_2492_end_0, end_mask = var_2492_end_mask_0, x = var_2168_cast_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2499_begin_0 = const()[name = tensor("op_2499_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2499_end_0 = const()[name = tensor("op_2499_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2499_end_mask_0 = const()[name = tensor("op_2499_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2499_cast_fp16 = slice_by_index(begin = var_2499_begin_0, end = var_2499_end_0, end_mask = var_2499_end_mask_0, x = var_2168_cast_fp16)[name = tensor("op_2499_cast_fp16")]; + tensor var_2506_begin_0 = const()[name = tensor("op_2506_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2506_end_0 = const()[name = tensor("op_2506_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2506_end_mask_0 = const()[name = tensor("op_2506_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2506_cast_fp16 = slice_by_index(begin = var_2506_begin_0, end = var_2506_end_0, end_mask = var_2506_end_mask_0, x = var_2168_cast_fp16)[name = tensor("op_2506_cast_fp16")]; + tensor k_5_perm_0 = const()[name = tensor("k_5_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2511_begin_0 = const()[name = tensor("op_2511_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2511_end_0 = const()[name = tensor("op_2511_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_2511_end_mask_0 = const()[name = tensor("op_2511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_9 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor("transpose_9")]; + tensor var_2511_cast_fp16 = slice_by_index(begin = var_2511_begin_0, end = var_2511_end_0, end_mask = var_2511_end_mask_0, x = transpose_9)[name = tensor("op_2511_cast_fp16")]; + tensor var_2515_begin_0 = const()[name = tensor("op_2515_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_2515_end_0 = const()[name = tensor("op_2515_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_2515_end_mask_0 = const()[name = tensor("op_2515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2515_cast_fp16 = slice_by_index(begin = var_2515_begin_0, end = var_2515_end_0, end_mask = var_2515_end_mask_0, x = transpose_9)[name = tensor("op_2515_cast_fp16")]; + tensor var_2519_begin_0 = const()[name = tensor("op_2519_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2519_end_0 = const()[name = tensor("op_2519_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_2519_end_mask_0 = const()[name = tensor("op_2519_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2519_cast_fp16 = slice_by_index(begin = var_2519_begin_0, end = var_2519_end_0, end_mask = var_2519_end_mask_0, x = 
transpose_9)[name = tensor("op_2519_cast_fp16")]; + tensor var_2523_begin_0 = const()[name = tensor("op_2523_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_2523_end_0 = const()[name = tensor("op_2523_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_2523_end_mask_0 = const()[name = tensor("op_2523_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2523_cast_fp16 = slice_by_index(begin = var_2523_begin_0, end = var_2523_end_0, end_mask = var_2523_end_mask_0, x = transpose_9)[name = tensor("op_2523_cast_fp16")]; + tensor var_2527_begin_0 = const()[name = tensor("op_2527_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2527_end_0 = const()[name = tensor("op_2527_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_2527_end_mask_0 = const()[name = tensor("op_2527_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2527_cast_fp16 = slice_by_index(begin = var_2527_begin_0, end = var_2527_end_0, end_mask = var_2527_end_mask_0, x = transpose_9)[name = tensor("op_2527_cast_fp16")]; + tensor var_2531_begin_0 = const()[name = tensor("op_2531_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_2531_end_0 = const()[name = tensor("op_2531_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_2531_end_mask_0 = const()[name = tensor("op_2531_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2531_cast_fp16 = slice_by_index(begin = var_2531_begin_0, end = var_2531_end_0, end_mask = var_2531_end_mask_0, x = transpose_9)[name = tensor("op_2531_cast_fp16")]; + tensor var_2535_begin_0 = const()[name = tensor("op_2535_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2535_end_0 = const()[name = tensor("op_2535_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_2535_end_mask_0 = const()[name = tensor("op_2535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2535_cast_fp16 = slice_by_index(begin = var_2535_begin_0, end = var_2535_end_0, end_mask = var_2535_end_mask_0, x = transpose_9)[name = tensor("op_2535_cast_fp16")]; + tensor var_2539_begin_0 = const()[name = tensor("op_2539_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_2539_end_0 = const()[name = tensor("op_2539_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_2539_end_mask_0 = const()[name = tensor("op_2539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2539_cast_fp16 = slice_by_index(begin = var_2539_begin_0, end = var_2539_end_0, end_mask = var_2539_end_mask_0, x = transpose_9)[name = tensor("op_2539_cast_fp16")]; + tensor var_2543_begin_0 = const()[name = tensor("op_2543_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2543_end_0 = const()[name = tensor("op_2543_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_2543_end_mask_0 = const()[name = tensor("op_2543_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2543_cast_fp16 = slice_by_index(begin = var_2543_begin_0, end = var_2543_end_0, end_mask = var_2543_end_mask_0, x = transpose_9)[name = tensor("op_2543_cast_fp16")]; + tensor var_2547_begin_0 = const()[name = tensor("op_2547_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_2547_end_0 = const()[name = tensor("op_2547_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_2547_end_mask_0 = const()[name = tensor("op_2547_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2547_cast_fp16 = slice_by_index(begin = var_2547_begin_0, end = var_2547_end_0, end_mask = var_2547_end_mask_0, x = transpose_9)[name = tensor("op_2547_cast_fp16")]; + tensor 
var_2551_begin_0 = const()[name = tensor("op_2551_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2551_end_0 = const()[name = tensor("op_2551_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_2551_end_mask_0 = const()[name = tensor("op_2551_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2551_cast_fp16 = slice_by_index(begin = var_2551_begin_0, end = var_2551_end_0, end_mask = var_2551_end_mask_0, x = transpose_9)[name = tensor("op_2551_cast_fp16")]; + tensor var_2555_begin_0 = const()[name = tensor("op_2555_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_2555_end_0 = const()[name = tensor("op_2555_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_2555_end_mask_0 = const()[name = tensor("op_2555_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2555_cast_fp16 = slice_by_index(begin = var_2555_begin_0, end = var_2555_end_0, end_mask = var_2555_end_mask_0, x = transpose_9)[name = tensor("op_2555_cast_fp16")]; + tensor var_2557_begin_0 = const()[name = tensor("op_2557_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2557_end_0 = const()[name = tensor("op_2557_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2557_end_mask_0 = const()[name = tensor("op_2557_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = var_2557_end_0, end_mask = var_2557_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2557_cast_fp16")]; + tensor var_2561_begin_0 = const()[name = tensor("op_2561_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2561_end_0 = const()[name = tensor("op_2561_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_2561_end_mask_0 = const()[name = tensor("op_2561_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2561_cast_fp16 = slice_by_index(begin = var_2561_begin_0, end = var_2561_end_0, end_mask = var_2561_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2561_cast_fp16")]; + tensor var_2565_begin_0 = const()[name = tensor("op_2565_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2565_end_0 = const()[name = tensor("op_2565_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_2565_end_mask_0 = const()[name = tensor("op_2565_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2565_cast_fp16 = slice_by_index(begin = var_2565_begin_0, end = var_2565_end_0, end_mask = var_2565_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2565_cast_fp16")]; + tensor var_2569_begin_0 = const()[name = tensor("op_2569_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2569_end_0 = const()[name = tensor("op_2569_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_2569_end_mask_0 = const()[name = tensor("op_2569_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2569_cast_fp16 = slice_by_index(begin = var_2569_begin_0, end = var_2569_end_0, end_mask = var_2569_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2569_cast_fp16")]; + tensor var_2573_begin_0 = const()[name = tensor("op_2573_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2573_end_0 = const()[name = tensor("op_2573_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_2573_end_mask_0 = const()[name = tensor("op_2573_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2573_cast_fp16 = slice_by_index(begin = var_2573_begin_0, end = var_2573_end_0, end_mask = var_2573_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2573_cast_fp16")]; + tensor var_2577_begin_0 = const()[name = 
tensor("op_2577_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2577_end_0 = const()[name = tensor("op_2577_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_2577_end_mask_0 = const()[name = tensor("op_2577_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2577_cast_fp16 = slice_by_index(begin = var_2577_begin_0, end = var_2577_end_0, end_mask = var_2577_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2581_begin_0 = const()[name = tensor("op_2581_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2581_end_0 = const()[name = tensor("op_2581_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_2581_end_mask_0 = const()[name = tensor("op_2581_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2581_cast_fp16 = slice_by_index(begin = var_2581_begin_0, end = var_2581_end_0, end_mask = var_2581_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2585_begin_0 = const()[name = tensor("op_2585_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2585_end_0 = const()[name = tensor("op_2585_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_2585_end_mask_0 = const()[name = tensor("op_2585_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2585_cast_fp16 = slice_by_index(begin = var_2585_begin_0, end = var_2585_end_0, end_mask = var_2585_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2589_begin_0 = const()[name = tensor("op_2589_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2589_end_0 = const()[name = tensor("op_2589_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_2589_end_mask_0 = const()[name = tensor("op_2589_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2589_cast_fp16 = slice_by_index(begin = var_2589_begin_0, end = var_2589_end_0, end_mask = var_2589_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2593_begin_0 = const()[name = tensor("op_2593_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_2593_end_0 = const()[name = tensor("op_2593_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_2593_end_mask_0 = const()[name = tensor("op_2593_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2593_cast_fp16 = slice_by_index(begin = var_2593_begin_0, end = var_2593_end_0, end_mask = var_2593_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2597_begin_0 = const()[name = tensor("op_2597_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2597_end_0 = const()[name = tensor("op_2597_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_2597_end_mask_0 = const()[name = tensor("op_2597_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2597_cast_fp16 = slice_by_index(begin = var_2597_begin_0, end = var_2597_end_0, end_mask = var_2597_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2601_begin_0 = const()[name = tensor("op_2601_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_2601_end_0 = const()[name = tensor("op_2601_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_2601_end_mask_0 = const()[name = tensor("op_2601_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2601_cast_fp16 = slice_by_index(begin = var_2601_begin_0, end = var_2601_end_0, end_mask = var_2601_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2605_equation_0 = const()[name = 
tensor("op_2605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2605_cast_fp16 = einsum(equation = var_2605_equation_0, values = (var_2511_cast_fp16, var_2177_cast_fp16))[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_to_fp16 = const()[name = tensor("op_2606_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_193_cast_fp16 = mul(x = var_2605_cast_fp16, y = var_2606_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; + tensor var_2609_equation_0 = const()[name = tensor("op_2609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2609_cast_fp16 = einsum(equation = var_2609_equation_0, values = (var_2511_cast_fp16, var_2184_cast_fp16))[name = tensor("op_2609_cast_fp16")]; + tensor var_2610_to_fp16 = const()[name = tensor("op_2610_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_195_cast_fp16 = mul(x = var_2609_cast_fp16, y = var_2610_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; + tensor var_2613_equation_0 = const()[name = tensor("op_2613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2613_cast_fp16 = einsum(equation = var_2613_equation_0, values = (var_2511_cast_fp16, var_2191_cast_fp16))[name = tensor("op_2613_cast_fp16")]; + tensor var_2614_to_fp16 = const()[name = tensor("op_2614_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_197_cast_fp16 = mul(x = var_2613_cast_fp16, y = var_2614_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; + tensor var_2617_equation_0 = const()[name = tensor("op_2617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2617_cast_fp16 = einsum(equation = var_2617_equation_0, values = (var_2511_cast_fp16, var_2198_cast_fp16))[name = tensor("op_2617_cast_fp16")]; + tensor var_2618_to_fp16 = const()[name = tensor("op_2618_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_199_cast_fp16 = mul(x = var_2617_cast_fp16, y = var_2618_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; + tensor var_2621_equation_0 = const()[name = tensor("op_2621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2621_cast_fp16 = einsum(equation = var_2621_equation_0, values = (var_2515_cast_fp16, var_2205_cast_fp16))[name = tensor("op_2621_cast_fp16")]; + tensor var_2622_to_fp16 = const()[name = tensor("op_2622_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_201_cast_fp16 = mul(x = var_2621_cast_fp16, y = var_2622_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; + tensor var_2625_equation_0 = const()[name = tensor("op_2625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2625_cast_fp16 = einsum(equation = var_2625_equation_0, values = (var_2515_cast_fp16, var_2212_cast_fp16))[name = tensor("op_2625_cast_fp16")]; + tensor var_2626_to_fp16 = const()[name = tensor("op_2626_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_203_cast_fp16 = mul(x = var_2625_cast_fp16, y = var_2626_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; + tensor var_2629_equation_0 = const()[name = tensor("op_2629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2629_cast_fp16 = einsum(equation = var_2629_equation_0, values = (var_2515_cast_fp16, var_2219_cast_fp16))[name = tensor("op_2629_cast_fp16")]; + tensor var_2630_to_fp16 = const()[name = tensor("op_2630_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_205_cast_fp16 = mul(x = var_2629_cast_fp16, y = var_2630_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; + tensor var_2633_equation_0 = const()[name = tensor("op_2633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2633_cast_fp16 = einsum(equation = var_2633_equation_0, values = 
(var_2515_cast_fp16, var_2226_cast_fp16))[name = tensor("op_2633_cast_fp16")]; + tensor var_2634_to_fp16 = const()[name = tensor("op_2634_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_207_cast_fp16 = mul(x = var_2633_cast_fp16, y = var_2634_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; + tensor var_2637_equation_0 = const()[name = tensor("op_2637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2637_cast_fp16 = einsum(equation = var_2637_equation_0, values = (var_2519_cast_fp16, var_2233_cast_fp16))[name = tensor("op_2637_cast_fp16")]; + tensor var_2638_to_fp16 = const()[name = tensor("op_2638_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_209_cast_fp16 = mul(x = var_2637_cast_fp16, y = var_2638_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; + tensor var_2641_equation_0 = const()[name = tensor("op_2641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2641_cast_fp16 = einsum(equation = var_2641_equation_0, values = (var_2519_cast_fp16, var_2240_cast_fp16))[name = tensor("op_2641_cast_fp16")]; + tensor var_2642_to_fp16 = const()[name = tensor("op_2642_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_211_cast_fp16 = mul(x = var_2641_cast_fp16, y = var_2642_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; + tensor var_2645_equation_0 = const()[name = tensor("op_2645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2645_cast_fp16 = einsum(equation = var_2645_equation_0, values = (var_2519_cast_fp16, var_2247_cast_fp16))[name = tensor("op_2645_cast_fp16")]; + tensor var_2646_to_fp16 = const()[name = tensor("op_2646_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_213_cast_fp16 = mul(x = var_2645_cast_fp16, y = var_2646_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; + tensor var_2649_equation_0 = const()[name = tensor("op_2649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2649_cast_fp16 = einsum(equation = var_2649_equation_0, values = (var_2519_cast_fp16, var_2254_cast_fp16))[name = tensor("op_2649_cast_fp16")]; + tensor var_2650_to_fp16 = const()[name = tensor("op_2650_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_215_cast_fp16 = mul(x = var_2649_cast_fp16, y = var_2650_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; + tensor var_2653_equation_0 = const()[name = tensor("op_2653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2653_cast_fp16 = einsum(equation = var_2653_equation_0, values = (var_2523_cast_fp16, var_2261_cast_fp16))[name = tensor("op_2653_cast_fp16")]; + tensor var_2654_to_fp16 = const()[name = tensor("op_2654_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_217_cast_fp16 = mul(x = var_2653_cast_fp16, y = var_2654_to_fp16)[name = tensor("aw_chunk_217_cast_fp16")]; + tensor var_2657_equation_0 = const()[name = tensor("op_2657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2657_cast_fp16 = einsum(equation = var_2657_equation_0, values = (var_2523_cast_fp16, var_2268_cast_fp16))[name = tensor("op_2657_cast_fp16")]; + tensor var_2658_to_fp16 = const()[name = tensor("op_2658_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_219_cast_fp16 = mul(x = var_2657_cast_fp16, y = var_2658_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; + tensor var_2661_equation_0 = const()[name = tensor("op_2661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2661_cast_fp16 = einsum(equation = var_2661_equation_0, values = (var_2523_cast_fp16, var_2275_cast_fp16))[name = tensor("op_2661_cast_fp16")]; + tensor var_2662_to_fp16 = const()[name = tensor("op_2662_to_fp16"), val 
= tensor(0x1p-3)]; + tensor aw_chunk_221_cast_fp16 = mul(x = var_2661_cast_fp16, y = var_2662_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; + tensor var_2665_equation_0 = const()[name = tensor("op_2665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2665_cast_fp16 = einsum(equation = var_2665_equation_0, values = (var_2523_cast_fp16, var_2282_cast_fp16))[name = tensor("op_2665_cast_fp16")]; + tensor var_2666_to_fp16 = const()[name = tensor("op_2666_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_223_cast_fp16 = mul(x = var_2665_cast_fp16, y = var_2666_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; + tensor var_2669_equation_0 = const()[name = tensor("op_2669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2669_cast_fp16 = einsum(equation = var_2669_equation_0, values = (var_2527_cast_fp16, var_2289_cast_fp16))[name = tensor("op_2669_cast_fp16")]; + tensor var_2670_to_fp16 = const()[name = tensor("op_2670_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_225_cast_fp16 = mul(x = var_2669_cast_fp16, y = var_2670_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; + tensor var_2673_equation_0 = const()[name = tensor("op_2673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2673_cast_fp16 = einsum(equation = var_2673_equation_0, values = (var_2527_cast_fp16, var_2296_cast_fp16))[name = tensor("op_2673_cast_fp16")]; + tensor var_2674_to_fp16 = const()[name = tensor("op_2674_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_227_cast_fp16 = mul(x = var_2673_cast_fp16, y = var_2674_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; + tensor var_2677_equation_0 = const()[name = tensor("op_2677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2677_cast_fp16 = einsum(equation = var_2677_equation_0, values = (var_2527_cast_fp16, var_2303_cast_fp16))[name = tensor("op_2677_cast_fp16")]; + tensor var_2678_to_fp16 = const()[name = tensor("op_2678_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_229_cast_fp16 = mul(x = var_2677_cast_fp16, y = var_2678_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; + tensor var_2681_equation_0 = const()[name = tensor("op_2681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2681_cast_fp16 = einsum(equation = var_2681_equation_0, values = (var_2527_cast_fp16, var_2310_cast_fp16))[name = tensor("op_2681_cast_fp16")]; + tensor var_2682_to_fp16 = const()[name = tensor("op_2682_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_231_cast_fp16 = mul(x = var_2681_cast_fp16, y = var_2682_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; + tensor var_2685_equation_0 = const()[name = tensor("op_2685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2685_cast_fp16 = einsum(equation = var_2685_equation_0, values = (var_2531_cast_fp16, var_2317_cast_fp16))[name = tensor("op_2685_cast_fp16")]; + tensor var_2686_to_fp16 = const()[name = tensor("op_2686_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_233_cast_fp16 = mul(x = var_2685_cast_fp16, y = var_2686_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; + tensor var_2689_equation_0 = const()[name = tensor("op_2689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2689_cast_fp16 = einsum(equation = var_2689_equation_0, values = (var_2531_cast_fp16, var_2324_cast_fp16))[name = tensor("op_2689_cast_fp16")]; + tensor var_2690_to_fp16 = const()[name = tensor("op_2690_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_235_cast_fp16 = mul(x = var_2689_cast_fp16, y = var_2690_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; + 
tensor var_2693_equation_0 = const()[name = tensor("op_2693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2693_cast_fp16 = einsum(equation = var_2693_equation_0, values = (var_2531_cast_fp16, var_2331_cast_fp16))[name = tensor("op_2693_cast_fp16")]; + tensor var_2694_to_fp16 = const()[name = tensor("op_2694_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_237_cast_fp16 = mul(x = var_2693_cast_fp16, y = var_2694_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; + tensor var_2697_equation_0 = const()[name = tensor("op_2697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2697_cast_fp16 = einsum(equation = var_2697_equation_0, values = (var_2531_cast_fp16, var_2338_cast_fp16))[name = tensor("op_2697_cast_fp16")]; + tensor var_2698_to_fp16 = const()[name = tensor("op_2698_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_239_cast_fp16 = mul(x = var_2697_cast_fp16, y = var_2698_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; + tensor var_2701_equation_0 = const()[name = tensor("op_2701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2701_cast_fp16 = einsum(equation = var_2701_equation_0, values = (var_2535_cast_fp16, var_2345_cast_fp16))[name = tensor("op_2701_cast_fp16")]; + tensor var_2702_to_fp16 = const()[name = tensor("op_2702_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_241_cast_fp16 = mul(x = var_2701_cast_fp16, y = var_2702_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; + tensor var_2705_equation_0 = const()[name = tensor("op_2705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2705_cast_fp16 = einsum(equation = var_2705_equation_0, values = (var_2535_cast_fp16, var_2352_cast_fp16))[name = tensor("op_2705_cast_fp16")]; + tensor var_2706_to_fp16 = const()[name = tensor("op_2706_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_243_cast_fp16 = mul(x = var_2705_cast_fp16, y = var_2706_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; + tensor var_2709_equation_0 = const()[name = tensor("op_2709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2709_cast_fp16 = einsum(equation = var_2709_equation_0, values = (var_2535_cast_fp16, var_2359_cast_fp16))[name = tensor("op_2709_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_245_cast_fp16 = mul(x = var_2709_cast_fp16, y = var_2710_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; + tensor var_2713_equation_0 = const()[name = tensor("op_2713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2713_cast_fp16 = einsum(equation = var_2713_equation_0, values = (var_2535_cast_fp16, var_2366_cast_fp16))[name = tensor("op_2713_cast_fp16")]; + tensor var_2714_to_fp16 = const()[name = tensor("op_2714_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_247_cast_fp16 = mul(x = var_2713_cast_fp16, y = var_2714_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; + tensor var_2717_equation_0 = const()[name = tensor("op_2717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2717_cast_fp16 = einsum(equation = var_2717_equation_0, values = (var_2539_cast_fp16, var_2373_cast_fp16))[name = tensor("op_2717_cast_fp16")]; + tensor var_2718_to_fp16 = const()[name = tensor("op_2718_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_249_cast_fp16 = mul(x = var_2717_cast_fp16, y = var_2718_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; + tensor var_2721_equation_0 = const()[name = tensor("op_2721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2721_cast_fp16 = 
einsum(equation = var_2721_equation_0, values = (var_2539_cast_fp16, var_2380_cast_fp16))[name = tensor("op_2721_cast_fp16")]; + tensor var_2722_to_fp16 = const()[name = tensor("op_2722_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_251_cast_fp16 = mul(x = var_2721_cast_fp16, y = var_2722_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; + tensor var_2725_equation_0 = const()[name = tensor("op_2725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2725_cast_fp16 = einsum(equation = var_2725_equation_0, values = (var_2539_cast_fp16, var_2387_cast_fp16))[name = tensor("op_2725_cast_fp16")]; + tensor var_2726_to_fp16 = const()[name = tensor("op_2726_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_253_cast_fp16 = mul(x = var_2725_cast_fp16, y = var_2726_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; + tensor var_2729_equation_0 = const()[name = tensor("op_2729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2729_cast_fp16 = einsum(equation = var_2729_equation_0, values = (var_2539_cast_fp16, var_2394_cast_fp16))[name = tensor("op_2729_cast_fp16")]; + tensor var_2730_to_fp16 = const()[name = tensor("op_2730_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_255_cast_fp16 = mul(x = var_2729_cast_fp16, y = var_2730_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; + tensor var_2733_equation_0 = const()[name = tensor("op_2733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2733_cast_fp16 = einsum(equation = var_2733_equation_0, values = (var_2543_cast_fp16, var_2401_cast_fp16))[name = tensor("op_2733_cast_fp16")]; + tensor var_2734_to_fp16 = const()[name = tensor("op_2734_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_257_cast_fp16 = mul(x = var_2733_cast_fp16, y = var_2734_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; + tensor var_2737_equation_0 = const()[name = tensor("op_2737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2737_cast_fp16 = einsum(equation = var_2737_equation_0, values = (var_2543_cast_fp16, var_2408_cast_fp16))[name = tensor("op_2737_cast_fp16")]; + tensor var_2738_to_fp16 = const()[name = tensor("op_2738_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_259_cast_fp16 = mul(x = var_2737_cast_fp16, y = var_2738_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; + tensor var_2741_equation_0 = const()[name = tensor("op_2741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2741_cast_fp16 = einsum(equation = var_2741_equation_0, values = (var_2543_cast_fp16, var_2415_cast_fp16))[name = tensor("op_2741_cast_fp16")]; + tensor var_2742_to_fp16 = const()[name = tensor("op_2742_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_261_cast_fp16 = mul(x = var_2741_cast_fp16, y = var_2742_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; + tensor var_2745_equation_0 = const()[name = tensor("op_2745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2745_cast_fp16 = einsum(equation = var_2745_equation_0, values = (var_2543_cast_fp16, var_2422_cast_fp16))[name = tensor("op_2745_cast_fp16")]; + tensor var_2746_to_fp16 = const()[name = tensor("op_2746_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_263_cast_fp16 = mul(x = var_2745_cast_fp16, y = var_2746_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; + tensor var_2749_equation_0 = const()[name = tensor("op_2749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2749_cast_fp16 = einsum(equation = var_2749_equation_0, values = (var_2547_cast_fp16, var_2429_cast_fp16))[name = tensor("op_2749_cast_fp16")]; + tensor var_2750_to_fp16 
= const()[name = tensor("op_2750_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_265_cast_fp16 = mul(x = var_2749_cast_fp16, y = var_2750_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; + tensor var_2753_equation_0 = const()[name = tensor("op_2753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2753_cast_fp16 = einsum(equation = var_2753_equation_0, values = (var_2547_cast_fp16, var_2436_cast_fp16))[name = tensor("op_2753_cast_fp16")]; + tensor var_2754_to_fp16 = const()[name = tensor("op_2754_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_267_cast_fp16 = mul(x = var_2753_cast_fp16, y = var_2754_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; + tensor var_2757_equation_0 = const()[name = tensor("op_2757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2757_cast_fp16 = einsum(equation = var_2757_equation_0, values = (var_2547_cast_fp16, var_2443_cast_fp16))[name = tensor("op_2757_cast_fp16")]; + tensor var_2758_to_fp16 = const()[name = tensor("op_2758_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_269_cast_fp16 = mul(x = var_2757_cast_fp16, y = var_2758_to_fp16)[name = tensor("aw_chunk_269_cast_fp16")]; + tensor var_2761_equation_0 = const()[name = tensor("op_2761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2761_cast_fp16 = einsum(equation = var_2761_equation_0, values = (var_2547_cast_fp16, var_2450_cast_fp16))[name = tensor("op_2761_cast_fp16")]; + tensor var_2762_to_fp16 = const()[name = tensor("op_2762_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_271_cast_fp16 = mul(x = var_2761_cast_fp16, y = var_2762_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; + tensor var_2765_equation_0 = const()[name = tensor("op_2765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2765_cast_fp16 = einsum(equation = var_2765_equation_0, values = (var_2551_cast_fp16, var_2457_cast_fp16))[name = tensor("op_2765_cast_fp16")]; + tensor var_2766_to_fp16 = const()[name = tensor("op_2766_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_273_cast_fp16 = mul(x = var_2765_cast_fp16, y = var_2766_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; + tensor var_2769_equation_0 = const()[name = tensor("op_2769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2769_cast_fp16 = einsum(equation = var_2769_equation_0, values = (var_2551_cast_fp16, var_2464_cast_fp16))[name = tensor("op_2769_cast_fp16")]; + tensor var_2770_to_fp16 = const()[name = tensor("op_2770_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_275_cast_fp16 = mul(x = var_2769_cast_fp16, y = var_2770_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; + tensor var_2773_equation_0 = const()[name = tensor("op_2773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2773_cast_fp16 = einsum(equation = var_2773_equation_0, values = (var_2551_cast_fp16, var_2471_cast_fp16))[name = tensor("op_2773_cast_fp16")]; + tensor var_2774_to_fp16 = const()[name = tensor("op_2774_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_277_cast_fp16 = mul(x = var_2773_cast_fp16, y = var_2774_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; + tensor var_2777_equation_0 = const()[name = tensor("op_2777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2777_cast_fp16 = einsum(equation = var_2777_equation_0, values = (var_2551_cast_fp16, var_2478_cast_fp16))[name = tensor("op_2777_cast_fp16")]; + tensor var_2778_to_fp16 = const()[name = tensor("op_2778_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_279_cast_fp16 = mul(x = var_2777_cast_fp16, y = 
var_2778_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; + tensor var_2781_equation_0 = const()[name = tensor("op_2781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2781_cast_fp16 = einsum(equation = var_2781_equation_0, values = (var_2555_cast_fp16, var_2485_cast_fp16))[name = tensor("op_2781_cast_fp16")]; + tensor var_2782_to_fp16 = const()[name = tensor("op_2782_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_281_cast_fp16 = mul(x = var_2781_cast_fp16, y = var_2782_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; + tensor var_2785_equation_0 = const()[name = tensor("op_2785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2785_cast_fp16 = einsum(equation = var_2785_equation_0, values = (var_2555_cast_fp16, var_2492_cast_fp16))[name = tensor("op_2785_cast_fp16")]; + tensor var_2786_to_fp16 = const()[name = tensor("op_2786_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_283_cast_fp16 = mul(x = var_2785_cast_fp16, y = var_2786_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; + tensor var_2789_equation_0 = const()[name = tensor("op_2789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2789_cast_fp16 = einsum(equation = var_2789_equation_0, values = (var_2555_cast_fp16, var_2499_cast_fp16))[name = tensor("op_2789_cast_fp16")]; + tensor var_2790_to_fp16 = const()[name = tensor("op_2790_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_285_cast_fp16 = mul(x = var_2789_cast_fp16, y = var_2790_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; + tensor var_2793_equation_0 = const()[name = tensor("op_2793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2793_cast_fp16 = einsum(equation = var_2793_equation_0, values = (var_2555_cast_fp16, var_2506_cast_fp16))[name = tensor("op_2793_cast_fp16")]; + tensor var_2794_to_fp16 = const()[name = tensor("op_2794_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_287_cast_fp16 = mul(x = var_2793_cast_fp16, y = var_2794_to_fp16)[name = tensor("aw_chunk_287_cast_fp16")]; + tensor var_2796_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_193_cast_fp16)[name = tensor("op_2796_cast_fp16")]; + tensor var_2797_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_195_cast_fp16)[name = tensor("op_2797_cast_fp16")]; + tensor var_2798_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_197_cast_fp16)[name = tensor("op_2798_cast_fp16")]; + tensor var_2799_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_199_cast_fp16)[name = tensor("op_2799_cast_fp16")]; + tensor var_2800_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_201_cast_fp16)[name = tensor("op_2800_cast_fp16")]; + tensor var_2801_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_203_cast_fp16)[name = tensor("op_2801_cast_fp16")]; + tensor var_2802_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_205_cast_fp16)[name = tensor("op_2802_cast_fp16")]; + tensor var_2803_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_207_cast_fp16)[name = tensor("op_2803_cast_fp16")]; + tensor var_2804_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_209_cast_fp16)[name = tensor("op_2804_cast_fp16")]; + tensor var_2805_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_211_cast_fp16)[name = tensor("op_2805_cast_fp16")]; + tensor var_2806_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_213_cast_fp16)[name = tensor("op_2806_cast_fp16")]; + tensor var_2807_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_215_cast_fp16)[name = tensor("op_2807_cast_fp16")]; + tensor var_2808_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_217_cast_fp16)[name = 
tensor("op_2808_cast_fp16")]; + tensor var_2809_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_219_cast_fp16)[name = tensor("op_2809_cast_fp16")]; + tensor var_2810_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_221_cast_fp16)[name = tensor("op_2810_cast_fp16")]; + tensor var_2811_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_223_cast_fp16)[name = tensor("op_2811_cast_fp16")]; + tensor var_2812_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_225_cast_fp16)[name = tensor("op_2812_cast_fp16")]; + tensor var_2813_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_227_cast_fp16)[name = tensor("op_2813_cast_fp16")]; + tensor var_2814_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_229_cast_fp16)[name = tensor("op_2814_cast_fp16")]; + tensor var_2815_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_231_cast_fp16)[name = tensor("op_2815_cast_fp16")]; + tensor var_2816_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_233_cast_fp16)[name = tensor("op_2816_cast_fp16")]; + tensor var_2817_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_235_cast_fp16)[name = tensor("op_2817_cast_fp16")]; + tensor var_2818_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_237_cast_fp16)[name = tensor("op_2818_cast_fp16")]; + tensor var_2819_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_239_cast_fp16)[name = tensor("op_2819_cast_fp16")]; + tensor var_2820_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_241_cast_fp16)[name = tensor("op_2820_cast_fp16")]; + tensor var_2821_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_243_cast_fp16)[name = tensor("op_2821_cast_fp16")]; + tensor var_2822_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_245_cast_fp16)[name = tensor("op_2822_cast_fp16")]; + tensor var_2823_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_247_cast_fp16)[name = tensor("op_2823_cast_fp16")]; + tensor var_2824_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_249_cast_fp16)[name = tensor("op_2824_cast_fp16")]; + tensor var_2825_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_251_cast_fp16)[name = tensor("op_2825_cast_fp16")]; + tensor var_2826_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_253_cast_fp16)[name = tensor("op_2826_cast_fp16")]; + tensor var_2827_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_255_cast_fp16)[name = tensor("op_2827_cast_fp16")]; + tensor var_2828_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_257_cast_fp16)[name = tensor("op_2828_cast_fp16")]; + tensor var_2829_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_259_cast_fp16)[name = tensor("op_2829_cast_fp16")]; + tensor var_2830_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_261_cast_fp16)[name = tensor("op_2830_cast_fp16")]; + tensor var_2831_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_263_cast_fp16)[name = tensor("op_2831_cast_fp16")]; + tensor var_2832_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_265_cast_fp16)[name = tensor("op_2832_cast_fp16")]; + tensor var_2833_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_267_cast_fp16)[name = tensor("op_2833_cast_fp16")]; + tensor var_2834_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_269_cast_fp16)[name = tensor("op_2834_cast_fp16")]; + tensor var_2835_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_271_cast_fp16)[name = tensor("op_2835_cast_fp16")]; + tensor var_2836_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_273_cast_fp16)[name = tensor("op_2836_cast_fp16")]; + tensor var_2837_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_275_cast_fp16)[name = tensor("op_2837_cast_fp16")]; + tensor var_2838_cast_fp16 = softmax(axis = 
var_2069, x = aw_chunk_277_cast_fp16)[name = tensor("op_2838_cast_fp16")]; + tensor var_2839_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_279_cast_fp16)[name = tensor("op_2839_cast_fp16")]; + tensor var_2840_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_281_cast_fp16)[name = tensor("op_2840_cast_fp16")]; + tensor var_2841_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_283_cast_fp16)[name = tensor("op_2841_cast_fp16")]; + tensor var_2842_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_285_cast_fp16)[name = tensor("op_2842_cast_fp16")]; + tensor var_2843_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_287_cast_fp16)[name = tensor("op_2843_cast_fp16")]; + tensor var_2845_equation_0 = const()[name = tensor("op_2845_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2845_cast_fp16 = einsum(equation = var_2845_equation_0, values = (var_2557_cast_fp16, var_2796_cast_fp16))[name = tensor("op_2845_cast_fp16")]; + tensor var_2847_equation_0 = const()[name = tensor("op_2847_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2847_cast_fp16 = einsum(equation = var_2847_equation_0, values = (var_2557_cast_fp16, var_2797_cast_fp16))[name = tensor("op_2847_cast_fp16")]; + tensor var_2849_equation_0 = const()[name = tensor("op_2849_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2849_cast_fp16 = einsum(equation = var_2849_equation_0, values = (var_2557_cast_fp16, var_2798_cast_fp16))[name = tensor("op_2849_cast_fp16")]; + tensor var_2851_equation_0 = const()[name = tensor("op_2851_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2851_cast_fp16 = einsum(equation = var_2851_equation_0, values = (var_2557_cast_fp16, var_2799_cast_fp16))[name = tensor("op_2851_cast_fp16")]; + tensor var_2853_equation_0 = const()[name = tensor("op_2853_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2853_cast_fp16 = einsum(equation = var_2853_equation_0, values = (var_2561_cast_fp16, var_2800_cast_fp16))[name = tensor("op_2853_cast_fp16")]; + tensor var_2855_equation_0 = const()[name = tensor("op_2855_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2855_cast_fp16 = einsum(equation = var_2855_equation_0, values = (var_2561_cast_fp16, var_2801_cast_fp16))[name = tensor("op_2855_cast_fp16")]; + tensor var_2857_equation_0 = const()[name = tensor("op_2857_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2857_cast_fp16 = einsum(equation = var_2857_equation_0, values = (var_2561_cast_fp16, var_2802_cast_fp16))[name = tensor("op_2857_cast_fp16")]; + tensor var_2859_equation_0 = const()[name = tensor("op_2859_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2859_cast_fp16 = einsum(equation = var_2859_equation_0, values = (var_2561_cast_fp16, var_2803_cast_fp16))[name = tensor("op_2859_cast_fp16")]; + tensor var_2861_equation_0 = const()[name = tensor("op_2861_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2861_cast_fp16 = einsum(equation = var_2861_equation_0, values = (var_2565_cast_fp16, var_2804_cast_fp16))[name = tensor("op_2861_cast_fp16")]; + tensor var_2863_equation_0 = const()[name = tensor("op_2863_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2863_cast_fp16 = einsum(equation = var_2863_equation_0, values = (var_2565_cast_fp16, var_2805_cast_fp16))[name = tensor("op_2863_cast_fp16")]; + tensor var_2865_equation_0 = const()[name = tensor("op_2865_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2865_cast_fp16 = einsum(equation = var_2865_equation_0, values = (var_2565_cast_fp16, 
var_2806_cast_fp16))[name = tensor("op_2865_cast_fp16")]; + tensor var_2867_equation_0 = const()[name = tensor("op_2867_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2867_cast_fp16 = einsum(equation = var_2867_equation_0, values = (var_2565_cast_fp16, var_2807_cast_fp16))[name = tensor("op_2867_cast_fp16")]; + tensor var_2869_equation_0 = const()[name = tensor("op_2869_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2869_cast_fp16 = einsum(equation = var_2869_equation_0, values = (var_2569_cast_fp16, var_2808_cast_fp16))[name = tensor("op_2869_cast_fp16")]; + tensor var_2871_equation_0 = const()[name = tensor("op_2871_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2871_cast_fp16 = einsum(equation = var_2871_equation_0, values = (var_2569_cast_fp16, var_2809_cast_fp16))[name = tensor("op_2871_cast_fp16")]; + tensor var_2873_equation_0 = const()[name = tensor("op_2873_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2873_cast_fp16 = einsum(equation = var_2873_equation_0, values = (var_2569_cast_fp16, var_2810_cast_fp16))[name = tensor("op_2873_cast_fp16")]; + tensor var_2875_equation_0 = const()[name = tensor("op_2875_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2875_cast_fp16 = einsum(equation = var_2875_equation_0, values = (var_2569_cast_fp16, var_2811_cast_fp16))[name = tensor("op_2875_cast_fp16")]; + tensor var_2877_equation_0 = const()[name = tensor("op_2877_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2877_cast_fp16 = einsum(equation = var_2877_equation_0, values = (var_2573_cast_fp16, var_2812_cast_fp16))[name = tensor("op_2877_cast_fp16")]; + tensor var_2879_equation_0 = const()[name = tensor("op_2879_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2879_cast_fp16 = einsum(equation = var_2879_equation_0, values = (var_2573_cast_fp16, var_2813_cast_fp16))[name = tensor("op_2879_cast_fp16")]; + tensor var_2881_equation_0 = const()[name = tensor("op_2881_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2881_cast_fp16 = einsum(equation = var_2881_equation_0, values = (var_2573_cast_fp16, var_2814_cast_fp16))[name = tensor("op_2881_cast_fp16")]; + tensor var_2883_equation_0 = const()[name = tensor("op_2883_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2883_cast_fp16 = einsum(equation = var_2883_equation_0, values = (var_2573_cast_fp16, var_2815_cast_fp16))[name = tensor("op_2883_cast_fp16")]; + tensor var_2885_equation_0 = const()[name = tensor("op_2885_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2885_cast_fp16 = einsum(equation = var_2885_equation_0, values = (var_2577_cast_fp16, var_2816_cast_fp16))[name = tensor("op_2885_cast_fp16")]; + tensor var_2887_equation_0 = const()[name = tensor("op_2887_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2887_cast_fp16 = einsum(equation = var_2887_equation_0, values = (var_2577_cast_fp16, var_2817_cast_fp16))[name = tensor("op_2887_cast_fp16")]; + tensor var_2889_equation_0 = const()[name = tensor("op_2889_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2889_cast_fp16 = einsum(equation = var_2889_equation_0, values = (var_2577_cast_fp16, var_2818_cast_fp16))[name = tensor("op_2889_cast_fp16")]; + tensor var_2891_equation_0 = const()[name = tensor("op_2891_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2891_cast_fp16 = einsum(equation = var_2891_equation_0, values = (var_2577_cast_fp16, var_2819_cast_fp16))[name = tensor("op_2891_cast_fp16")]; + tensor 
var_2893_equation_0 = const()[name = tensor("op_2893_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2893_cast_fp16 = einsum(equation = var_2893_equation_0, values = (var_2581_cast_fp16, var_2820_cast_fp16))[name = tensor("op_2893_cast_fp16")]; + tensor var_2895_equation_0 = const()[name = tensor("op_2895_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2895_cast_fp16 = einsum(equation = var_2895_equation_0, values = (var_2581_cast_fp16, var_2821_cast_fp16))[name = tensor("op_2895_cast_fp16")]; + tensor var_2897_equation_0 = const()[name = tensor("op_2897_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2897_cast_fp16 = einsum(equation = var_2897_equation_0, values = (var_2581_cast_fp16, var_2822_cast_fp16))[name = tensor("op_2897_cast_fp16")]; + tensor var_2899_equation_0 = const()[name = tensor("op_2899_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2899_cast_fp16 = einsum(equation = var_2899_equation_0, values = (var_2581_cast_fp16, var_2823_cast_fp16))[name = tensor("op_2899_cast_fp16")]; + tensor var_2901_equation_0 = const()[name = tensor("op_2901_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2901_cast_fp16 = einsum(equation = var_2901_equation_0, values = (var_2585_cast_fp16, var_2824_cast_fp16))[name = tensor("op_2901_cast_fp16")]; + tensor var_2903_equation_0 = const()[name = tensor("op_2903_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2903_cast_fp16 = einsum(equation = var_2903_equation_0, values = (var_2585_cast_fp16, var_2825_cast_fp16))[name = tensor("op_2903_cast_fp16")]; + tensor var_2905_equation_0 = const()[name = tensor("op_2905_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2905_cast_fp16 = einsum(equation = var_2905_equation_0, values = (var_2585_cast_fp16, var_2826_cast_fp16))[name = tensor("op_2905_cast_fp16")]; + tensor var_2907_equation_0 = const()[name = tensor("op_2907_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2907_cast_fp16 = einsum(equation = var_2907_equation_0, values = (var_2585_cast_fp16, var_2827_cast_fp16))[name = tensor("op_2907_cast_fp16")]; + tensor var_2909_equation_0 = const()[name = tensor("op_2909_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2909_cast_fp16 = einsum(equation = var_2909_equation_0, values = (var_2589_cast_fp16, var_2828_cast_fp16))[name = tensor("op_2909_cast_fp16")]; + tensor var_2911_equation_0 = const()[name = tensor("op_2911_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2911_cast_fp16 = einsum(equation = var_2911_equation_0, values = (var_2589_cast_fp16, var_2829_cast_fp16))[name = tensor("op_2911_cast_fp16")]; + tensor var_2913_equation_0 = const()[name = tensor("op_2913_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2913_cast_fp16 = einsum(equation = var_2913_equation_0, values = (var_2589_cast_fp16, var_2830_cast_fp16))[name = tensor("op_2913_cast_fp16")]; + tensor var_2915_equation_0 = const()[name = tensor("op_2915_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2915_cast_fp16 = einsum(equation = var_2915_equation_0, values = (var_2589_cast_fp16, var_2831_cast_fp16))[name = tensor("op_2915_cast_fp16")]; + tensor var_2917_equation_0 = const()[name = tensor("op_2917_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2917_cast_fp16 = einsum(equation = var_2917_equation_0, values = (var_2593_cast_fp16, var_2832_cast_fp16))[name = tensor("op_2917_cast_fp16")]; + tensor var_2919_equation_0 = const()[name = tensor("op_2919_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_2919_cast_fp16 = einsum(equation = var_2919_equation_0, values = (var_2593_cast_fp16, var_2833_cast_fp16))[name = tensor("op_2919_cast_fp16")]; + tensor var_2921_equation_0 = const()[name = tensor("op_2921_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2921_cast_fp16 = einsum(equation = var_2921_equation_0, values = (var_2593_cast_fp16, var_2834_cast_fp16))[name = tensor("op_2921_cast_fp16")]; + tensor var_2923_equation_0 = const()[name = tensor("op_2923_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2923_cast_fp16 = einsum(equation = var_2923_equation_0, values = (var_2593_cast_fp16, var_2835_cast_fp16))[name = tensor("op_2923_cast_fp16")]; + tensor var_2925_equation_0 = const()[name = tensor("op_2925_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2925_cast_fp16 = einsum(equation = var_2925_equation_0, values = (var_2597_cast_fp16, var_2836_cast_fp16))[name = tensor("op_2925_cast_fp16")]; + tensor var_2927_equation_0 = const()[name = tensor("op_2927_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2927_cast_fp16 = einsum(equation = var_2927_equation_0, values = (var_2597_cast_fp16, var_2837_cast_fp16))[name = tensor("op_2927_cast_fp16")]; + tensor var_2929_equation_0 = const()[name = tensor("op_2929_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2929_cast_fp16 = einsum(equation = var_2929_equation_0, values = (var_2597_cast_fp16, var_2838_cast_fp16))[name = tensor("op_2929_cast_fp16")]; + tensor var_2931_equation_0 = const()[name = tensor("op_2931_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2931_cast_fp16 = einsum(equation = var_2931_equation_0, values = (var_2597_cast_fp16, var_2839_cast_fp16))[name = tensor("op_2931_cast_fp16")]; + tensor var_2933_equation_0 = const()[name = tensor("op_2933_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2933_cast_fp16 = einsum(equation = var_2933_equation_0, values = (var_2601_cast_fp16, var_2840_cast_fp16))[name = tensor("op_2933_cast_fp16")]; + tensor var_2935_equation_0 = const()[name = tensor("op_2935_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2935_cast_fp16 = einsum(equation = var_2935_equation_0, values = (var_2601_cast_fp16, var_2841_cast_fp16))[name = tensor("op_2935_cast_fp16")]; + tensor var_2937_equation_0 = const()[name = tensor("op_2937_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2937_cast_fp16 = einsum(equation = var_2937_equation_0, values = (var_2601_cast_fp16, var_2842_cast_fp16))[name = tensor("op_2937_cast_fp16")]; + tensor var_2939_equation_0 = const()[name = tensor("op_2939_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2939_cast_fp16 = einsum(equation = var_2939_equation_0, values = (var_2601_cast_fp16, var_2843_cast_fp16))[name = tensor("op_2939_cast_fp16")]; + tensor var_2941_interleave_0 = const()[name = tensor("op_2941_interleave_0"), val = tensor(false)]; + tensor var_2941_cast_fp16 = concat(axis = var_2052, interleave = var_2941_interleave_0, values = (var_2845_cast_fp16, var_2847_cast_fp16, var_2849_cast_fp16, var_2851_cast_fp16))[name = tensor("op_2941_cast_fp16")]; + tensor var_2943_interleave_0 = const()[name = tensor("op_2943_interleave_0"), val = tensor(false)]; + tensor var_2943_cast_fp16 = concat(axis = var_2052, interleave = var_2943_interleave_0, values = (var_2853_cast_fp16, var_2855_cast_fp16, var_2857_cast_fp16, var_2859_cast_fp16))[name = tensor("op_2943_cast_fp16")]; + tensor var_2945_interleave_0 = const()[name = 
tensor("op_2945_interleave_0"), val = tensor(false)]; + tensor var_2945_cast_fp16 = concat(axis = var_2052, interleave = var_2945_interleave_0, values = (var_2861_cast_fp16, var_2863_cast_fp16, var_2865_cast_fp16, var_2867_cast_fp16))[name = tensor("op_2945_cast_fp16")]; + tensor var_2947_interleave_0 = const()[name = tensor("op_2947_interleave_0"), val = tensor(false)]; + tensor var_2947_cast_fp16 = concat(axis = var_2052, interleave = var_2947_interleave_0, values = (var_2869_cast_fp16, var_2871_cast_fp16, var_2873_cast_fp16, var_2875_cast_fp16))[name = tensor("op_2947_cast_fp16")]; + tensor var_2949_interleave_0 = const()[name = tensor("op_2949_interleave_0"), val = tensor(false)]; + tensor var_2949_cast_fp16 = concat(axis = var_2052, interleave = var_2949_interleave_0, values = (var_2877_cast_fp16, var_2879_cast_fp16, var_2881_cast_fp16, var_2883_cast_fp16))[name = tensor("op_2949_cast_fp16")]; + tensor var_2951_interleave_0 = const()[name = tensor("op_2951_interleave_0"), val = tensor(false)]; + tensor var_2951_cast_fp16 = concat(axis = var_2052, interleave = var_2951_interleave_0, values = (var_2885_cast_fp16, var_2887_cast_fp16, var_2889_cast_fp16, var_2891_cast_fp16))[name = tensor("op_2951_cast_fp16")]; + tensor var_2953_interleave_0 = const()[name = tensor("op_2953_interleave_0"), val = tensor(false)]; + tensor var_2953_cast_fp16 = concat(axis = var_2052, interleave = var_2953_interleave_0, values = (var_2893_cast_fp16, var_2895_cast_fp16, var_2897_cast_fp16, var_2899_cast_fp16))[name = tensor("op_2953_cast_fp16")]; + tensor var_2955_interleave_0 = const()[name = tensor("op_2955_interleave_0"), val = tensor(false)]; + tensor var_2955_cast_fp16 = concat(axis = var_2052, interleave = var_2955_interleave_0, values = (var_2901_cast_fp16, var_2903_cast_fp16, var_2905_cast_fp16, var_2907_cast_fp16))[name = tensor("op_2955_cast_fp16")]; + tensor var_2957_interleave_0 = const()[name = tensor("op_2957_interleave_0"), val = tensor(false)]; + tensor var_2957_cast_fp16 = concat(axis = var_2052, interleave = var_2957_interleave_0, values = (var_2909_cast_fp16, var_2911_cast_fp16, var_2913_cast_fp16, var_2915_cast_fp16))[name = tensor("op_2957_cast_fp16")]; + tensor var_2959_interleave_0 = const()[name = tensor("op_2959_interleave_0"), val = tensor(false)]; + tensor var_2959_cast_fp16 = concat(axis = var_2052, interleave = var_2959_interleave_0, values = (var_2917_cast_fp16, var_2919_cast_fp16, var_2921_cast_fp16, var_2923_cast_fp16))[name = tensor("op_2959_cast_fp16")]; + tensor var_2961_interleave_0 = const()[name = tensor("op_2961_interleave_0"), val = tensor(false)]; + tensor var_2961_cast_fp16 = concat(axis = var_2052, interleave = var_2961_interleave_0, values = (var_2925_cast_fp16, var_2927_cast_fp16, var_2929_cast_fp16, var_2931_cast_fp16))[name = tensor("op_2961_cast_fp16")]; + tensor var_2963_interleave_0 = const()[name = tensor("op_2963_interleave_0"), val = tensor(false)]; + tensor var_2963_cast_fp16 = concat(axis = var_2052, interleave = var_2963_interleave_0, values = (var_2933_cast_fp16, var_2935_cast_fp16, var_2937_cast_fp16, var_2939_cast_fp16))[name = tensor("op_2963_cast_fp16")]; + tensor input_17_interleave_0 = const()[name = tensor("input_17_interleave_0"), val = tensor(false)]; + tensor input_17_cast_fp16 = concat(axis = var_2069, interleave = input_17_interleave_0, values = (var_2941_cast_fp16, var_2943_cast_fp16, var_2945_cast_fp16, var_2947_cast_fp16, var_2949_cast_fp16, var_2951_cast_fp16, var_2953_cast_fp16, var_2955_cast_fp16, var_2957_cast_fp16, var_2959_cast_fp16, 
var_2961_cast_fp16, var_2963_cast_fp16))[name = tensor("input_17_cast_fp16")]; + tensor var_2968 = const()[name = tensor("op_2968"), val = tensor([1, 1])]; + tensor var_2970 = const()[name = tensor("op_2970"), val = tensor([1, 1])]; + tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("custom")]; + tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38114112)))]; + tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39293824)))]; + tensor obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_2970, groups = var_2069, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_2968, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor var_2976 = const()[name = tensor("op_2976"), val = tensor([1])]; + tensor channels_mean_11_cast_fp16 = reduce_mean(axes = var_2976, keep_dims = var_2070, x = inputs_11_cast_fp16)[name = tensor("channels_mean_11_cast_fp16")]; + tensor zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor("zero_mean_11_cast_fp16")]; + tensor zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor("zero_mean_sq_11_cast_fp16")]; + tensor var_2980 = const()[name = tensor("op_2980"), val = tensor([1])]; + tensor var_2981_cast_fp16 = reduce_mean(axes = var_2980, keep_dims = var_2070, x = zero_mean_sq_11_cast_fp16)[name = tensor("op_2981_cast_fp16")]; + tensor var_2982_to_fp16 = const()[name = tensor("op_2982_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2983_cast_fp16 = add(x = var_2981_cast_fp16, y = var_2982_to_fp16)[name = tensor("op_2983_cast_fp16")]; + tensor denom_11_epsilon_0_to_fp16 = const()[name = tensor("denom_11_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_2983_cast_fp16)[name = tensor("denom_11_cast_fp16")]; + tensor out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39295424)))]; + tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39297024)))]; + tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_2994 = const()[name = tensor("op_2994"), val = tensor([1, 1])]; + tensor var_2996 = const()[name = tensor("op_2996"), val = tensor([1, 1])]; + tensor input_21_pad_type_0 = 
const()[name = tensor("input_21_pad_type_0"), val = tensor("custom")]; + tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39298624)))]; + tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44017280)))]; + tensor input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_2996, groups = var_2069, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = var_2994, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_3002 = const()[name = tensor("op_3002"), val = tensor([1, 1])]; + tensor var_3004 = const()[name = tensor("op_3004"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44023488)))]; + tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48742144)))]; + tensor hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_3004, groups = var_2069, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_3002, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_3011 = const()[name = tensor("op_3011"), val = tensor(3)]; + tensor var_3028 = const()[name = tensor("op_3028"), val = tensor(1)]; + tensor var_3029 = const()[name = tensor("op_3029"), val = tensor(true)]; + tensor var_3039 = const()[name = tensor("op_3039"), val = tensor([1])]; + tensor channels_mean_13_cast_fp16 = reduce_mean(axes = var_3039, keep_dims = var_3029, x = inputs_13_cast_fp16)[name = tensor("channels_mean_13_cast_fp16")]; + tensor zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor("zero_mean_13_cast_fp16")]; + tensor zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor("zero_mean_sq_13_cast_fp16")]; + tensor var_3043 = const()[name = tensor("op_3043"), val = tensor([1])]; + tensor var_3044_cast_fp16 = reduce_mean(axes = var_3043, keep_dims = var_3029, x = zero_mean_sq_13_cast_fp16)[name = tensor("op_3044_cast_fp16")]; + tensor var_3045_to_fp16 = const()[name = tensor("op_3045_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3046_cast_fp16 = add(x = var_3044_cast_fp16, y = var_3045_to_fp16)[name = tensor("op_3046_cast_fp16")]; + tensor denom_13_epsilon_0_to_fp16 = const()[name = tensor("denom_13_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor 
denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_3046_cast_fp16)[name = tensor("denom_13_cast_fp16")]; + tensor out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48743744)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48745344)))]; + tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_3061 = const()[name = tensor("op_3061"), val = tensor([1, 1])]; + tensor var_3063 = const()[name = tensor("op_3063"), val = tensor([1, 1])]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("custom")]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48746944)))]; + tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49926656)))]; + tensor query_7_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_3063, groups = var_3028, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_3061, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_3067 = const()[name = tensor("op_3067"), val = tensor([1, 1])]; + tensor var_3069 = const()[name = tensor("op_3069"), val = tensor([1, 1])]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("custom")]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49928256)))]; + tensor key_7_cast_fp16 = conv(dilations = var_3069, groups = var_3028, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_3067, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_3074 = const()[name = tensor("op_3074"), val = tensor([1, 1])]; + tensor var_3076 = const()[name = tensor("op_3076"), val = tensor([1, 1])]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("custom")]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51107968)))]; + tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = 
tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52287680)))]; + tensor value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_3076, groups = var_3028, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_3074, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_3083_begin_0 = const()[name = tensor("op_3083_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3083_end_0 = const()[name = tensor("op_3083_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3083_end_mask_0 = const()[name = tensor("op_3083_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3083_cast_fp16 = slice_by_index(begin = var_3083_begin_0, end = var_3083_end_0, end_mask = var_3083_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3083_cast_fp16")]; + tensor var_3087_begin_0 = const()[name = tensor("op_3087_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_3087_end_0 = const()[name = tensor("op_3087_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_3087_end_mask_0 = const()[name = tensor("op_3087_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3087_cast_fp16 = slice_by_index(begin = var_3087_begin_0, end = var_3087_end_0, end_mask = var_3087_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3087_cast_fp16")]; + tensor var_3091_begin_0 = const()[name = tensor("op_3091_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_3091_end_0 = const()[name = tensor("op_3091_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_3091_end_mask_0 = const()[name = tensor("op_3091_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3091_cast_fp16 = slice_by_index(begin = var_3091_begin_0, end = var_3091_end_0, end_mask = var_3091_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3091_cast_fp16")]; + tensor var_3095_begin_0 = const()[name = tensor("op_3095_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_3095_end_0 = const()[name = tensor("op_3095_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_3095_end_mask_0 = const()[name = tensor("op_3095_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3095_cast_fp16 = slice_by_index(begin = var_3095_begin_0, end = var_3095_end_0, end_mask = var_3095_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3095_cast_fp16")]; + tensor var_3099_begin_0 = const()[name = tensor("op_3099_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_3099_end_0 = const()[name = tensor("op_3099_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_3099_end_mask_0 = const()[name = tensor("op_3099_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3099_cast_fp16 = slice_by_index(begin = var_3099_begin_0, end = var_3099_end_0, end_mask = var_3099_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3099_cast_fp16")]; + tensor var_3103_begin_0 = const()[name = tensor("op_3103_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_3103_end_0 = const()[name = tensor("op_3103_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_3103_end_mask_0 = const()[name = tensor("op_3103_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3103_cast_fp16 = slice_by_index(begin = var_3103_begin_0, end = var_3103_end_0, end_mask = var_3103_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3103_cast_fp16")]; + tensor var_3107_begin_0 = const()[name = tensor("op_3107_begin_0"), val = 
tensor([0, 384, 0, 0])]; + tensor var_3107_end_0 = const()[name = tensor("op_3107_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_3107_end_mask_0 = const()[name = tensor("op_3107_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3107_cast_fp16 = slice_by_index(begin = var_3107_begin_0, end = var_3107_end_0, end_mask = var_3107_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3107_cast_fp16")]; + tensor var_3111_begin_0 = const()[name = tensor("op_3111_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_3111_end_0 = const()[name = tensor("op_3111_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_3111_end_mask_0 = const()[name = tensor("op_3111_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3111_cast_fp16 = slice_by_index(begin = var_3111_begin_0, end = var_3111_end_0, end_mask = var_3111_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3111_cast_fp16")]; + tensor var_3115_begin_0 = const()[name = tensor("op_3115_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_3115_end_0 = const()[name = tensor("op_3115_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_3115_end_mask_0 = const()[name = tensor("op_3115_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3115_cast_fp16 = slice_by_index(begin = var_3115_begin_0, end = var_3115_end_0, end_mask = var_3115_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3115_cast_fp16")]; + tensor var_3119_begin_0 = const()[name = tensor("op_3119_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_3119_end_0 = const()[name = tensor("op_3119_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_3119_end_mask_0 = const()[name = tensor("op_3119_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3119_cast_fp16 = slice_by_index(begin = var_3119_begin_0, end = var_3119_end_0, end_mask = var_3119_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3119_cast_fp16")]; + tensor var_3123_begin_0 = const()[name = tensor("op_3123_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_3123_end_0 = const()[name = tensor("op_3123_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_3123_end_mask_0 = const()[name = tensor("op_3123_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3123_cast_fp16 = slice_by_index(begin = var_3123_begin_0, end = var_3123_end_0, end_mask = var_3123_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3123_cast_fp16")]; + tensor var_3127_begin_0 = const()[name = tensor("op_3127_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_3127_end_0 = const()[name = tensor("op_3127_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_3127_end_mask_0 = const()[name = tensor("op_3127_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3127_cast_fp16 = slice_by_index(begin = var_3127_begin_0, end = var_3127_end_0, end_mask = var_3127_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_3127_cast_fp16")]; + tensor var_3136_begin_0 = const()[name = tensor("op_3136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3136_end_0 = const()[name = tensor("op_3136_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3136_end_mask_0 = const()[name = tensor("op_3136_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3136_cast_fp16 = slice_by_index(begin = var_3136_begin_0, end = var_3136_end_0, end_mask = var_3136_end_mask_0, x = var_3083_cast_fp16)[name = tensor("op_3136_cast_fp16")]; + tensor var_3143_begin_0 = const()[name = tensor("op_3143_begin_0"), val = tensor([0, 0, 0, 375])]; + 
tensor var_3143_end_0 = const()[name = tensor("op_3143_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3143_end_mask_0 = const()[name = tensor("op_3143_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3143_cast_fp16 = slice_by_index(begin = var_3143_begin_0, end = var_3143_end_0, end_mask = var_3143_end_mask_0, x = var_3083_cast_fp16)[name = tensor("op_3143_cast_fp16")]; + tensor var_3150_begin_0 = const()[name = tensor("op_3150_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3150_end_0 = const()[name = tensor("op_3150_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3150_end_mask_0 = const()[name = tensor("op_3150_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3150_cast_fp16 = slice_by_index(begin = var_3150_begin_0, end = var_3150_end_0, end_mask = var_3150_end_mask_0, x = var_3083_cast_fp16)[name = tensor("op_3150_cast_fp16")]; + tensor var_3157_begin_0 = const()[name = tensor("op_3157_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3157_end_0 = const()[name = tensor("op_3157_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3157_end_mask_0 = const()[name = tensor("op_3157_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3157_cast_fp16 = slice_by_index(begin = var_3157_begin_0, end = var_3157_end_0, end_mask = var_3157_end_mask_0, x = var_3083_cast_fp16)[name = tensor("op_3157_cast_fp16")]; + tensor var_3164_begin_0 = const()[name = tensor("op_3164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3164_end_0 = const()[name = tensor("op_3164_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3164_end_mask_0 = const()[name = tensor("op_3164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3164_cast_fp16 = slice_by_index(begin = var_3164_begin_0, end = var_3164_end_0, end_mask = var_3164_end_mask_0, x = var_3087_cast_fp16)[name = tensor("op_3164_cast_fp16")]; + tensor var_3171_begin_0 = const()[name = tensor("op_3171_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3171_end_0 = const()[name = tensor("op_3171_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3171_end_mask_0 = const()[name = tensor("op_3171_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3171_cast_fp16 = slice_by_index(begin = var_3171_begin_0, end = var_3171_end_0, end_mask = var_3171_end_mask_0, x = var_3087_cast_fp16)[name = tensor("op_3171_cast_fp16")]; + tensor var_3178_begin_0 = const()[name = tensor("op_3178_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3178_end_0 = const()[name = tensor("op_3178_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3178_end_mask_0 = const()[name = tensor("op_3178_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3178_cast_fp16 = slice_by_index(begin = var_3178_begin_0, end = var_3178_end_0, end_mask = var_3178_end_mask_0, x = var_3087_cast_fp16)[name = tensor("op_3178_cast_fp16")]; + tensor var_3185_begin_0 = const()[name = tensor("op_3185_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3185_end_0 = const()[name = tensor("op_3185_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3185_end_mask_0 = const()[name = tensor("op_3185_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = var_3087_cast_fp16)[name = tensor("op_3185_cast_fp16")]; + tensor var_3192_begin_0 = const()[name = tensor("op_3192_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3192_end_0 = 
const()[name = tensor("op_3192_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3192_end_mask_0 = const()[name = tensor("op_3192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3192_cast_fp16 = slice_by_index(begin = var_3192_begin_0, end = var_3192_end_0, end_mask = var_3192_end_mask_0, x = var_3091_cast_fp16)[name = tensor("op_3192_cast_fp16")]; + tensor var_3199_begin_0 = const()[name = tensor("op_3199_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3199_end_0 = const()[name = tensor("op_3199_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3199_end_mask_0 = const()[name = tensor("op_3199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3199_cast_fp16 = slice_by_index(begin = var_3199_begin_0, end = var_3199_end_0, end_mask = var_3199_end_mask_0, x = var_3091_cast_fp16)[name = tensor("op_3199_cast_fp16")]; + tensor var_3206_begin_0 = const()[name = tensor("op_3206_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3206_end_0 = const()[name = tensor("op_3206_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3206_end_mask_0 = const()[name = tensor("op_3206_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3206_cast_fp16 = slice_by_index(begin = var_3206_begin_0, end = var_3206_end_0, end_mask = var_3206_end_mask_0, x = var_3091_cast_fp16)[name = tensor("op_3206_cast_fp16")]; + tensor var_3213_begin_0 = const()[name = tensor("op_3213_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3213_end_0 = const()[name = tensor("op_3213_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3213_end_mask_0 = const()[name = tensor("op_3213_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3213_cast_fp16 = slice_by_index(begin = var_3213_begin_0, end = var_3213_end_0, end_mask = var_3213_end_mask_0, x = var_3091_cast_fp16)[name = tensor("op_3213_cast_fp16")]; + tensor var_3220_begin_0 = const()[name = tensor("op_3220_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3220_end_0 = const()[name = tensor("op_3220_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3220_end_mask_0 = const()[name = tensor("op_3220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3220_cast_fp16 = slice_by_index(begin = var_3220_begin_0, end = var_3220_end_0, end_mask = var_3220_end_mask_0, x = var_3095_cast_fp16)[name = tensor("op_3220_cast_fp16")]; + tensor var_3227_begin_0 = const()[name = tensor("op_3227_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3227_end_0 = const()[name = tensor("op_3227_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3227_end_mask_0 = const()[name = tensor("op_3227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3227_cast_fp16 = slice_by_index(begin = var_3227_begin_0, end = var_3227_end_0, end_mask = var_3227_end_mask_0, x = var_3095_cast_fp16)[name = tensor("op_3227_cast_fp16")]; + tensor var_3234_begin_0 = const()[name = tensor("op_3234_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3234_end_0 = const()[name = tensor("op_3234_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3234_end_mask_0 = const()[name = tensor("op_3234_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3234_cast_fp16 = slice_by_index(begin = var_3234_begin_0, end = var_3234_end_0, end_mask = var_3234_end_mask_0, x = var_3095_cast_fp16)[name = tensor("op_3234_cast_fp16")]; + tensor var_3241_begin_0 = const()[name = tensor("op_3241_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3241_end_0 = const()[name = 
tensor("op_3241_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3241_end_mask_0 = const()[name = tensor("op_3241_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3241_cast_fp16 = slice_by_index(begin = var_3241_begin_0, end = var_3241_end_0, end_mask = var_3241_end_mask_0, x = var_3095_cast_fp16)[name = tensor("op_3241_cast_fp16")]; + tensor var_3248_begin_0 = const()[name = tensor("op_3248_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3248_end_0 = const()[name = tensor("op_3248_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3248_end_mask_0 = const()[name = tensor("op_3248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3248_cast_fp16 = slice_by_index(begin = var_3248_begin_0, end = var_3248_end_0, end_mask = var_3248_end_mask_0, x = var_3099_cast_fp16)[name = tensor("op_3248_cast_fp16")]; + tensor var_3255_begin_0 = const()[name = tensor("op_3255_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3255_end_0 = const()[name = tensor("op_3255_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3255_end_mask_0 = const()[name = tensor("op_3255_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3255_cast_fp16 = slice_by_index(begin = var_3255_begin_0, end = var_3255_end_0, end_mask = var_3255_end_mask_0, x = var_3099_cast_fp16)[name = tensor("op_3255_cast_fp16")]; + tensor var_3262_begin_0 = const()[name = tensor("op_3262_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3262_end_0 = const()[name = tensor("op_3262_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3262_end_mask_0 = const()[name = tensor("op_3262_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3262_cast_fp16 = slice_by_index(begin = var_3262_begin_0, end = var_3262_end_0, end_mask = var_3262_end_mask_0, x = var_3099_cast_fp16)[name = tensor("op_3262_cast_fp16")]; + tensor var_3269_begin_0 = const()[name = tensor("op_3269_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3269_end_0 = const()[name = tensor("op_3269_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3269_end_mask_0 = const()[name = tensor("op_3269_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3269_cast_fp16 = slice_by_index(begin = var_3269_begin_0, end = var_3269_end_0, end_mask = var_3269_end_mask_0, x = var_3099_cast_fp16)[name = tensor("op_3269_cast_fp16")]; + tensor var_3276_begin_0 = const()[name = tensor("op_3276_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3276_end_0 = const()[name = tensor("op_3276_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3276_end_mask_0 = const()[name = tensor("op_3276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3276_cast_fp16 = slice_by_index(begin = var_3276_begin_0, end = var_3276_end_0, end_mask = var_3276_end_mask_0, x = var_3103_cast_fp16)[name = tensor("op_3276_cast_fp16")]; + tensor var_3283_begin_0 = const()[name = tensor("op_3283_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3283_end_0 = const()[name = tensor("op_3283_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3283_end_mask_0 = const()[name = tensor("op_3283_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3283_cast_fp16 = slice_by_index(begin = var_3283_begin_0, end = var_3283_end_0, end_mask = var_3283_end_mask_0, x = var_3103_cast_fp16)[name = tensor("op_3283_cast_fp16")]; + tensor var_3290_begin_0 = const()[name = tensor("op_3290_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3290_end_0 = const()[name = tensor("op_3290_end_0"), val = 
tensor([1, 64, 1, 1125])]; + tensor var_3290_end_mask_0 = const()[name = tensor("op_3290_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3290_cast_fp16 = slice_by_index(begin = var_3290_begin_0, end = var_3290_end_0, end_mask = var_3290_end_mask_0, x = var_3103_cast_fp16)[name = tensor("op_3290_cast_fp16")]; + tensor var_3297_begin_0 = const()[name = tensor("op_3297_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3297_end_0 = const()[name = tensor("op_3297_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3297_end_mask_0 = const()[name = tensor("op_3297_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3297_cast_fp16 = slice_by_index(begin = var_3297_begin_0, end = var_3297_end_0, end_mask = var_3297_end_mask_0, x = var_3103_cast_fp16)[name = tensor("op_3297_cast_fp16")]; + tensor var_3304_begin_0 = const()[name = tensor("op_3304_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3304_end_0 = const()[name = tensor("op_3304_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3304_end_mask_0 = const()[name = tensor("op_3304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3304_cast_fp16 = slice_by_index(begin = var_3304_begin_0, end = var_3304_end_0, end_mask = var_3304_end_mask_0, x = var_3107_cast_fp16)[name = tensor("op_3304_cast_fp16")]; + tensor var_3311_begin_0 = const()[name = tensor("op_3311_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3311_end_0 = const()[name = tensor("op_3311_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3311_end_mask_0 = const()[name = tensor("op_3311_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3311_cast_fp16 = slice_by_index(begin = var_3311_begin_0, end = var_3311_end_0, end_mask = var_3311_end_mask_0, x = var_3107_cast_fp16)[name = tensor("op_3311_cast_fp16")]; + tensor var_3318_begin_0 = const()[name = tensor("op_3318_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3318_end_0 = const()[name = tensor("op_3318_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3318_end_mask_0 = const()[name = tensor("op_3318_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3318_cast_fp16 = slice_by_index(begin = var_3318_begin_0, end = var_3318_end_0, end_mask = var_3318_end_mask_0, x = var_3107_cast_fp16)[name = tensor("op_3318_cast_fp16")]; + tensor var_3325_begin_0 = const()[name = tensor("op_3325_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3325_end_0 = const()[name = tensor("op_3325_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3325_end_mask_0 = const()[name = tensor("op_3325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3325_cast_fp16 = slice_by_index(begin = var_3325_begin_0, end = var_3325_end_0, end_mask = var_3325_end_mask_0, x = var_3107_cast_fp16)[name = tensor("op_3325_cast_fp16")]; + tensor var_3332_begin_0 = const()[name = tensor("op_3332_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3332_end_0 = const()[name = tensor("op_3332_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3332_end_mask_0 = const()[name = tensor("op_3332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3332_cast_fp16 = slice_by_index(begin = var_3332_begin_0, end = var_3332_end_0, end_mask = var_3332_end_mask_0, x = var_3111_cast_fp16)[name = tensor("op_3332_cast_fp16")]; + tensor var_3339_begin_0 = const()[name = tensor("op_3339_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3339_end_0 = const()[name = tensor("op_3339_end_0"), val = tensor([1, 64, 1, 750])]; + 
tensor var_3339_end_mask_0 = const()[name = tensor("op_3339_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3339_cast_fp16 = slice_by_index(begin = var_3339_begin_0, end = var_3339_end_0, end_mask = var_3339_end_mask_0, x = var_3111_cast_fp16)[name = tensor("op_3339_cast_fp16")]; + tensor var_3346_begin_0 = const()[name = tensor("op_3346_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3346_end_0 = const()[name = tensor("op_3346_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3346_end_mask_0 = const()[name = tensor("op_3346_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3346_cast_fp16 = slice_by_index(begin = var_3346_begin_0, end = var_3346_end_0, end_mask = var_3346_end_mask_0, x = var_3111_cast_fp16)[name = tensor("op_3346_cast_fp16")]; + tensor var_3353_begin_0 = const()[name = tensor("op_3353_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3353_end_0 = const()[name = tensor("op_3353_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3353_end_mask_0 = const()[name = tensor("op_3353_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3353_cast_fp16 = slice_by_index(begin = var_3353_begin_0, end = var_3353_end_0, end_mask = var_3353_end_mask_0, x = var_3111_cast_fp16)[name = tensor("op_3353_cast_fp16")]; + tensor var_3360_begin_0 = const()[name = tensor("op_3360_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3360_end_0 = const()[name = tensor("op_3360_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3360_end_mask_0 = const()[name = tensor("op_3360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3360_cast_fp16 = slice_by_index(begin = var_3360_begin_0, end = var_3360_end_0, end_mask = var_3360_end_mask_0, x = var_3115_cast_fp16)[name = tensor("op_3360_cast_fp16")]; + tensor var_3367_begin_0 = const()[name = tensor("op_3367_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3367_end_0 = const()[name = tensor("op_3367_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3367_end_mask_0 = const()[name = tensor("op_3367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3367_cast_fp16 = slice_by_index(begin = var_3367_begin_0, end = var_3367_end_0, end_mask = var_3367_end_mask_0, x = var_3115_cast_fp16)[name = tensor("op_3367_cast_fp16")]; + tensor var_3374_begin_0 = const()[name = tensor("op_3374_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3374_end_0 = const()[name = tensor("op_3374_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3374_end_mask_0 = const()[name = tensor("op_3374_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3374_cast_fp16 = slice_by_index(begin = var_3374_begin_0, end = var_3374_end_0, end_mask = var_3374_end_mask_0, x = var_3115_cast_fp16)[name = tensor("op_3374_cast_fp16")]; + tensor var_3381_begin_0 = const()[name = tensor("op_3381_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3381_end_0 = const()[name = tensor("op_3381_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3381_end_mask_0 = const()[name = tensor("op_3381_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3381_cast_fp16 = slice_by_index(begin = var_3381_begin_0, end = var_3381_end_0, end_mask = var_3381_end_mask_0, x = var_3115_cast_fp16)[name = tensor("op_3381_cast_fp16")]; + tensor var_3388_begin_0 = const()[name = tensor("op_3388_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3388_end_0 = const()[name = tensor("op_3388_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3388_end_mask_0 = 
const()[name = tensor("op_3388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3388_cast_fp16 = slice_by_index(begin = var_3388_begin_0, end = var_3388_end_0, end_mask = var_3388_end_mask_0, x = var_3119_cast_fp16)[name = tensor("op_3388_cast_fp16")]; + tensor var_3395_begin_0 = const()[name = tensor("op_3395_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3395_end_0 = const()[name = tensor("op_3395_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3395_end_mask_0 = const()[name = tensor("op_3395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3395_cast_fp16 = slice_by_index(begin = var_3395_begin_0, end = var_3395_end_0, end_mask = var_3395_end_mask_0, x = var_3119_cast_fp16)[name = tensor("op_3395_cast_fp16")]; + tensor var_3402_begin_0 = const()[name = tensor("op_3402_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3402_end_0 = const()[name = tensor("op_3402_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3402_end_mask_0 = const()[name = tensor("op_3402_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3402_cast_fp16 = slice_by_index(begin = var_3402_begin_0, end = var_3402_end_0, end_mask = var_3402_end_mask_0, x = var_3119_cast_fp16)[name = tensor("op_3402_cast_fp16")]; + tensor var_3409_begin_0 = const()[name = tensor("op_3409_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3409_end_0 = const()[name = tensor("op_3409_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3409_end_mask_0 = const()[name = tensor("op_3409_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3409_cast_fp16 = slice_by_index(begin = var_3409_begin_0, end = var_3409_end_0, end_mask = var_3409_end_mask_0, x = var_3119_cast_fp16)[name = tensor("op_3409_cast_fp16")]; + tensor var_3416_begin_0 = const()[name = tensor("op_3416_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3416_end_0 = const()[name = tensor("op_3416_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3416_end_mask_0 = const()[name = tensor("op_3416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3416_cast_fp16 = slice_by_index(begin = var_3416_begin_0, end = var_3416_end_0, end_mask = var_3416_end_mask_0, x = var_3123_cast_fp16)[name = tensor("op_3416_cast_fp16")]; + tensor var_3423_begin_0 = const()[name = tensor("op_3423_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3423_end_0 = const()[name = tensor("op_3423_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3423_end_mask_0 = const()[name = tensor("op_3423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3423_cast_fp16 = slice_by_index(begin = var_3423_begin_0, end = var_3423_end_0, end_mask = var_3423_end_mask_0, x = var_3123_cast_fp16)[name = tensor("op_3423_cast_fp16")]; + tensor var_3430_begin_0 = const()[name = tensor("op_3430_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3430_end_0 = const()[name = tensor("op_3430_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3430_end_mask_0 = const()[name = tensor("op_3430_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3430_cast_fp16 = slice_by_index(begin = var_3430_begin_0, end = var_3430_end_0, end_mask = var_3430_end_mask_0, x = var_3123_cast_fp16)[name = tensor("op_3430_cast_fp16")]; + tensor var_3437_begin_0 = const()[name = tensor("op_3437_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3437_end_0 = const()[name = tensor("op_3437_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3437_end_mask_0 = const()[name = 
tensor("op_3437_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3437_cast_fp16 = slice_by_index(begin = var_3437_begin_0, end = var_3437_end_0, end_mask = var_3437_end_mask_0, x = var_3123_cast_fp16)[name = tensor("op_3437_cast_fp16")]; + tensor var_3444_begin_0 = const()[name = tensor("op_3444_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3444_end_0 = const()[name = tensor("op_3444_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3444_end_mask_0 = const()[name = tensor("op_3444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3444_cast_fp16 = slice_by_index(begin = var_3444_begin_0, end = var_3444_end_0, end_mask = var_3444_end_mask_0, x = var_3127_cast_fp16)[name = tensor("op_3444_cast_fp16")]; + tensor var_3451_begin_0 = const()[name = tensor("op_3451_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3451_end_0 = const()[name = tensor("op_3451_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3451_end_mask_0 = const()[name = tensor("op_3451_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3451_cast_fp16 = slice_by_index(begin = var_3451_begin_0, end = var_3451_end_0, end_mask = var_3451_end_mask_0, x = var_3127_cast_fp16)[name = tensor("op_3451_cast_fp16")]; + tensor var_3458_begin_0 = const()[name = tensor("op_3458_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3458_end_0 = const()[name = tensor("op_3458_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3458_end_mask_0 = const()[name = tensor("op_3458_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3458_cast_fp16 = slice_by_index(begin = var_3458_begin_0, end = var_3458_end_0, end_mask = var_3458_end_mask_0, x = var_3127_cast_fp16)[name = tensor("op_3458_cast_fp16")]; + tensor var_3465_begin_0 = const()[name = tensor("op_3465_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3465_end_0 = const()[name = tensor("op_3465_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3465_end_mask_0 = const()[name = tensor("op_3465_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3465_cast_fp16 = slice_by_index(begin = var_3465_begin_0, end = var_3465_end_0, end_mask = var_3465_end_mask_0, x = var_3127_cast_fp16)[name = tensor("op_3465_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_3470_begin_0 = const()[name = tensor("op_3470_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3470_end_0 = const()[name = tensor("op_3470_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_3470_end_mask_0 = const()[name = tensor("op_3470_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_8 = transpose(perm = k_7_perm_0, x = key_7_cast_fp16)[name = tensor("transpose_8")]; + tensor var_3470_cast_fp16 = slice_by_index(begin = var_3470_begin_0, end = var_3470_end_0, end_mask = var_3470_end_mask_0, x = transpose_8)[name = tensor("op_3470_cast_fp16")]; + tensor var_3474_begin_0 = const()[name = tensor("op_3474_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_3474_end_0 = const()[name = tensor("op_3474_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_3474_end_mask_0 = const()[name = tensor("op_3474_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3474_cast_fp16 = slice_by_index(begin = var_3474_begin_0, end = var_3474_end_0, end_mask = var_3474_end_mask_0, x = transpose_8)[name = tensor("op_3474_cast_fp16")]; + tensor var_3478_begin_0 = const()[name = tensor("op_3478_begin_0"), val = tensor([0, 0, 0, 
128])]; + tensor var_3478_end_0 = const()[name = tensor("op_3478_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_3478_end_mask_0 = const()[name = tensor("op_3478_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3478_cast_fp16 = slice_by_index(begin = var_3478_begin_0, end = var_3478_end_0, end_mask = var_3478_end_mask_0, x = transpose_8)[name = tensor("op_3478_cast_fp16")]; + tensor var_3482_begin_0 = const()[name = tensor("op_3482_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_3482_end_0 = const()[name = tensor("op_3482_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_3482_end_mask_0 = const()[name = tensor("op_3482_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3482_cast_fp16 = slice_by_index(begin = var_3482_begin_0, end = var_3482_end_0, end_mask = var_3482_end_mask_0, x = transpose_8)[name = tensor("op_3482_cast_fp16")]; + tensor var_3486_begin_0 = const()[name = tensor("op_3486_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_3486_end_0 = const()[name = tensor("op_3486_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_3486_end_mask_0 = const()[name = tensor("op_3486_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3486_cast_fp16 = slice_by_index(begin = var_3486_begin_0, end = var_3486_end_0, end_mask = var_3486_end_mask_0, x = transpose_8)[name = tensor("op_3486_cast_fp16")]; + tensor var_3490_begin_0 = const()[name = tensor("op_3490_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_3490_end_0 = const()[name = tensor("op_3490_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_3490_end_mask_0 = const()[name = tensor("op_3490_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3490_cast_fp16 = slice_by_index(begin = var_3490_begin_0, end = var_3490_end_0, end_mask = var_3490_end_mask_0, x = transpose_8)[name = tensor("op_3490_cast_fp16")]; + tensor var_3494_begin_0 = const()[name = tensor("op_3494_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_3494_end_0 = const()[name = tensor("op_3494_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_3494_end_mask_0 = const()[name = tensor("op_3494_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3494_cast_fp16 = slice_by_index(begin = var_3494_begin_0, end = var_3494_end_0, end_mask = var_3494_end_mask_0, x = transpose_8)[name = tensor("op_3494_cast_fp16")]; + tensor var_3498_begin_0 = const()[name = tensor("op_3498_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_3498_end_0 = const()[name = tensor("op_3498_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_3498_end_mask_0 = const()[name = tensor("op_3498_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3498_cast_fp16 = slice_by_index(begin = var_3498_begin_0, end = var_3498_end_0, end_mask = var_3498_end_mask_0, x = transpose_8)[name = tensor("op_3498_cast_fp16")]; + tensor var_3502_begin_0 = const()[name = tensor("op_3502_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_3502_end_0 = const()[name = tensor("op_3502_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_3502_end_mask_0 = const()[name = tensor("op_3502_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3502_cast_fp16 = slice_by_index(begin = var_3502_begin_0, end = var_3502_end_0, end_mask = var_3502_end_mask_0, x = transpose_8)[name = tensor("op_3502_cast_fp16")]; + tensor var_3506_begin_0 = const()[name = tensor("op_3506_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_3506_end_0 = const()[name = 
tensor("op_3506_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_3506_end_mask_0 = const()[name = tensor("op_3506_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3506_cast_fp16 = slice_by_index(begin = var_3506_begin_0, end = var_3506_end_0, end_mask = var_3506_end_mask_0, x = transpose_8)[name = tensor("op_3506_cast_fp16")]; + tensor var_3510_begin_0 = const()[name = tensor("op_3510_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_3510_end_0 = const()[name = tensor("op_3510_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_3510_end_mask_0 = const()[name = tensor("op_3510_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3510_cast_fp16 = slice_by_index(begin = var_3510_begin_0, end = var_3510_end_0, end_mask = var_3510_end_mask_0, x = transpose_8)[name = tensor("op_3510_cast_fp16")]; + tensor var_3514_begin_0 = const()[name = tensor("op_3514_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_3514_end_0 = const()[name = tensor("op_3514_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_3514_end_mask_0 = const()[name = tensor("op_3514_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3514_cast_fp16 = slice_by_index(begin = var_3514_begin_0, end = var_3514_end_0, end_mask = var_3514_end_mask_0, x = transpose_8)[name = tensor("op_3514_cast_fp16")]; + tensor var_3516_begin_0 = const()[name = tensor("op_3516_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3516_end_0 = const()[name = tensor("op_3516_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3516_end_mask_0 = const()[name = tensor("op_3516_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3516_cast_fp16 = slice_by_index(begin = var_3516_begin_0, end = var_3516_end_0, end_mask = var_3516_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3516_cast_fp16")]; + tensor var_3520_begin_0 = const()[name = tensor("op_3520_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_3520_end_0 = const()[name = tensor("op_3520_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_3520_end_mask_0 = const()[name = tensor("op_3520_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3520_cast_fp16 = slice_by_index(begin = var_3520_begin_0, end = var_3520_end_0, end_mask = var_3520_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3520_cast_fp16")]; + tensor var_3524_begin_0 = const()[name = tensor("op_3524_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_3524_end_0 = const()[name = tensor("op_3524_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_3524_end_mask_0 = const()[name = tensor("op_3524_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3524_cast_fp16 = slice_by_index(begin = var_3524_begin_0, end = var_3524_end_0, end_mask = var_3524_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3524_cast_fp16")]; + tensor var_3528_begin_0 = const()[name = tensor("op_3528_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_3528_end_0 = const()[name = tensor("op_3528_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_3528_end_mask_0 = const()[name = tensor("op_3528_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3528_cast_fp16 = slice_by_index(begin = var_3528_begin_0, end = var_3528_end_0, end_mask = var_3528_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3528_cast_fp16")]; + tensor var_3532_begin_0 = const()[name = tensor("op_3532_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_3532_end_0 = const()[name = tensor("op_3532_end_0"), val = tensor([1, 320, 
1, 1500])]; + tensor var_3532_end_mask_0 = const()[name = tensor("op_3532_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3532_cast_fp16 = slice_by_index(begin = var_3532_begin_0, end = var_3532_end_0, end_mask = var_3532_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3532_cast_fp16")]; + tensor var_3536_begin_0 = const()[name = tensor("op_3536_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_3536_end_0 = const()[name = tensor("op_3536_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_3536_end_mask_0 = const()[name = tensor("op_3536_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3536_cast_fp16 = slice_by_index(begin = var_3536_begin_0, end = var_3536_end_0, end_mask = var_3536_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3536_cast_fp16")]; + tensor var_3540_begin_0 = const()[name = tensor("op_3540_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_3540_end_0 = const()[name = tensor("op_3540_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_3540_end_mask_0 = const()[name = tensor("op_3540_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3540_cast_fp16 = slice_by_index(begin = var_3540_begin_0, end = var_3540_end_0, end_mask = var_3540_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3540_cast_fp16")]; + tensor var_3544_begin_0 = const()[name = tensor("op_3544_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_3544_end_0 = const()[name = tensor("op_3544_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_3544_end_mask_0 = const()[name = tensor("op_3544_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3544_cast_fp16 = slice_by_index(begin = var_3544_begin_0, end = var_3544_end_0, end_mask = var_3544_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3544_cast_fp16")]; + tensor var_3548_begin_0 = const()[name = tensor("op_3548_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_3548_end_0 = const()[name = tensor("op_3548_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_3548_end_mask_0 = const()[name = tensor("op_3548_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3548_cast_fp16 = slice_by_index(begin = var_3548_begin_0, end = var_3548_end_0, end_mask = var_3548_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3548_cast_fp16")]; + tensor var_3552_begin_0 = const()[name = tensor("op_3552_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_3552_end_0 = const()[name = tensor("op_3552_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_3552_end_mask_0 = const()[name = tensor("op_3552_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3552_cast_fp16 = slice_by_index(begin = var_3552_begin_0, end = var_3552_end_0, end_mask = var_3552_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3552_cast_fp16")]; + tensor var_3556_begin_0 = const()[name = tensor("op_3556_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_3556_end_0 = const()[name = tensor("op_3556_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_3556_end_mask_0 = const()[name = tensor("op_3556_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3556_cast_fp16 = slice_by_index(begin = var_3556_begin_0, end = var_3556_end_0, end_mask = var_3556_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3556_cast_fp16")]; + tensor var_3560_begin_0 = const()[name = tensor("op_3560_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_3560_end_0 = const()[name = tensor("op_3560_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor 
var_3560_end_mask_0 = const()[name = tensor("op_3560_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3560_cast_fp16 = slice_by_index(begin = var_3560_begin_0, end = var_3560_end_0, end_mask = var_3560_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3560_cast_fp16")]; + tensor var_3564_equation_0 = const()[name = tensor("op_3564_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3564_cast_fp16 = einsum(equation = var_3564_equation_0, values = (var_3470_cast_fp16, var_3136_cast_fp16))[name = tensor("op_3564_cast_fp16")]; + tensor var_3565_to_fp16 = const()[name = tensor("op_3565_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_289_cast_fp16 = mul(x = var_3564_cast_fp16, y = var_3565_to_fp16)[name = tensor("aw_chunk_289_cast_fp16")]; + tensor var_3568_equation_0 = const()[name = tensor("op_3568_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3568_cast_fp16 = einsum(equation = var_3568_equation_0, values = (var_3470_cast_fp16, var_3143_cast_fp16))[name = tensor("op_3568_cast_fp16")]; + tensor var_3569_to_fp16 = const()[name = tensor("op_3569_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_291_cast_fp16 = mul(x = var_3568_cast_fp16, y = var_3569_to_fp16)[name = tensor("aw_chunk_291_cast_fp16")]; + tensor var_3572_equation_0 = const()[name = tensor("op_3572_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3572_cast_fp16 = einsum(equation = var_3572_equation_0, values = (var_3470_cast_fp16, var_3150_cast_fp16))[name = tensor("op_3572_cast_fp16")]; + tensor var_3573_to_fp16 = const()[name = tensor("op_3573_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_293_cast_fp16 = mul(x = var_3572_cast_fp16, y = var_3573_to_fp16)[name = tensor("aw_chunk_293_cast_fp16")]; + tensor var_3576_equation_0 = const()[name = tensor("op_3576_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3576_cast_fp16 = einsum(equation = var_3576_equation_0, values = (var_3470_cast_fp16, var_3157_cast_fp16))[name = tensor("op_3576_cast_fp16")]; + tensor var_3577_to_fp16 = const()[name = tensor("op_3577_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_295_cast_fp16 = mul(x = var_3576_cast_fp16, y = var_3577_to_fp16)[name = tensor("aw_chunk_295_cast_fp16")]; + tensor var_3580_equation_0 = const()[name = tensor("op_3580_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3580_cast_fp16 = einsum(equation = var_3580_equation_0, values = (var_3474_cast_fp16, var_3164_cast_fp16))[name = tensor("op_3580_cast_fp16")]; + tensor var_3581_to_fp16 = const()[name = tensor("op_3581_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_297_cast_fp16 = mul(x = var_3580_cast_fp16, y = var_3581_to_fp16)[name = tensor("aw_chunk_297_cast_fp16")]; + tensor var_3584_equation_0 = const()[name = tensor("op_3584_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3584_cast_fp16 = einsum(equation = var_3584_equation_0, values = (var_3474_cast_fp16, var_3171_cast_fp16))[name = tensor("op_3584_cast_fp16")]; + tensor var_3585_to_fp16 = const()[name = tensor("op_3585_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_299_cast_fp16 = mul(x = var_3584_cast_fp16, y = var_3585_to_fp16)[name = tensor("aw_chunk_299_cast_fp16")]; + tensor var_3588_equation_0 = const()[name = tensor("op_3588_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3588_cast_fp16 = einsum(equation = var_3588_equation_0, values = (var_3474_cast_fp16, var_3178_cast_fp16))[name = tensor("op_3588_cast_fp16")]; + tensor var_3589_to_fp16 = const()[name = tensor("op_3589_to_fp16"), val = 
tensor(0x1p-3)]; + tensor aw_chunk_301_cast_fp16 = mul(x = var_3588_cast_fp16, y = var_3589_to_fp16)[name = tensor("aw_chunk_301_cast_fp16")]; + tensor var_3592_equation_0 = const()[name = tensor("op_3592_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3592_cast_fp16 = einsum(equation = var_3592_equation_0, values = (var_3474_cast_fp16, var_3185_cast_fp16))[name = tensor("op_3592_cast_fp16")]; + tensor var_3593_to_fp16 = const()[name = tensor("op_3593_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_303_cast_fp16 = mul(x = var_3592_cast_fp16, y = var_3593_to_fp16)[name = tensor("aw_chunk_303_cast_fp16")]; + tensor var_3596_equation_0 = const()[name = tensor("op_3596_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3596_cast_fp16 = einsum(equation = var_3596_equation_0, values = (var_3478_cast_fp16, var_3192_cast_fp16))[name = tensor("op_3596_cast_fp16")]; + tensor var_3597_to_fp16 = const()[name = tensor("op_3597_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_305_cast_fp16 = mul(x = var_3596_cast_fp16, y = var_3597_to_fp16)[name = tensor("aw_chunk_305_cast_fp16")]; + tensor var_3600_equation_0 = const()[name = tensor("op_3600_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3600_cast_fp16 = einsum(equation = var_3600_equation_0, values = (var_3478_cast_fp16, var_3199_cast_fp16))[name = tensor("op_3600_cast_fp16")]; + tensor var_3601_to_fp16 = const()[name = tensor("op_3601_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_307_cast_fp16 = mul(x = var_3600_cast_fp16, y = var_3601_to_fp16)[name = tensor("aw_chunk_307_cast_fp16")]; + tensor var_3604_equation_0 = const()[name = tensor("op_3604_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3604_cast_fp16 = einsum(equation = var_3604_equation_0, values = (var_3478_cast_fp16, var_3206_cast_fp16))[name = tensor("op_3604_cast_fp16")]; + tensor var_3605_to_fp16 = const()[name = tensor("op_3605_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_309_cast_fp16 = mul(x = var_3604_cast_fp16, y = var_3605_to_fp16)[name = tensor("aw_chunk_309_cast_fp16")]; + tensor var_3608_equation_0 = const()[name = tensor("op_3608_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3608_cast_fp16 = einsum(equation = var_3608_equation_0, values = (var_3478_cast_fp16, var_3213_cast_fp16))[name = tensor("op_3608_cast_fp16")]; + tensor var_3609_to_fp16 = const()[name = tensor("op_3609_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_311_cast_fp16 = mul(x = var_3608_cast_fp16, y = var_3609_to_fp16)[name = tensor("aw_chunk_311_cast_fp16")]; + tensor var_3612_equation_0 = const()[name = tensor("op_3612_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3612_cast_fp16 = einsum(equation = var_3612_equation_0, values = (var_3482_cast_fp16, var_3220_cast_fp16))[name = tensor("op_3612_cast_fp16")]; + tensor var_3613_to_fp16 = const()[name = tensor("op_3613_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_313_cast_fp16 = mul(x = var_3612_cast_fp16, y = var_3613_to_fp16)[name = tensor("aw_chunk_313_cast_fp16")]; + tensor var_3616_equation_0 = const()[name = tensor("op_3616_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3616_cast_fp16 = einsum(equation = var_3616_equation_0, values = (var_3482_cast_fp16, var_3227_cast_fp16))[name = tensor("op_3616_cast_fp16")]; + tensor var_3617_to_fp16 = const()[name = tensor("op_3617_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_315_cast_fp16 = mul(x = var_3616_cast_fp16, y = var_3617_to_fp16)[name = tensor("aw_chunk_315_cast_fp16")]; + tensor 
var_3620_equation_0 = const()[name = tensor("op_3620_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3620_cast_fp16 = einsum(equation = var_3620_equation_0, values = (var_3482_cast_fp16, var_3234_cast_fp16))[name = tensor("op_3620_cast_fp16")]; + tensor var_3621_to_fp16 = const()[name = tensor("op_3621_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_317_cast_fp16 = mul(x = var_3620_cast_fp16, y = var_3621_to_fp16)[name = tensor("aw_chunk_317_cast_fp16")]; + tensor var_3624_equation_0 = const()[name = tensor("op_3624_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3624_cast_fp16 = einsum(equation = var_3624_equation_0, values = (var_3482_cast_fp16, var_3241_cast_fp16))[name = tensor("op_3624_cast_fp16")]; + tensor var_3625_to_fp16 = const()[name = tensor("op_3625_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_319_cast_fp16 = mul(x = var_3624_cast_fp16, y = var_3625_to_fp16)[name = tensor("aw_chunk_319_cast_fp16")]; + tensor var_3628_equation_0 = const()[name = tensor("op_3628_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3628_cast_fp16 = einsum(equation = var_3628_equation_0, values = (var_3486_cast_fp16, var_3248_cast_fp16))[name = tensor("op_3628_cast_fp16")]; + tensor var_3629_to_fp16 = const()[name = tensor("op_3629_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_321_cast_fp16 = mul(x = var_3628_cast_fp16, y = var_3629_to_fp16)[name = tensor("aw_chunk_321_cast_fp16")]; + tensor var_3632_equation_0 = const()[name = tensor("op_3632_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3632_cast_fp16 = einsum(equation = var_3632_equation_0, values = (var_3486_cast_fp16, var_3255_cast_fp16))[name = tensor("op_3632_cast_fp16")]; + tensor var_3633_to_fp16 = const()[name = tensor("op_3633_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_323_cast_fp16 = mul(x = var_3632_cast_fp16, y = var_3633_to_fp16)[name = tensor("aw_chunk_323_cast_fp16")]; + tensor var_3636_equation_0 = const()[name = tensor("op_3636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3636_cast_fp16 = einsum(equation = var_3636_equation_0, values = (var_3486_cast_fp16, var_3262_cast_fp16))[name = tensor("op_3636_cast_fp16")]; + tensor var_3637_to_fp16 = const()[name = tensor("op_3637_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_325_cast_fp16 = mul(x = var_3636_cast_fp16, y = var_3637_to_fp16)[name = tensor("aw_chunk_325_cast_fp16")]; + tensor var_3640_equation_0 = const()[name = tensor("op_3640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3640_cast_fp16 = einsum(equation = var_3640_equation_0, values = (var_3486_cast_fp16, var_3269_cast_fp16))[name = tensor("op_3640_cast_fp16")]; + tensor var_3641_to_fp16 = const()[name = tensor("op_3641_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_327_cast_fp16 = mul(x = var_3640_cast_fp16, y = var_3641_to_fp16)[name = tensor("aw_chunk_327_cast_fp16")]; + tensor var_3644_equation_0 = const()[name = tensor("op_3644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3644_cast_fp16 = einsum(equation = var_3644_equation_0, values = (var_3490_cast_fp16, var_3276_cast_fp16))[name = tensor("op_3644_cast_fp16")]; + tensor var_3645_to_fp16 = const()[name = tensor("op_3645_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_329_cast_fp16 = mul(x = var_3644_cast_fp16, y = var_3645_to_fp16)[name = tensor("aw_chunk_329_cast_fp16")]; + tensor var_3648_equation_0 = const()[name = tensor("op_3648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3648_cast_fp16 = einsum(equation = 
var_3648_equation_0, values = (var_3490_cast_fp16, var_3283_cast_fp16))[name = tensor("op_3648_cast_fp16")]; + tensor var_3649_to_fp16 = const()[name = tensor("op_3649_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_331_cast_fp16 = mul(x = var_3648_cast_fp16, y = var_3649_to_fp16)[name = tensor("aw_chunk_331_cast_fp16")]; + tensor var_3652_equation_0 = const()[name = tensor("op_3652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3652_cast_fp16 = einsum(equation = var_3652_equation_0, values = (var_3490_cast_fp16, var_3290_cast_fp16))[name = tensor("op_3652_cast_fp16")]; + tensor var_3653_to_fp16 = const()[name = tensor("op_3653_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_333_cast_fp16 = mul(x = var_3652_cast_fp16, y = var_3653_to_fp16)[name = tensor("aw_chunk_333_cast_fp16")]; + tensor var_3656_equation_0 = const()[name = tensor("op_3656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3656_cast_fp16 = einsum(equation = var_3656_equation_0, values = (var_3490_cast_fp16, var_3297_cast_fp16))[name = tensor("op_3656_cast_fp16")]; + tensor var_3657_to_fp16 = const()[name = tensor("op_3657_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_335_cast_fp16 = mul(x = var_3656_cast_fp16, y = var_3657_to_fp16)[name = tensor("aw_chunk_335_cast_fp16")]; + tensor var_3660_equation_0 = const()[name = tensor("op_3660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3660_cast_fp16 = einsum(equation = var_3660_equation_0, values = (var_3494_cast_fp16, var_3304_cast_fp16))[name = tensor("op_3660_cast_fp16")]; + tensor var_3661_to_fp16 = const()[name = tensor("op_3661_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_337_cast_fp16 = mul(x = var_3660_cast_fp16, y = var_3661_to_fp16)[name = tensor("aw_chunk_337_cast_fp16")]; + tensor var_3664_equation_0 = const()[name = tensor("op_3664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3664_cast_fp16 = einsum(equation = var_3664_equation_0, values = (var_3494_cast_fp16, var_3311_cast_fp16))[name = tensor("op_3664_cast_fp16")]; + tensor var_3665_to_fp16 = const()[name = tensor("op_3665_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_339_cast_fp16 = mul(x = var_3664_cast_fp16, y = var_3665_to_fp16)[name = tensor("aw_chunk_339_cast_fp16")]; + tensor var_3668_equation_0 = const()[name = tensor("op_3668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3668_cast_fp16 = einsum(equation = var_3668_equation_0, values = (var_3494_cast_fp16, var_3318_cast_fp16))[name = tensor("op_3668_cast_fp16")]; + tensor var_3669_to_fp16 = const()[name = tensor("op_3669_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_341_cast_fp16 = mul(x = var_3668_cast_fp16, y = var_3669_to_fp16)[name = tensor("aw_chunk_341_cast_fp16")]; + tensor var_3672_equation_0 = const()[name = tensor("op_3672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3672_cast_fp16 = einsum(equation = var_3672_equation_0, values = (var_3494_cast_fp16, var_3325_cast_fp16))[name = tensor("op_3672_cast_fp16")]; + tensor var_3673_to_fp16 = const()[name = tensor("op_3673_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_343_cast_fp16 = mul(x = var_3672_cast_fp16, y = var_3673_to_fp16)[name = tensor("aw_chunk_343_cast_fp16")]; + tensor var_3676_equation_0 = const()[name = tensor("op_3676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3676_cast_fp16 = einsum(equation = var_3676_equation_0, values = (var_3498_cast_fp16, var_3332_cast_fp16))[name = tensor("op_3676_cast_fp16")]; + tensor var_3677_to_fp16 = const()[name = 
tensor("op_3677_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_345_cast_fp16 = mul(x = var_3676_cast_fp16, y = var_3677_to_fp16)[name = tensor("aw_chunk_345_cast_fp16")]; + tensor var_3680_equation_0 = const()[name = tensor("op_3680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3680_cast_fp16 = einsum(equation = var_3680_equation_0, values = (var_3498_cast_fp16, var_3339_cast_fp16))[name = tensor("op_3680_cast_fp16")]; + tensor var_3681_to_fp16 = const()[name = tensor("op_3681_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_347_cast_fp16 = mul(x = var_3680_cast_fp16, y = var_3681_to_fp16)[name = tensor("aw_chunk_347_cast_fp16")]; + tensor var_3684_equation_0 = const()[name = tensor("op_3684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3684_cast_fp16 = einsum(equation = var_3684_equation_0, values = (var_3498_cast_fp16, var_3346_cast_fp16))[name = tensor("op_3684_cast_fp16")]; + tensor var_3685_to_fp16 = const()[name = tensor("op_3685_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_349_cast_fp16 = mul(x = var_3684_cast_fp16, y = var_3685_to_fp16)[name = tensor("aw_chunk_349_cast_fp16")]; + tensor var_3688_equation_0 = const()[name = tensor("op_3688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3688_cast_fp16 = einsum(equation = var_3688_equation_0, values = (var_3498_cast_fp16, var_3353_cast_fp16))[name = tensor("op_3688_cast_fp16")]; + tensor var_3689_to_fp16 = const()[name = tensor("op_3689_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_351_cast_fp16 = mul(x = var_3688_cast_fp16, y = var_3689_to_fp16)[name = tensor("aw_chunk_351_cast_fp16")]; + tensor var_3692_equation_0 = const()[name = tensor("op_3692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3692_cast_fp16 = einsum(equation = var_3692_equation_0, values = (var_3502_cast_fp16, var_3360_cast_fp16))[name = tensor("op_3692_cast_fp16")]; + tensor var_3693_to_fp16 = const()[name = tensor("op_3693_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_353_cast_fp16 = mul(x = var_3692_cast_fp16, y = var_3693_to_fp16)[name = tensor("aw_chunk_353_cast_fp16")]; + tensor var_3696_equation_0 = const()[name = tensor("op_3696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3696_cast_fp16 = einsum(equation = var_3696_equation_0, values = (var_3502_cast_fp16, var_3367_cast_fp16))[name = tensor("op_3696_cast_fp16")]; + tensor var_3697_to_fp16 = const()[name = tensor("op_3697_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_355_cast_fp16 = mul(x = var_3696_cast_fp16, y = var_3697_to_fp16)[name = tensor("aw_chunk_355_cast_fp16")]; + tensor var_3700_equation_0 = const()[name = tensor("op_3700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3700_cast_fp16 = einsum(equation = var_3700_equation_0, values = (var_3502_cast_fp16, var_3374_cast_fp16))[name = tensor("op_3700_cast_fp16")]; + tensor var_3701_to_fp16 = const()[name = tensor("op_3701_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_357_cast_fp16 = mul(x = var_3700_cast_fp16, y = var_3701_to_fp16)[name = tensor("aw_chunk_357_cast_fp16")]; + tensor var_3704_equation_0 = const()[name = tensor("op_3704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3704_cast_fp16 = einsum(equation = var_3704_equation_0, values = (var_3502_cast_fp16, var_3381_cast_fp16))[name = tensor("op_3704_cast_fp16")]; + tensor var_3705_to_fp16 = const()[name = tensor("op_3705_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_359_cast_fp16 = mul(x = var_3704_cast_fp16, y = var_3705_to_fp16)[name = 
tensor("aw_chunk_359_cast_fp16")]; + tensor var_3708_equation_0 = const()[name = tensor("op_3708_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3708_cast_fp16 = einsum(equation = var_3708_equation_0, values = (var_3506_cast_fp16, var_3388_cast_fp16))[name = tensor("op_3708_cast_fp16")]; + tensor var_3709_to_fp16 = const()[name = tensor("op_3709_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_361_cast_fp16 = mul(x = var_3708_cast_fp16, y = var_3709_to_fp16)[name = tensor("aw_chunk_361_cast_fp16")]; + tensor var_3712_equation_0 = const()[name = tensor("op_3712_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3712_cast_fp16 = einsum(equation = var_3712_equation_0, values = (var_3506_cast_fp16, var_3395_cast_fp16))[name = tensor("op_3712_cast_fp16")]; + tensor var_3713_to_fp16 = const()[name = tensor("op_3713_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_363_cast_fp16 = mul(x = var_3712_cast_fp16, y = var_3713_to_fp16)[name = tensor("aw_chunk_363_cast_fp16")]; + tensor var_3716_equation_0 = const()[name = tensor("op_3716_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3716_cast_fp16 = einsum(equation = var_3716_equation_0, values = (var_3506_cast_fp16, var_3402_cast_fp16))[name = tensor("op_3716_cast_fp16")]; + tensor var_3717_to_fp16 = const()[name = tensor("op_3717_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_365_cast_fp16 = mul(x = var_3716_cast_fp16, y = var_3717_to_fp16)[name = tensor("aw_chunk_365_cast_fp16")]; + tensor var_3720_equation_0 = const()[name = tensor("op_3720_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3720_cast_fp16 = einsum(equation = var_3720_equation_0, values = (var_3506_cast_fp16, var_3409_cast_fp16))[name = tensor("op_3720_cast_fp16")]; + tensor var_3721_to_fp16 = const()[name = tensor("op_3721_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_367_cast_fp16 = mul(x = var_3720_cast_fp16, y = var_3721_to_fp16)[name = tensor("aw_chunk_367_cast_fp16")]; + tensor var_3724_equation_0 = const()[name = tensor("op_3724_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3724_cast_fp16 = einsum(equation = var_3724_equation_0, values = (var_3510_cast_fp16, var_3416_cast_fp16))[name = tensor("op_3724_cast_fp16")]; + tensor var_3725_to_fp16 = const()[name = tensor("op_3725_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_369_cast_fp16 = mul(x = var_3724_cast_fp16, y = var_3725_to_fp16)[name = tensor("aw_chunk_369_cast_fp16")]; + tensor var_3728_equation_0 = const()[name = tensor("op_3728_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3728_cast_fp16 = einsum(equation = var_3728_equation_0, values = (var_3510_cast_fp16, var_3423_cast_fp16))[name = tensor("op_3728_cast_fp16")]; + tensor var_3729_to_fp16 = const()[name = tensor("op_3729_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_371_cast_fp16 = mul(x = var_3728_cast_fp16, y = var_3729_to_fp16)[name = tensor("aw_chunk_371_cast_fp16")]; + tensor var_3732_equation_0 = const()[name = tensor("op_3732_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3732_cast_fp16 = einsum(equation = var_3732_equation_0, values = (var_3510_cast_fp16, var_3430_cast_fp16))[name = tensor("op_3732_cast_fp16")]; + tensor var_3733_to_fp16 = const()[name = tensor("op_3733_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_373_cast_fp16 = mul(x = var_3732_cast_fp16, y = var_3733_to_fp16)[name = tensor("aw_chunk_373_cast_fp16")]; + tensor var_3736_equation_0 = const()[name = tensor("op_3736_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + 
tensor var_3736_cast_fp16 = einsum(equation = var_3736_equation_0, values = (var_3510_cast_fp16, var_3437_cast_fp16))[name = tensor("op_3736_cast_fp16")]; + tensor var_3737_to_fp16 = const()[name = tensor("op_3737_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_375_cast_fp16 = mul(x = var_3736_cast_fp16, y = var_3737_to_fp16)[name = tensor("aw_chunk_375_cast_fp16")]; + tensor var_3740_equation_0 = const()[name = tensor("op_3740_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3740_cast_fp16 = einsum(equation = var_3740_equation_0, values = (var_3514_cast_fp16, var_3444_cast_fp16))[name = tensor("op_3740_cast_fp16")]; + tensor var_3741_to_fp16 = const()[name = tensor("op_3741_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_377_cast_fp16 = mul(x = var_3740_cast_fp16, y = var_3741_to_fp16)[name = tensor("aw_chunk_377_cast_fp16")]; + tensor var_3744_equation_0 = const()[name = tensor("op_3744_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3744_cast_fp16 = einsum(equation = var_3744_equation_0, values = (var_3514_cast_fp16, var_3451_cast_fp16))[name = tensor("op_3744_cast_fp16")]; + tensor var_3745_to_fp16 = const()[name = tensor("op_3745_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_379_cast_fp16 = mul(x = var_3744_cast_fp16, y = var_3745_to_fp16)[name = tensor("aw_chunk_379_cast_fp16")]; + tensor var_3748_equation_0 = const()[name = tensor("op_3748_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3748_cast_fp16 = einsum(equation = var_3748_equation_0, values = (var_3514_cast_fp16, var_3458_cast_fp16))[name = tensor("op_3748_cast_fp16")]; + tensor var_3749_to_fp16 = const()[name = tensor("op_3749_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_381_cast_fp16 = mul(x = var_3748_cast_fp16, y = var_3749_to_fp16)[name = tensor("aw_chunk_381_cast_fp16")]; + tensor var_3752_equation_0 = const()[name = tensor("op_3752_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3752_cast_fp16 = einsum(equation = var_3752_equation_0, values = (var_3514_cast_fp16, var_3465_cast_fp16))[name = tensor("op_3752_cast_fp16")]; + tensor var_3753_to_fp16 = const()[name = tensor("op_3753_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_383_cast_fp16 = mul(x = var_3752_cast_fp16, y = var_3753_to_fp16)[name = tensor("aw_chunk_383_cast_fp16")]; + tensor var_3755_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_289_cast_fp16)[name = tensor("op_3755_cast_fp16")]; + tensor var_3756_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_291_cast_fp16)[name = tensor("op_3756_cast_fp16")]; + tensor var_3757_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_293_cast_fp16)[name = tensor("op_3757_cast_fp16")]; + tensor var_3758_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_295_cast_fp16)[name = tensor("op_3758_cast_fp16")]; + tensor var_3759_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_297_cast_fp16)[name = tensor("op_3759_cast_fp16")]; + tensor var_3760_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_299_cast_fp16)[name = tensor("op_3760_cast_fp16")]; + tensor var_3761_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_301_cast_fp16)[name = tensor("op_3761_cast_fp16")]; + tensor var_3762_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_303_cast_fp16)[name = tensor("op_3762_cast_fp16")]; + tensor var_3763_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_305_cast_fp16)[name = tensor("op_3763_cast_fp16")]; + tensor var_3764_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_307_cast_fp16)[name = tensor("op_3764_cast_fp16")]; + tensor var_3765_cast_fp16 = 
softmax(axis = var_3028, x = aw_chunk_309_cast_fp16)[name = tensor("op_3765_cast_fp16")]; + tensor var_3766_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_311_cast_fp16)[name = tensor("op_3766_cast_fp16")]; + tensor var_3767_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_313_cast_fp16)[name = tensor("op_3767_cast_fp16")]; + tensor var_3768_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_315_cast_fp16)[name = tensor("op_3768_cast_fp16")]; + tensor var_3769_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_317_cast_fp16)[name = tensor("op_3769_cast_fp16")]; + tensor var_3770_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_319_cast_fp16)[name = tensor("op_3770_cast_fp16")]; + tensor var_3771_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_321_cast_fp16)[name = tensor("op_3771_cast_fp16")]; + tensor var_3772_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_323_cast_fp16)[name = tensor("op_3772_cast_fp16")]; + tensor var_3773_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_325_cast_fp16)[name = tensor("op_3773_cast_fp16")]; + tensor var_3774_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_327_cast_fp16)[name = tensor("op_3774_cast_fp16")]; + tensor var_3775_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_329_cast_fp16)[name = tensor("op_3775_cast_fp16")]; + tensor var_3776_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_331_cast_fp16)[name = tensor("op_3776_cast_fp16")]; + tensor var_3777_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_333_cast_fp16)[name = tensor("op_3777_cast_fp16")]; + tensor var_3778_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_335_cast_fp16)[name = tensor("op_3778_cast_fp16")]; + tensor var_3779_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_337_cast_fp16)[name = tensor("op_3779_cast_fp16")]; + tensor var_3780_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_339_cast_fp16)[name = tensor("op_3780_cast_fp16")]; + tensor var_3781_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_341_cast_fp16)[name = tensor("op_3781_cast_fp16")]; + tensor var_3782_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_343_cast_fp16)[name = tensor("op_3782_cast_fp16")]; + tensor var_3783_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_345_cast_fp16)[name = tensor("op_3783_cast_fp16")]; + tensor var_3784_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_347_cast_fp16)[name = tensor("op_3784_cast_fp16")]; + tensor var_3785_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_349_cast_fp16)[name = tensor("op_3785_cast_fp16")]; + tensor var_3786_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_351_cast_fp16)[name = tensor("op_3786_cast_fp16")]; + tensor var_3787_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_353_cast_fp16)[name = tensor("op_3787_cast_fp16")]; + tensor var_3788_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_355_cast_fp16)[name = tensor("op_3788_cast_fp16")]; + tensor var_3789_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_357_cast_fp16)[name = tensor("op_3789_cast_fp16")]; + tensor var_3790_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_359_cast_fp16)[name = tensor("op_3790_cast_fp16")]; + tensor var_3791_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_361_cast_fp16)[name = tensor("op_3791_cast_fp16")]; + tensor var_3792_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_363_cast_fp16)[name = tensor("op_3792_cast_fp16")]; + tensor var_3793_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_365_cast_fp16)[name = tensor("op_3793_cast_fp16")]; + tensor var_3794_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_367_cast_fp16)[name = 
tensor("op_3794_cast_fp16")]; + tensor var_3795_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_369_cast_fp16)[name = tensor("op_3795_cast_fp16")]; + tensor var_3796_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_371_cast_fp16)[name = tensor("op_3796_cast_fp16")]; + tensor var_3797_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_373_cast_fp16)[name = tensor("op_3797_cast_fp16")]; + tensor var_3798_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_375_cast_fp16)[name = tensor("op_3798_cast_fp16")]; + tensor var_3799_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_377_cast_fp16)[name = tensor("op_3799_cast_fp16")]; + tensor var_3800_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_379_cast_fp16)[name = tensor("op_3800_cast_fp16")]; + tensor var_3801_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_381_cast_fp16)[name = tensor("op_3801_cast_fp16")]; + tensor var_3802_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_383_cast_fp16)[name = tensor("op_3802_cast_fp16")]; + tensor var_3804_equation_0 = const()[name = tensor("op_3804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3804_cast_fp16 = einsum(equation = var_3804_equation_0, values = (var_3516_cast_fp16, var_3755_cast_fp16))[name = tensor("op_3804_cast_fp16")]; + tensor var_3806_equation_0 = const()[name = tensor("op_3806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3806_cast_fp16 = einsum(equation = var_3806_equation_0, values = (var_3516_cast_fp16, var_3756_cast_fp16))[name = tensor("op_3806_cast_fp16")]; + tensor var_3808_equation_0 = const()[name = tensor("op_3808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3808_cast_fp16 = einsum(equation = var_3808_equation_0, values = (var_3516_cast_fp16, var_3757_cast_fp16))[name = tensor("op_3808_cast_fp16")]; + tensor var_3810_equation_0 = const()[name = tensor("op_3810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3810_cast_fp16 = einsum(equation = var_3810_equation_0, values = (var_3516_cast_fp16, var_3758_cast_fp16))[name = tensor("op_3810_cast_fp16")]; + tensor var_3812_equation_0 = const()[name = tensor("op_3812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3812_cast_fp16 = einsum(equation = var_3812_equation_0, values = (var_3520_cast_fp16, var_3759_cast_fp16))[name = tensor("op_3812_cast_fp16")]; + tensor var_3814_equation_0 = const()[name = tensor("op_3814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3814_cast_fp16 = einsum(equation = var_3814_equation_0, values = (var_3520_cast_fp16, var_3760_cast_fp16))[name = tensor("op_3814_cast_fp16")]; + tensor var_3816_equation_0 = const()[name = tensor("op_3816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3816_cast_fp16 = einsum(equation = var_3816_equation_0, values = (var_3520_cast_fp16, var_3761_cast_fp16))[name = tensor("op_3816_cast_fp16")]; + tensor var_3818_equation_0 = const()[name = tensor("op_3818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3818_cast_fp16 = einsum(equation = var_3818_equation_0, values = (var_3520_cast_fp16, var_3762_cast_fp16))[name = tensor("op_3818_cast_fp16")]; + tensor var_3820_equation_0 = const()[name = tensor("op_3820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3820_cast_fp16 = einsum(equation = var_3820_equation_0, values = (var_3524_cast_fp16, var_3763_cast_fp16))[name = tensor("op_3820_cast_fp16")]; + tensor var_3822_equation_0 = const()[name = tensor("op_3822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3822_cast_fp16 = einsum(equation = 
var_3822_equation_0, values = (var_3524_cast_fp16, var_3764_cast_fp16))[name = tensor("op_3822_cast_fp16")]; + tensor var_3824_equation_0 = const()[name = tensor("op_3824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3824_cast_fp16 = einsum(equation = var_3824_equation_0, values = (var_3524_cast_fp16, var_3765_cast_fp16))[name = tensor("op_3824_cast_fp16")]; + tensor var_3826_equation_0 = const()[name = tensor("op_3826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3826_cast_fp16 = einsum(equation = var_3826_equation_0, values = (var_3524_cast_fp16, var_3766_cast_fp16))[name = tensor("op_3826_cast_fp16")]; + tensor var_3828_equation_0 = const()[name = tensor("op_3828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3828_cast_fp16 = einsum(equation = var_3828_equation_0, values = (var_3528_cast_fp16, var_3767_cast_fp16))[name = tensor("op_3828_cast_fp16")]; + tensor var_3830_equation_0 = const()[name = tensor("op_3830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3830_cast_fp16 = einsum(equation = var_3830_equation_0, values = (var_3528_cast_fp16, var_3768_cast_fp16))[name = tensor("op_3830_cast_fp16")]; + tensor var_3832_equation_0 = const()[name = tensor("op_3832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3832_cast_fp16 = einsum(equation = var_3832_equation_0, values = (var_3528_cast_fp16, var_3769_cast_fp16))[name = tensor("op_3832_cast_fp16")]; + tensor var_3834_equation_0 = const()[name = tensor("op_3834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3834_cast_fp16 = einsum(equation = var_3834_equation_0, values = (var_3528_cast_fp16, var_3770_cast_fp16))[name = tensor("op_3834_cast_fp16")]; + tensor var_3836_equation_0 = const()[name = tensor("op_3836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3836_cast_fp16 = einsum(equation = var_3836_equation_0, values = (var_3532_cast_fp16, var_3771_cast_fp16))[name = tensor("op_3836_cast_fp16")]; + tensor var_3838_equation_0 = const()[name = tensor("op_3838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3838_cast_fp16 = einsum(equation = var_3838_equation_0, values = (var_3532_cast_fp16, var_3772_cast_fp16))[name = tensor("op_3838_cast_fp16")]; + tensor var_3840_equation_0 = const()[name = tensor("op_3840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3840_cast_fp16 = einsum(equation = var_3840_equation_0, values = (var_3532_cast_fp16, var_3773_cast_fp16))[name = tensor("op_3840_cast_fp16")]; + tensor var_3842_equation_0 = const()[name = tensor("op_3842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3842_cast_fp16 = einsum(equation = var_3842_equation_0, values = (var_3532_cast_fp16, var_3774_cast_fp16))[name = tensor("op_3842_cast_fp16")]; + tensor var_3844_equation_0 = const()[name = tensor("op_3844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3844_cast_fp16 = einsum(equation = var_3844_equation_0, values = (var_3536_cast_fp16, var_3775_cast_fp16))[name = tensor("op_3844_cast_fp16")]; + tensor var_3846_equation_0 = const()[name = tensor("op_3846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3846_cast_fp16 = einsum(equation = var_3846_equation_0, values = (var_3536_cast_fp16, var_3776_cast_fp16))[name = tensor("op_3846_cast_fp16")]; + tensor var_3848_equation_0 = const()[name = tensor("op_3848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3848_cast_fp16 = einsum(equation = var_3848_equation_0, values = (var_3536_cast_fp16, var_3777_cast_fp16))[name = 
tensor("op_3848_cast_fp16")]; + tensor var_3850_equation_0 = const()[name = tensor("op_3850_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3850_cast_fp16 = einsum(equation = var_3850_equation_0, values = (var_3536_cast_fp16, var_3778_cast_fp16))[name = tensor("op_3850_cast_fp16")]; + tensor var_3852_equation_0 = const()[name = tensor("op_3852_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3852_cast_fp16 = einsum(equation = var_3852_equation_0, values = (var_3540_cast_fp16, var_3779_cast_fp16))[name = tensor("op_3852_cast_fp16")]; + tensor var_3854_equation_0 = const()[name = tensor("op_3854_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3854_cast_fp16 = einsum(equation = var_3854_equation_0, values = (var_3540_cast_fp16, var_3780_cast_fp16))[name = tensor("op_3854_cast_fp16")]; + tensor var_3856_equation_0 = const()[name = tensor("op_3856_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3856_cast_fp16 = einsum(equation = var_3856_equation_0, values = (var_3540_cast_fp16, var_3781_cast_fp16))[name = tensor("op_3856_cast_fp16")]; + tensor var_3858_equation_0 = const()[name = tensor("op_3858_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3858_cast_fp16 = einsum(equation = var_3858_equation_0, values = (var_3540_cast_fp16, var_3782_cast_fp16))[name = tensor("op_3858_cast_fp16")]; + tensor var_3860_equation_0 = const()[name = tensor("op_3860_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3860_cast_fp16 = einsum(equation = var_3860_equation_0, values = (var_3544_cast_fp16, var_3783_cast_fp16))[name = tensor("op_3860_cast_fp16")]; + tensor var_3862_equation_0 = const()[name = tensor("op_3862_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3862_cast_fp16 = einsum(equation = var_3862_equation_0, values = (var_3544_cast_fp16, var_3784_cast_fp16))[name = tensor("op_3862_cast_fp16")]; + tensor var_3864_equation_0 = const()[name = tensor("op_3864_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3864_cast_fp16 = einsum(equation = var_3864_equation_0, values = (var_3544_cast_fp16, var_3785_cast_fp16))[name = tensor("op_3864_cast_fp16")]; + tensor var_3866_equation_0 = const()[name = tensor("op_3866_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3866_cast_fp16 = einsum(equation = var_3866_equation_0, values = (var_3544_cast_fp16, var_3786_cast_fp16))[name = tensor("op_3866_cast_fp16")]; + tensor var_3868_equation_0 = const()[name = tensor("op_3868_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3868_cast_fp16 = einsum(equation = var_3868_equation_0, values = (var_3548_cast_fp16, var_3787_cast_fp16))[name = tensor("op_3868_cast_fp16")]; + tensor var_3870_equation_0 = const()[name = tensor("op_3870_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3870_cast_fp16 = einsum(equation = var_3870_equation_0, values = (var_3548_cast_fp16, var_3788_cast_fp16))[name = tensor("op_3870_cast_fp16")]; + tensor var_3872_equation_0 = const()[name = tensor("op_3872_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3872_cast_fp16 = einsum(equation = var_3872_equation_0, values = (var_3548_cast_fp16, var_3789_cast_fp16))[name = tensor("op_3872_cast_fp16")]; + tensor var_3874_equation_0 = const()[name = tensor("op_3874_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3874_cast_fp16 = einsum(equation = var_3874_equation_0, values = (var_3548_cast_fp16, var_3790_cast_fp16))[name = tensor("op_3874_cast_fp16")]; + tensor var_3876_equation_0 = const()[name = 
tensor("op_3876_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3876_cast_fp16 = einsum(equation = var_3876_equation_0, values = (var_3552_cast_fp16, var_3791_cast_fp16))[name = tensor("op_3876_cast_fp16")]; + tensor var_3878_equation_0 = const()[name = tensor("op_3878_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3878_cast_fp16 = einsum(equation = var_3878_equation_0, values = (var_3552_cast_fp16, var_3792_cast_fp16))[name = tensor("op_3878_cast_fp16")]; + tensor var_3880_equation_0 = const()[name = tensor("op_3880_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3880_cast_fp16 = einsum(equation = var_3880_equation_0, values = (var_3552_cast_fp16, var_3793_cast_fp16))[name = tensor("op_3880_cast_fp16")]; + tensor var_3882_equation_0 = const()[name = tensor("op_3882_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3882_cast_fp16 = einsum(equation = var_3882_equation_0, values = (var_3552_cast_fp16, var_3794_cast_fp16))[name = tensor("op_3882_cast_fp16")]; + tensor var_3884_equation_0 = const()[name = tensor("op_3884_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3884_cast_fp16 = einsum(equation = var_3884_equation_0, values = (var_3556_cast_fp16, var_3795_cast_fp16))[name = tensor("op_3884_cast_fp16")]; + tensor var_3886_equation_0 = const()[name = tensor("op_3886_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3886_cast_fp16 = einsum(equation = var_3886_equation_0, values = (var_3556_cast_fp16, var_3796_cast_fp16))[name = tensor("op_3886_cast_fp16")]; + tensor var_3888_equation_0 = const()[name = tensor("op_3888_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3888_cast_fp16 = einsum(equation = var_3888_equation_0, values = (var_3556_cast_fp16, var_3797_cast_fp16))[name = tensor("op_3888_cast_fp16")]; + tensor var_3890_equation_0 = const()[name = tensor("op_3890_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3890_cast_fp16 = einsum(equation = var_3890_equation_0, values = (var_3556_cast_fp16, var_3798_cast_fp16))[name = tensor("op_3890_cast_fp16")]; + tensor var_3892_equation_0 = const()[name = tensor("op_3892_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3892_cast_fp16 = einsum(equation = var_3892_equation_0, values = (var_3560_cast_fp16, var_3799_cast_fp16))[name = tensor("op_3892_cast_fp16")]; + tensor var_3894_equation_0 = const()[name = tensor("op_3894_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3894_cast_fp16 = einsum(equation = var_3894_equation_0, values = (var_3560_cast_fp16, var_3800_cast_fp16))[name = tensor("op_3894_cast_fp16")]; + tensor var_3896_equation_0 = const()[name = tensor("op_3896_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3896_cast_fp16 = einsum(equation = var_3896_equation_0, values = (var_3560_cast_fp16, var_3801_cast_fp16))[name = tensor("op_3896_cast_fp16")]; + tensor var_3898_equation_0 = const()[name = tensor("op_3898_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3898_cast_fp16 = einsum(equation = var_3898_equation_0, values = (var_3560_cast_fp16, var_3802_cast_fp16))[name = tensor("op_3898_cast_fp16")]; + tensor var_3900_interleave_0 = const()[name = tensor("op_3900_interleave_0"), val = tensor(false)]; + tensor var_3900_cast_fp16 = concat(axis = var_3011, interleave = var_3900_interleave_0, values = (var_3804_cast_fp16, var_3806_cast_fp16, var_3808_cast_fp16, var_3810_cast_fp16))[name = tensor("op_3900_cast_fp16")]; + tensor var_3902_interleave_0 = const()[name = 
tensor("op_3902_interleave_0"), val = tensor(false)]; + tensor var_3902_cast_fp16 = concat(axis = var_3011, interleave = var_3902_interleave_0, values = (var_3812_cast_fp16, var_3814_cast_fp16, var_3816_cast_fp16, var_3818_cast_fp16))[name = tensor("op_3902_cast_fp16")]; + tensor var_3904_interleave_0 = const()[name = tensor("op_3904_interleave_0"), val = tensor(false)]; + tensor var_3904_cast_fp16 = concat(axis = var_3011, interleave = var_3904_interleave_0, values = (var_3820_cast_fp16, var_3822_cast_fp16, var_3824_cast_fp16, var_3826_cast_fp16))[name = tensor("op_3904_cast_fp16")]; + tensor var_3906_interleave_0 = const()[name = tensor("op_3906_interleave_0"), val = tensor(false)]; + tensor var_3906_cast_fp16 = concat(axis = var_3011, interleave = var_3906_interleave_0, values = (var_3828_cast_fp16, var_3830_cast_fp16, var_3832_cast_fp16, var_3834_cast_fp16))[name = tensor("op_3906_cast_fp16")]; + tensor var_3908_interleave_0 = const()[name = tensor("op_3908_interleave_0"), val = tensor(false)]; + tensor var_3908_cast_fp16 = concat(axis = var_3011, interleave = var_3908_interleave_0, values = (var_3836_cast_fp16, var_3838_cast_fp16, var_3840_cast_fp16, var_3842_cast_fp16))[name = tensor("op_3908_cast_fp16")]; + tensor var_3910_interleave_0 = const()[name = tensor("op_3910_interleave_0"), val = tensor(false)]; + tensor var_3910_cast_fp16 = concat(axis = var_3011, interleave = var_3910_interleave_0, values = (var_3844_cast_fp16, var_3846_cast_fp16, var_3848_cast_fp16, var_3850_cast_fp16))[name = tensor("op_3910_cast_fp16")]; + tensor var_3912_interleave_0 = const()[name = tensor("op_3912_interleave_0"), val = tensor(false)]; + tensor var_3912_cast_fp16 = concat(axis = var_3011, interleave = var_3912_interleave_0, values = (var_3852_cast_fp16, var_3854_cast_fp16, var_3856_cast_fp16, var_3858_cast_fp16))[name = tensor("op_3912_cast_fp16")]; + tensor var_3914_interleave_0 = const()[name = tensor("op_3914_interleave_0"), val = tensor(false)]; + tensor var_3914_cast_fp16 = concat(axis = var_3011, interleave = var_3914_interleave_0, values = (var_3860_cast_fp16, var_3862_cast_fp16, var_3864_cast_fp16, var_3866_cast_fp16))[name = tensor("op_3914_cast_fp16")]; + tensor var_3916_interleave_0 = const()[name = tensor("op_3916_interleave_0"), val = tensor(false)]; + tensor var_3916_cast_fp16 = concat(axis = var_3011, interleave = var_3916_interleave_0, values = (var_3868_cast_fp16, var_3870_cast_fp16, var_3872_cast_fp16, var_3874_cast_fp16))[name = tensor("op_3916_cast_fp16")]; + tensor var_3918_interleave_0 = const()[name = tensor("op_3918_interleave_0"), val = tensor(false)]; + tensor var_3918_cast_fp16 = concat(axis = var_3011, interleave = var_3918_interleave_0, values = (var_3876_cast_fp16, var_3878_cast_fp16, var_3880_cast_fp16, var_3882_cast_fp16))[name = tensor("op_3918_cast_fp16")]; + tensor var_3920_interleave_0 = const()[name = tensor("op_3920_interleave_0"), val = tensor(false)]; + tensor var_3920_cast_fp16 = concat(axis = var_3011, interleave = var_3920_interleave_0, values = (var_3884_cast_fp16, var_3886_cast_fp16, var_3888_cast_fp16, var_3890_cast_fp16))[name = tensor("op_3920_cast_fp16")]; + tensor var_3922_interleave_0 = const()[name = tensor("op_3922_interleave_0"), val = tensor(false)]; + tensor var_3922_cast_fp16 = concat(axis = var_3011, interleave = var_3922_interleave_0, values = (var_3892_cast_fp16, var_3894_cast_fp16, var_3896_cast_fp16, var_3898_cast_fp16))[name = tensor("op_3922_cast_fp16")]; + tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), 
val = tensor(false)]; + tensor input_25_cast_fp16 = concat(axis = var_3028, interleave = input_25_interleave_0, values = (var_3900_cast_fp16, var_3902_cast_fp16, var_3904_cast_fp16, var_3906_cast_fp16, var_3908_cast_fp16, var_3910_cast_fp16, var_3912_cast_fp16, var_3914_cast_fp16, var_3916_cast_fp16, var_3918_cast_fp16, var_3920_cast_fp16, var_3922_cast_fp16))[name = tensor("input_25_cast_fp16")]; + tensor var_3927 = const()[name = tensor("op_3927"), val = tensor([1, 1])]; + tensor var_3929 = const()[name = tensor("op_3929"), val = tensor([1, 1])]; + tensor obj_15_pad_type_0 = const()[name = tensor("obj_15_pad_type_0"), val = tensor("custom")]; + tensor obj_15_pad_0 = const()[name = tensor("obj_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52289280)))]; + tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53468992)))]; + tensor obj_15_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_3929, groups = var_3028, pad = obj_15_pad_0, pad_type = obj_15_pad_type_0, strides = var_3927, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor var_3935 = const()[name = tensor("op_3935"), val = tensor([1])]; + tensor channels_mean_15_cast_fp16 = reduce_mean(axes = var_3935, keep_dims = var_3029, x = inputs_15_cast_fp16)[name = tensor("channels_mean_15_cast_fp16")]; + tensor zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor("zero_mean_15_cast_fp16")]; + tensor zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor("zero_mean_sq_15_cast_fp16")]; + tensor var_3939 = const()[name = tensor("op_3939"), val = tensor([1])]; + tensor var_3940_cast_fp16 = reduce_mean(axes = var_3939, keep_dims = var_3029, x = zero_mean_sq_15_cast_fp16)[name = tensor("op_3940_cast_fp16")]; + tensor var_3941_to_fp16 = const()[name = tensor("op_3941_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3942_cast_fp16 = add(x = var_3940_cast_fp16, y = var_3941_to_fp16)[name = tensor("op_3942_cast_fp16")]; + tensor denom_15_epsilon_0_to_fp16 = const()[name = tensor("denom_15_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_3942_cast_fp16)[name = tensor("denom_15_cast_fp16")]; + tensor out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53470592)))]; + tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53472192)))]; + tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = 
input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_3953 = const()[name = tensor("op_3953"), val = tensor([1, 1])]; + tensor var_3955 = const()[name = tensor("op_3955"), val = tensor([1, 1])]; + tensor input_29_pad_type_0 = const()[name = tensor("input_29_pad_type_0"), val = tensor("custom")]; + tensor input_29_pad_0 = const()[name = tensor("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53473792)))]; + tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58192448)))]; + tensor input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_3955, groups = var_3028, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = var_3953, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_3961 = const()[name = tensor("op_3961"), val = tensor([1, 1])]; + tensor var_3963 = const()[name = tensor("op_3963"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58198656)))]; + tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62917312)))]; + tensor hidden_states_11_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_3963, groups = var_3028, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = var_3961, weight = layers_3_fc2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_3970 = const()[name = tensor("op_3970"), val = tensor(3)]; + tensor var_3987 = const()[name = tensor("op_3987"), val = tensor(1)]; + tensor var_3988 = const()[name = tensor("op_3988"), val = tensor(true)]; + tensor var_3998 = const()[name = tensor("op_3998"), val = tensor([1])]; + tensor channels_mean_17_cast_fp16 = reduce_mean(axes = var_3998, keep_dims = var_3988, x = inputs_17_cast_fp16)[name = tensor("channels_mean_17_cast_fp16")]; + tensor zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor("zero_mean_17_cast_fp16")]; + tensor zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor("zero_mean_sq_17_cast_fp16")]; + tensor var_4002 = const()[name = tensor("op_4002"), val = tensor([1])]; + tensor var_4003_cast_fp16 = reduce_mean(axes = var_4002, keep_dims = var_3988, x = zero_mean_sq_17_cast_fp16)[name = 
tensor("op_4003_cast_fp16")]; + tensor var_4004_to_fp16 = const()[name = tensor("op_4004_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_4005_cast_fp16 = add(x = var_4003_cast_fp16, y = var_4004_to_fp16)[name = tensor("op_4005_cast_fp16")]; + tensor denom_17_epsilon_0_to_fp16 = const()[name = tensor("denom_17_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_4005_cast_fp16)[name = tensor("denom_17_cast_fp16")]; + tensor out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62918912)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62920512)))]; + tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor var_4020 = const()[name = tensor("op_4020"), val = tensor([1, 1])]; + tensor var_4022 = const()[name = tensor("op_4022"), val = tensor([1, 1])]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("custom")]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62922112)))]; + tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64101824)))]; + tensor query_9_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = var_4022, groups = var_3987, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_4020, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_4026 = const()[name = tensor("op_4026"), val = tensor([1, 1])]; + tensor var_4028 = const()[name = tensor("op_4028"), val = tensor([1, 1])]; + tensor key_9_pad_type_0 = const()[name = tensor("key_9_pad_type_0"), val = tensor("custom")]; + tensor key_9_pad_0 = const()[name = tensor("key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64103424)))]; + tensor key_9_cast_fp16 = conv(dilations = var_4028, groups = var_3987, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = var_4026, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_4033 = const()[name = tensor("op_4033"), val = tensor([1, 1])]; + tensor var_4035 = const()[name = tensor("op_4035"), val = tensor([1, 1])]; + tensor value_9_pad_type_0 = const()[name = tensor("value_9_pad_type_0"), val = tensor("custom")]; + tensor value_9_pad_0 = const()[name = 
tensor("value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65283136)))]; + tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66462848)))]; + tensor value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = var_4035, groups = var_3987, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = var_4033, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_4042_begin_0 = const()[name = tensor("op_4042_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4042_end_0 = const()[name = tensor("op_4042_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4042_end_mask_0 = const()[name = tensor("op_4042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4042_cast_fp16 = slice_by_index(begin = var_4042_begin_0, end = var_4042_end_0, end_mask = var_4042_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4042_cast_fp16")]; + tensor var_4046_begin_0 = const()[name = tensor("op_4046_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_4046_end_0 = const()[name = tensor("op_4046_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_4046_end_mask_0 = const()[name = tensor("op_4046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4046_cast_fp16 = slice_by_index(begin = var_4046_begin_0, end = var_4046_end_0, end_mask = var_4046_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4046_cast_fp16")]; + tensor var_4050_begin_0 = const()[name = tensor("op_4050_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_4050_end_0 = const()[name = tensor("op_4050_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_4050_end_mask_0 = const()[name = tensor("op_4050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4050_cast_fp16 = slice_by_index(begin = var_4050_begin_0, end = var_4050_end_0, end_mask = var_4050_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4050_cast_fp16")]; + tensor var_4054_begin_0 = const()[name = tensor("op_4054_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_4054_end_0 = const()[name = tensor("op_4054_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_4054_end_mask_0 = const()[name = tensor("op_4054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4054_cast_fp16 = slice_by_index(begin = var_4054_begin_0, end = var_4054_end_0, end_mask = var_4054_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4054_cast_fp16")]; + tensor var_4058_begin_0 = const()[name = tensor("op_4058_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_4058_end_0 = const()[name = tensor("op_4058_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_4058_end_mask_0 = const()[name = tensor("op_4058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4058_cast_fp16 = slice_by_index(begin = var_4058_begin_0, end = var_4058_end_0, end_mask = var_4058_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4058_cast_fp16")]; + tensor var_4062_begin_0 = const()[name = tensor("op_4062_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_4062_end_0 = const()[name = tensor("op_4062_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_4062_end_mask_0 = const()[name 
= tensor("op_4062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4062_cast_fp16 = slice_by_index(begin = var_4062_begin_0, end = var_4062_end_0, end_mask = var_4062_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4062_cast_fp16")]; + tensor var_4066_begin_0 = const()[name = tensor("op_4066_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_4066_end_0 = const()[name = tensor("op_4066_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_4066_end_mask_0 = const()[name = tensor("op_4066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = var_4066_end_0, end_mask = var_4066_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4066_cast_fp16")]; + tensor var_4070_begin_0 = const()[name = tensor("op_4070_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_4070_end_0 = const()[name = tensor("op_4070_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_4070_end_mask_0 = const()[name = tensor("op_4070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4070_cast_fp16 = slice_by_index(begin = var_4070_begin_0, end = var_4070_end_0, end_mask = var_4070_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4070_cast_fp16")]; + tensor var_4074_begin_0 = const()[name = tensor("op_4074_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_4074_end_0 = const()[name = tensor("op_4074_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_4074_end_mask_0 = const()[name = tensor("op_4074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4074_cast_fp16 = slice_by_index(begin = var_4074_begin_0, end = var_4074_end_0, end_mask = var_4074_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4074_cast_fp16")]; + tensor var_4078_begin_0 = const()[name = tensor("op_4078_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_4078_end_0 = const()[name = tensor("op_4078_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_4078_end_mask_0 = const()[name = tensor("op_4078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4078_cast_fp16 = slice_by_index(begin = var_4078_begin_0, end = var_4078_end_0, end_mask = var_4078_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4078_cast_fp16")]; + tensor var_4082_begin_0 = const()[name = tensor("op_4082_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_4082_end_0 = const()[name = tensor("op_4082_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_4082_end_mask_0 = const()[name = tensor("op_4082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4082_cast_fp16 = slice_by_index(begin = var_4082_begin_0, end = var_4082_end_0, end_mask = var_4082_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4082_cast_fp16")]; + tensor var_4086_begin_0 = const()[name = tensor("op_4086_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_4086_end_0 = const()[name = tensor("op_4086_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_4086_end_mask_0 = const()[name = tensor("op_4086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4086_cast_fp16 = slice_by_index(begin = var_4086_begin_0, end = var_4086_end_0, end_mask = var_4086_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_4086_cast_fp16")]; + tensor var_4095_begin_0 = const()[name = tensor("op_4095_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4095_end_0 = const()[name = tensor("op_4095_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4095_end_mask_0 = const()[name = 
tensor("op_4095_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4095_cast_fp16 = slice_by_index(begin = var_4095_begin_0, end = var_4095_end_0, end_mask = var_4095_end_mask_0, x = var_4042_cast_fp16)[name = tensor("op_4095_cast_fp16")]; + tensor var_4102_begin_0 = const()[name = tensor("op_4102_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4102_end_0 = const()[name = tensor("op_4102_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4102_end_mask_0 = const()[name = tensor("op_4102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4102_cast_fp16 = slice_by_index(begin = var_4102_begin_0, end = var_4102_end_0, end_mask = var_4102_end_mask_0, x = var_4042_cast_fp16)[name = tensor("op_4102_cast_fp16")]; + tensor var_4109_begin_0 = const()[name = tensor("op_4109_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4109_end_0 = const()[name = tensor("op_4109_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4109_end_mask_0 = const()[name = tensor("op_4109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4109_cast_fp16 = slice_by_index(begin = var_4109_begin_0, end = var_4109_end_0, end_mask = var_4109_end_mask_0, x = var_4042_cast_fp16)[name = tensor("op_4109_cast_fp16")]; + tensor var_4116_begin_0 = const()[name = tensor("op_4116_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4116_end_0 = const()[name = tensor("op_4116_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4116_end_mask_0 = const()[name = tensor("op_4116_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4116_cast_fp16 = slice_by_index(begin = var_4116_begin_0, end = var_4116_end_0, end_mask = var_4116_end_mask_0, x = var_4042_cast_fp16)[name = tensor("op_4116_cast_fp16")]; + tensor var_4123_begin_0 = const()[name = tensor("op_4123_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4123_end_0 = const()[name = tensor("op_4123_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4123_end_mask_0 = const()[name = tensor("op_4123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4123_cast_fp16 = slice_by_index(begin = var_4123_begin_0, end = var_4123_end_0, end_mask = var_4123_end_mask_0, x = var_4046_cast_fp16)[name = tensor("op_4123_cast_fp16")]; + tensor var_4130_begin_0 = const()[name = tensor("op_4130_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4130_end_0 = const()[name = tensor("op_4130_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4130_end_mask_0 = const()[name = tensor("op_4130_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4130_cast_fp16 = slice_by_index(begin = var_4130_begin_0, end = var_4130_end_0, end_mask = var_4130_end_mask_0, x = var_4046_cast_fp16)[name = tensor("op_4130_cast_fp16")]; + tensor var_4137_begin_0 = const()[name = tensor("op_4137_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4137_end_0 = const()[name = tensor("op_4137_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4137_end_mask_0 = const()[name = tensor("op_4137_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4137_cast_fp16 = slice_by_index(begin = var_4137_begin_0, end = var_4137_end_0, end_mask = var_4137_end_mask_0, x = var_4046_cast_fp16)[name = tensor("op_4137_cast_fp16")]; + tensor var_4144_begin_0 = const()[name = tensor("op_4144_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4144_end_0 = const()[name = tensor("op_4144_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4144_end_mask_0 = const()[name = tensor("op_4144_end_mask_0"), 
val = tensor([true, true, true, false])]; + tensor var_4144_cast_fp16 = slice_by_index(begin = var_4144_begin_0, end = var_4144_end_0, end_mask = var_4144_end_mask_0, x = var_4046_cast_fp16)[name = tensor("op_4144_cast_fp16")]; + tensor var_4151_begin_0 = const()[name = tensor("op_4151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4151_end_0 = const()[name = tensor("op_4151_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4151_end_mask_0 = const()[name = tensor("op_4151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4151_cast_fp16 = slice_by_index(begin = var_4151_begin_0, end = var_4151_end_0, end_mask = var_4151_end_mask_0, x = var_4050_cast_fp16)[name = tensor("op_4151_cast_fp16")]; + tensor var_4158_begin_0 = const()[name = tensor("op_4158_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4158_end_0 = const()[name = tensor("op_4158_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4158_end_mask_0 = const()[name = tensor("op_4158_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4158_cast_fp16 = slice_by_index(begin = var_4158_begin_0, end = var_4158_end_0, end_mask = var_4158_end_mask_0, x = var_4050_cast_fp16)[name = tensor("op_4158_cast_fp16")]; + tensor var_4165_begin_0 = const()[name = tensor("op_4165_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4165_end_0 = const()[name = tensor("op_4165_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4165_end_mask_0 = const()[name = tensor("op_4165_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4165_cast_fp16 = slice_by_index(begin = var_4165_begin_0, end = var_4165_end_0, end_mask = var_4165_end_mask_0, x = var_4050_cast_fp16)[name = tensor("op_4165_cast_fp16")]; + tensor var_4172_begin_0 = const()[name = tensor("op_4172_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4172_end_0 = const()[name = tensor("op_4172_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4172_end_mask_0 = const()[name = tensor("op_4172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4172_cast_fp16 = slice_by_index(begin = var_4172_begin_0, end = var_4172_end_0, end_mask = var_4172_end_mask_0, x = var_4050_cast_fp16)[name = tensor("op_4172_cast_fp16")]; + tensor var_4179_begin_0 = const()[name = tensor("op_4179_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4179_end_0 = const()[name = tensor("op_4179_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4179_end_mask_0 = const()[name = tensor("op_4179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4179_cast_fp16 = slice_by_index(begin = var_4179_begin_0, end = var_4179_end_0, end_mask = var_4179_end_mask_0, x = var_4054_cast_fp16)[name = tensor("op_4179_cast_fp16")]; + tensor var_4186_begin_0 = const()[name = tensor("op_4186_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4186_end_0 = const()[name = tensor("op_4186_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4186_end_mask_0 = const()[name = tensor("op_4186_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4186_cast_fp16 = slice_by_index(begin = var_4186_begin_0, end = var_4186_end_0, end_mask = var_4186_end_mask_0, x = var_4054_cast_fp16)[name = tensor("op_4186_cast_fp16")]; + tensor var_4193_begin_0 = const()[name = tensor("op_4193_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4193_end_0 = const()[name = tensor("op_4193_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4193_end_mask_0 = const()[name = tensor("op_4193_end_mask_0"), val = tensor([true, true, true, 
false])]; + tensor var_4193_cast_fp16 = slice_by_index(begin = var_4193_begin_0, end = var_4193_end_0, end_mask = var_4193_end_mask_0, x = var_4054_cast_fp16)[name = tensor("op_4193_cast_fp16")]; + tensor var_4200_begin_0 = const()[name = tensor("op_4200_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4200_end_0 = const()[name = tensor("op_4200_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4200_end_mask_0 = const()[name = tensor("op_4200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4200_cast_fp16 = slice_by_index(begin = var_4200_begin_0, end = var_4200_end_0, end_mask = var_4200_end_mask_0, x = var_4054_cast_fp16)[name = tensor("op_4200_cast_fp16")]; + tensor var_4207_begin_0 = const()[name = tensor("op_4207_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4207_end_0 = const()[name = tensor("op_4207_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4207_end_mask_0 = const()[name = tensor("op_4207_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4207_cast_fp16 = slice_by_index(begin = var_4207_begin_0, end = var_4207_end_0, end_mask = var_4207_end_mask_0, x = var_4058_cast_fp16)[name = tensor("op_4207_cast_fp16")]; + tensor var_4214_begin_0 = const()[name = tensor("op_4214_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4214_end_0 = const()[name = tensor("op_4214_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4214_end_mask_0 = const()[name = tensor("op_4214_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4214_cast_fp16 = slice_by_index(begin = var_4214_begin_0, end = var_4214_end_0, end_mask = var_4214_end_mask_0, x = var_4058_cast_fp16)[name = tensor("op_4214_cast_fp16")]; + tensor var_4221_begin_0 = const()[name = tensor("op_4221_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4221_end_0 = const()[name = tensor("op_4221_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4221_end_mask_0 = const()[name = tensor("op_4221_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4221_cast_fp16 = slice_by_index(begin = var_4221_begin_0, end = var_4221_end_0, end_mask = var_4221_end_mask_0, x = var_4058_cast_fp16)[name = tensor("op_4221_cast_fp16")]; + tensor var_4228_begin_0 = const()[name = tensor("op_4228_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4228_end_0 = const()[name = tensor("op_4228_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4228_end_mask_0 = const()[name = tensor("op_4228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4228_cast_fp16 = slice_by_index(begin = var_4228_begin_0, end = var_4228_end_0, end_mask = var_4228_end_mask_0, x = var_4058_cast_fp16)[name = tensor("op_4228_cast_fp16")]; + tensor var_4235_begin_0 = const()[name = tensor("op_4235_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4235_end_0 = const()[name = tensor("op_4235_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4235_end_mask_0 = const()[name = tensor("op_4235_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4235_cast_fp16 = slice_by_index(begin = var_4235_begin_0, end = var_4235_end_0, end_mask = var_4235_end_mask_0, x = var_4062_cast_fp16)[name = tensor("op_4235_cast_fp16")]; + tensor var_4242_begin_0 = const()[name = tensor("op_4242_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4242_end_0 = const()[name = tensor("op_4242_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4242_end_mask_0 = const()[name = tensor("op_4242_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_4242_cast_fp16 = slice_by_index(begin = var_4242_begin_0, end = var_4242_end_0, end_mask = var_4242_end_mask_0, x = var_4062_cast_fp16)[name = tensor("op_4242_cast_fp16")]; + tensor var_4249_begin_0 = const()[name = tensor("op_4249_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4249_end_0 = const()[name = tensor("op_4249_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4249_end_mask_0 = const()[name = tensor("op_4249_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4249_cast_fp16 = slice_by_index(begin = var_4249_begin_0, end = var_4249_end_0, end_mask = var_4249_end_mask_0, x = var_4062_cast_fp16)[name = tensor("op_4249_cast_fp16")]; + tensor var_4256_begin_0 = const()[name = tensor("op_4256_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4256_end_0 = const()[name = tensor("op_4256_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4256_end_mask_0 = const()[name = tensor("op_4256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4256_cast_fp16 = slice_by_index(begin = var_4256_begin_0, end = var_4256_end_0, end_mask = var_4256_end_mask_0, x = var_4062_cast_fp16)[name = tensor("op_4256_cast_fp16")]; + tensor var_4263_begin_0 = const()[name = tensor("op_4263_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4263_end_0 = const()[name = tensor("op_4263_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4263_end_mask_0 = const()[name = tensor("op_4263_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4263_cast_fp16 = slice_by_index(begin = var_4263_begin_0, end = var_4263_end_0, end_mask = var_4263_end_mask_0, x = var_4066_cast_fp16)[name = tensor("op_4263_cast_fp16")]; + tensor var_4270_begin_0 = const()[name = tensor("op_4270_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4270_end_0 = const()[name = tensor("op_4270_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4270_end_mask_0 = const()[name = tensor("op_4270_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4270_cast_fp16 = slice_by_index(begin = var_4270_begin_0, end = var_4270_end_0, end_mask = var_4270_end_mask_0, x = var_4066_cast_fp16)[name = tensor("op_4270_cast_fp16")]; + tensor var_4277_begin_0 = const()[name = tensor("op_4277_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4277_end_0 = const()[name = tensor("op_4277_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4277_end_mask_0 = const()[name = tensor("op_4277_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = var_4277_end_0, end_mask = var_4277_end_mask_0, x = var_4066_cast_fp16)[name = tensor("op_4277_cast_fp16")]; + tensor var_4284_begin_0 = const()[name = tensor("op_4284_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4284_end_0 = const()[name = tensor("op_4284_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4284_end_mask_0 = const()[name = tensor("op_4284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4284_cast_fp16 = slice_by_index(begin = var_4284_begin_0, end = var_4284_end_0, end_mask = var_4284_end_mask_0, x = var_4066_cast_fp16)[name = tensor("op_4284_cast_fp16")]; + tensor var_4291_begin_0 = const()[name = tensor("op_4291_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4291_end_0 = const()[name = tensor("op_4291_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4291_end_mask_0 = const()[name = tensor("op_4291_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4291_cast_fp16 = 
slice_by_index(begin = var_4291_begin_0, end = var_4291_end_0, end_mask = var_4291_end_mask_0, x = var_4070_cast_fp16)[name = tensor("op_4291_cast_fp16")]; + tensor var_4298_begin_0 = const()[name = tensor("op_4298_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4298_end_0 = const()[name = tensor("op_4298_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4298_end_mask_0 = const()[name = tensor("op_4298_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4298_cast_fp16 = slice_by_index(begin = var_4298_begin_0, end = var_4298_end_0, end_mask = var_4298_end_mask_0, x = var_4070_cast_fp16)[name = tensor("op_4298_cast_fp16")]; + tensor var_4305_begin_0 = const()[name = tensor("op_4305_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4305_end_0 = const()[name = tensor("op_4305_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4305_end_mask_0 = const()[name = tensor("op_4305_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4305_cast_fp16 = slice_by_index(begin = var_4305_begin_0, end = var_4305_end_0, end_mask = var_4305_end_mask_0, x = var_4070_cast_fp16)[name = tensor("op_4305_cast_fp16")]; + tensor var_4312_begin_0 = const()[name = tensor("op_4312_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4312_end_0 = const()[name = tensor("op_4312_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4312_end_mask_0 = const()[name = tensor("op_4312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4312_cast_fp16 = slice_by_index(begin = var_4312_begin_0, end = var_4312_end_0, end_mask = var_4312_end_mask_0, x = var_4070_cast_fp16)[name = tensor("op_4312_cast_fp16")]; + tensor var_4319_begin_0 = const()[name = tensor("op_4319_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4319_end_0 = const()[name = tensor("op_4319_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4319_end_mask_0 = const()[name = tensor("op_4319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4319_cast_fp16 = slice_by_index(begin = var_4319_begin_0, end = var_4319_end_0, end_mask = var_4319_end_mask_0, x = var_4074_cast_fp16)[name = tensor("op_4319_cast_fp16")]; + tensor var_4326_begin_0 = const()[name = tensor("op_4326_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4326_end_0 = const()[name = tensor("op_4326_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4326_end_mask_0 = const()[name = tensor("op_4326_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4326_cast_fp16 = slice_by_index(begin = var_4326_begin_0, end = var_4326_end_0, end_mask = var_4326_end_mask_0, x = var_4074_cast_fp16)[name = tensor("op_4326_cast_fp16")]; + tensor var_4333_begin_0 = const()[name = tensor("op_4333_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4333_end_0 = const()[name = tensor("op_4333_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4333_end_mask_0 = const()[name = tensor("op_4333_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4333_cast_fp16 = slice_by_index(begin = var_4333_begin_0, end = var_4333_end_0, end_mask = var_4333_end_mask_0, x = var_4074_cast_fp16)[name = tensor("op_4333_cast_fp16")]; + tensor var_4340_begin_0 = const()[name = tensor("op_4340_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4340_end_0 = const()[name = tensor("op_4340_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4340_end_mask_0 = const()[name = tensor("op_4340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4340_cast_fp16 = slice_by_index(begin = 
var_4340_begin_0, end = var_4340_end_0, end_mask = var_4340_end_mask_0, x = var_4074_cast_fp16)[name = tensor("op_4340_cast_fp16")]; + tensor var_4347_begin_0 = const()[name = tensor("op_4347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4347_end_0 = const()[name = tensor("op_4347_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4347_end_mask_0 = const()[name = tensor("op_4347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4347_cast_fp16 = slice_by_index(begin = var_4347_begin_0, end = var_4347_end_0, end_mask = var_4347_end_mask_0, x = var_4078_cast_fp16)[name = tensor("op_4347_cast_fp16")]; + tensor var_4354_begin_0 = const()[name = tensor("op_4354_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4354_end_0 = const()[name = tensor("op_4354_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4354_end_mask_0 = const()[name = tensor("op_4354_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4354_cast_fp16 = slice_by_index(begin = var_4354_begin_0, end = var_4354_end_0, end_mask = var_4354_end_mask_0, x = var_4078_cast_fp16)[name = tensor("op_4354_cast_fp16")]; + tensor var_4361_begin_0 = const()[name = tensor("op_4361_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4361_end_0 = const()[name = tensor("op_4361_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4361_end_mask_0 = const()[name = tensor("op_4361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4361_cast_fp16 = slice_by_index(begin = var_4361_begin_0, end = var_4361_end_0, end_mask = var_4361_end_mask_0, x = var_4078_cast_fp16)[name = tensor("op_4361_cast_fp16")]; + tensor var_4368_begin_0 = const()[name = tensor("op_4368_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4368_end_0 = const()[name = tensor("op_4368_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4368_end_mask_0 = const()[name = tensor("op_4368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4368_cast_fp16 = slice_by_index(begin = var_4368_begin_0, end = var_4368_end_0, end_mask = var_4368_end_mask_0, x = var_4078_cast_fp16)[name = tensor("op_4368_cast_fp16")]; + tensor var_4375_begin_0 = const()[name = tensor("op_4375_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4375_end_0 = const()[name = tensor("op_4375_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4375_end_mask_0 = const()[name = tensor("op_4375_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4375_cast_fp16 = slice_by_index(begin = var_4375_begin_0, end = var_4375_end_0, end_mask = var_4375_end_mask_0, x = var_4082_cast_fp16)[name = tensor("op_4375_cast_fp16")]; + tensor var_4382_begin_0 = const()[name = tensor("op_4382_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4382_end_0 = const()[name = tensor("op_4382_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4382_end_mask_0 = const()[name = tensor("op_4382_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4382_cast_fp16 = slice_by_index(begin = var_4382_begin_0, end = var_4382_end_0, end_mask = var_4382_end_mask_0, x = var_4082_cast_fp16)[name = tensor("op_4382_cast_fp16")]; + tensor var_4389_begin_0 = const()[name = tensor("op_4389_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4389_end_0 = const()[name = tensor("op_4389_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4389_end_mask_0 = const()[name = tensor("op_4389_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4389_cast_fp16 = slice_by_index(begin = var_4389_begin_0, end = 
var_4389_end_0, end_mask = var_4389_end_mask_0, x = var_4082_cast_fp16)[name = tensor("op_4389_cast_fp16")]; + tensor var_4396_begin_0 = const()[name = tensor("op_4396_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4396_end_0 = const()[name = tensor("op_4396_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4396_end_mask_0 = const()[name = tensor("op_4396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4396_cast_fp16 = slice_by_index(begin = var_4396_begin_0, end = var_4396_end_0, end_mask = var_4396_end_mask_0, x = var_4082_cast_fp16)[name = tensor("op_4396_cast_fp16")]; + tensor var_4403_begin_0 = const()[name = tensor("op_4403_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4403_end_0 = const()[name = tensor("op_4403_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4403_end_mask_0 = const()[name = tensor("op_4403_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4403_cast_fp16 = slice_by_index(begin = var_4403_begin_0, end = var_4403_end_0, end_mask = var_4403_end_mask_0, x = var_4086_cast_fp16)[name = tensor("op_4403_cast_fp16")]; + tensor var_4410_begin_0 = const()[name = tensor("op_4410_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4410_end_0 = const()[name = tensor("op_4410_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4410_end_mask_0 = const()[name = tensor("op_4410_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4410_cast_fp16 = slice_by_index(begin = var_4410_begin_0, end = var_4410_end_0, end_mask = var_4410_end_mask_0, x = var_4086_cast_fp16)[name = tensor("op_4410_cast_fp16")]; + tensor var_4417_begin_0 = const()[name = tensor("op_4417_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4417_end_0 = const()[name = tensor("op_4417_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4417_end_mask_0 = const()[name = tensor("op_4417_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4417_cast_fp16 = slice_by_index(begin = var_4417_begin_0, end = var_4417_end_0, end_mask = var_4417_end_mask_0, x = var_4086_cast_fp16)[name = tensor("op_4417_cast_fp16")]; + tensor var_4424_begin_0 = const()[name = tensor("op_4424_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4424_end_0 = const()[name = tensor("op_4424_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4424_end_mask_0 = const()[name = tensor("op_4424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4424_cast_fp16 = slice_by_index(begin = var_4424_begin_0, end = var_4424_end_0, end_mask = var_4424_end_mask_0, x = var_4086_cast_fp16)[name = tensor("op_4424_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_4429_begin_0 = const()[name = tensor("op_4429_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4429_end_0 = const()[name = tensor("op_4429_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_4429_end_mask_0 = const()[name = tensor("op_4429_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_7 = transpose(perm = k_9_perm_0, x = key_9_cast_fp16)[name = tensor("transpose_7")]; + tensor var_4429_cast_fp16 = slice_by_index(begin = var_4429_begin_0, end = var_4429_end_0, end_mask = var_4429_end_mask_0, x = transpose_7)[name = tensor("op_4429_cast_fp16")]; + tensor var_4433_begin_0 = const()[name = tensor("op_4433_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_4433_end_0 = const()[name = tensor("op_4433_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_4433_end_mask_0 = 
const()[name = tensor("op_4433_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4433_cast_fp16 = slice_by_index(begin = var_4433_begin_0, end = var_4433_end_0, end_mask = var_4433_end_mask_0, x = transpose_7)[name = tensor("op_4433_cast_fp16")]; + tensor var_4437_begin_0 = const()[name = tensor("op_4437_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_4437_end_0 = const()[name = tensor("op_4437_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_4437_end_mask_0 = const()[name = tensor("op_4437_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4437_cast_fp16 = slice_by_index(begin = var_4437_begin_0, end = var_4437_end_0, end_mask = var_4437_end_mask_0, x = transpose_7)[name = tensor("op_4437_cast_fp16")]; + tensor var_4441_begin_0 = const()[name = tensor("op_4441_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_4441_end_0 = const()[name = tensor("op_4441_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_4441_end_mask_0 = const()[name = tensor("op_4441_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4441_cast_fp16 = slice_by_index(begin = var_4441_begin_0, end = var_4441_end_0, end_mask = var_4441_end_mask_0, x = transpose_7)[name = tensor("op_4441_cast_fp16")]; + tensor var_4445_begin_0 = const()[name = tensor("op_4445_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_4445_end_0 = const()[name = tensor("op_4445_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_4445_end_mask_0 = const()[name = tensor("op_4445_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4445_cast_fp16 = slice_by_index(begin = var_4445_begin_0, end = var_4445_end_0, end_mask = var_4445_end_mask_0, x = transpose_7)[name = tensor("op_4445_cast_fp16")]; + tensor var_4449_begin_0 = const()[name = tensor("op_4449_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_4449_end_0 = const()[name = tensor("op_4449_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_4449_end_mask_0 = const()[name = tensor("op_4449_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4449_cast_fp16 = slice_by_index(begin = var_4449_begin_0, end = var_4449_end_0, end_mask = var_4449_end_mask_0, x = transpose_7)[name = tensor("op_4449_cast_fp16")]; + tensor var_4453_begin_0 = const()[name = tensor("op_4453_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_4453_end_0 = const()[name = tensor("op_4453_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_4453_end_mask_0 = const()[name = tensor("op_4453_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4453_cast_fp16 = slice_by_index(begin = var_4453_begin_0, end = var_4453_end_0, end_mask = var_4453_end_mask_0, x = transpose_7)[name = tensor("op_4453_cast_fp16")]; + tensor var_4457_begin_0 = const()[name = tensor("op_4457_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_4457_end_0 = const()[name = tensor("op_4457_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_4457_end_mask_0 = const()[name = tensor("op_4457_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4457_cast_fp16 = slice_by_index(begin = var_4457_begin_0, end = var_4457_end_0, end_mask = var_4457_end_mask_0, x = transpose_7)[name = tensor("op_4457_cast_fp16")]; + tensor var_4461_begin_0 = const()[name = tensor("op_4461_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_4461_end_0 = const()[name = tensor("op_4461_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_4461_end_mask_0 = const()[name = tensor("op_4461_end_mask_0"), val = tensor([true, 
true, true, false])]; + tensor var_4461_cast_fp16 = slice_by_index(begin = var_4461_begin_0, end = var_4461_end_0, end_mask = var_4461_end_mask_0, x = transpose_7)[name = tensor("op_4461_cast_fp16")]; + tensor var_4465_begin_0 = const()[name = tensor("op_4465_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_4465_end_0 = const()[name = tensor("op_4465_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_4465_end_mask_0 = const()[name = tensor("op_4465_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4465_cast_fp16 = slice_by_index(begin = var_4465_begin_0, end = var_4465_end_0, end_mask = var_4465_end_mask_0, x = transpose_7)[name = tensor("op_4465_cast_fp16")]; + tensor var_4469_begin_0 = const()[name = tensor("op_4469_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_4469_end_0 = const()[name = tensor("op_4469_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_4469_end_mask_0 = const()[name = tensor("op_4469_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4469_cast_fp16 = slice_by_index(begin = var_4469_begin_0, end = var_4469_end_0, end_mask = var_4469_end_mask_0, x = transpose_7)[name = tensor("op_4469_cast_fp16")]; + tensor var_4473_begin_0 = const()[name = tensor("op_4473_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_4473_end_0 = const()[name = tensor("op_4473_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_4473_end_mask_0 = const()[name = tensor("op_4473_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4473_cast_fp16 = slice_by_index(begin = var_4473_begin_0, end = var_4473_end_0, end_mask = var_4473_end_mask_0, x = transpose_7)[name = tensor("op_4473_cast_fp16")]; + tensor var_4475_begin_0 = const()[name = tensor("op_4475_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4475_end_0 = const()[name = tensor("op_4475_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4475_end_mask_0 = const()[name = tensor("op_4475_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4475_cast_fp16 = slice_by_index(begin = var_4475_begin_0, end = var_4475_end_0, end_mask = var_4475_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4475_cast_fp16")]; + tensor var_4479_begin_0 = const()[name = tensor("op_4479_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_4479_end_0 = const()[name = tensor("op_4479_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_4479_end_mask_0 = const()[name = tensor("op_4479_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4479_cast_fp16 = slice_by_index(begin = var_4479_begin_0, end = var_4479_end_0, end_mask = var_4479_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4479_cast_fp16")]; + tensor var_4483_begin_0 = const()[name = tensor("op_4483_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_4483_end_0 = const()[name = tensor("op_4483_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_4483_end_mask_0 = const()[name = tensor("op_4483_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4483_cast_fp16 = slice_by_index(begin = var_4483_begin_0, end = var_4483_end_0, end_mask = var_4483_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4483_cast_fp16")]; + tensor var_4487_begin_0 = const()[name = tensor("op_4487_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_4487_end_0 = const()[name = tensor("op_4487_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_4487_end_mask_0 = const()[name = tensor("op_4487_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4487_cast_fp16 = 
slice_by_index(begin = var_4487_begin_0, end = var_4487_end_0, end_mask = var_4487_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4487_cast_fp16")]; + tensor var_4491_begin_0 = const()[name = tensor("op_4491_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_4491_end_0 = const()[name = tensor("op_4491_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_4491_end_mask_0 = const()[name = tensor("op_4491_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4491_cast_fp16 = slice_by_index(begin = var_4491_begin_0, end = var_4491_end_0, end_mask = var_4491_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4491_cast_fp16")]; + tensor var_4495_begin_0 = const()[name = tensor("op_4495_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_4495_end_0 = const()[name = tensor("op_4495_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_4495_end_mask_0 = const()[name = tensor("op_4495_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4495_cast_fp16 = slice_by_index(begin = var_4495_begin_0, end = var_4495_end_0, end_mask = var_4495_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4495_cast_fp16")]; + tensor var_4499_begin_0 = const()[name = tensor("op_4499_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_4499_end_0 = const()[name = tensor("op_4499_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_4499_end_mask_0 = const()[name = tensor("op_4499_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4499_cast_fp16 = slice_by_index(begin = var_4499_begin_0, end = var_4499_end_0, end_mask = var_4499_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4499_cast_fp16")]; + tensor var_4503_begin_0 = const()[name = tensor("op_4503_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_4503_end_0 = const()[name = tensor("op_4503_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_4503_end_mask_0 = const()[name = tensor("op_4503_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4503_cast_fp16 = slice_by_index(begin = var_4503_begin_0, end = var_4503_end_0, end_mask = var_4503_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4503_cast_fp16")]; + tensor var_4507_begin_0 = const()[name = tensor("op_4507_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_4507_end_0 = const()[name = tensor("op_4507_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_4507_end_mask_0 = const()[name = tensor("op_4507_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4507_cast_fp16 = slice_by_index(begin = var_4507_begin_0, end = var_4507_end_0, end_mask = var_4507_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4507_cast_fp16")]; + tensor var_4511_begin_0 = const()[name = tensor("op_4511_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_4511_end_0 = const()[name = tensor("op_4511_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_4511_end_mask_0 = const()[name = tensor("op_4511_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4511_cast_fp16 = slice_by_index(begin = var_4511_begin_0, end = var_4511_end_0, end_mask = var_4511_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4511_cast_fp16")]; + tensor var_4515_begin_0 = const()[name = tensor("op_4515_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_4515_end_0 = const()[name = tensor("op_4515_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_4515_end_mask_0 = const()[name = tensor("op_4515_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4515_cast_fp16 = slice_by_index(begin = 
var_4515_begin_0, end = var_4515_end_0, end_mask = var_4515_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4515_cast_fp16")]; + tensor var_4519_begin_0 = const()[name = tensor("op_4519_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_4519_end_0 = const()[name = tensor("op_4519_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_4519_end_mask_0 = const()[name = tensor("op_4519_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4519_cast_fp16 = slice_by_index(begin = var_4519_begin_0, end = var_4519_end_0, end_mask = var_4519_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_4519_cast_fp16")]; + tensor var_4523_equation_0 = const()[name = tensor("op_4523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4523_cast_fp16 = einsum(equation = var_4523_equation_0, values = (var_4429_cast_fp16, var_4095_cast_fp16))[name = tensor("op_4523_cast_fp16")]; + tensor var_4524_to_fp16 = const()[name = tensor("op_4524_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_385_cast_fp16 = mul(x = var_4523_cast_fp16, y = var_4524_to_fp16)[name = tensor("aw_chunk_385_cast_fp16")]; + tensor var_4527_equation_0 = const()[name = tensor("op_4527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4527_cast_fp16 = einsum(equation = var_4527_equation_0, values = (var_4429_cast_fp16, var_4102_cast_fp16))[name = tensor("op_4527_cast_fp16")]; + tensor var_4528_to_fp16 = const()[name = tensor("op_4528_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_387_cast_fp16 = mul(x = var_4527_cast_fp16, y = var_4528_to_fp16)[name = tensor("aw_chunk_387_cast_fp16")]; + tensor var_4531_equation_0 = const()[name = tensor("op_4531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4531_cast_fp16 = einsum(equation = var_4531_equation_0, values = (var_4429_cast_fp16, var_4109_cast_fp16))[name = tensor("op_4531_cast_fp16")]; + tensor var_4532_to_fp16 = const()[name = tensor("op_4532_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_389_cast_fp16 = mul(x = var_4531_cast_fp16, y = var_4532_to_fp16)[name = tensor("aw_chunk_389_cast_fp16")]; + tensor var_4535_equation_0 = const()[name = tensor("op_4535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4535_cast_fp16 = einsum(equation = var_4535_equation_0, values = (var_4429_cast_fp16, var_4116_cast_fp16))[name = tensor("op_4535_cast_fp16")]; + tensor var_4536_to_fp16 = const()[name = tensor("op_4536_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_391_cast_fp16 = mul(x = var_4535_cast_fp16, y = var_4536_to_fp16)[name = tensor("aw_chunk_391_cast_fp16")]; + tensor var_4539_equation_0 = const()[name = tensor("op_4539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4539_cast_fp16 = einsum(equation = var_4539_equation_0, values = (var_4433_cast_fp16, var_4123_cast_fp16))[name = tensor("op_4539_cast_fp16")]; + tensor var_4540_to_fp16 = const()[name = tensor("op_4540_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_393_cast_fp16 = mul(x = var_4539_cast_fp16, y = var_4540_to_fp16)[name = tensor("aw_chunk_393_cast_fp16")]; + tensor var_4543_equation_0 = const()[name = tensor("op_4543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4543_cast_fp16 = einsum(equation = var_4543_equation_0, values = (var_4433_cast_fp16, var_4130_cast_fp16))[name = tensor("op_4543_cast_fp16")]; + tensor var_4544_to_fp16 = const()[name = tensor("op_4544_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_395_cast_fp16 = mul(x = var_4543_cast_fp16, y = var_4544_to_fp16)[name = tensor("aw_chunk_395_cast_fp16")]; + 
tensor var_4547_equation_0 = const()[name = tensor("op_4547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4547_cast_fp16 = einsum(equation = var_4547_equation_0, values = (var_4433_cast_fp16, var_4137_cast_fp16))[name = tensor("op_4547_cast_fp16")]; + tensor var_4548_to_fp16 = const()[name = tensor("op_4548_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_397_cast_fp16 = mul(x = var_4547_cast_fp16, y = var_4548_to_fp16)[name = tensor("aw_chunk_397_cast_fp16")]; + tensor var_4551_equation_0 = const()[name = tensor("op_4551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4551_cast_fp16 = einsum(equation = var_4551_equation_0, values = (var_4433_cast_fp16, var_4144_cast_fp16))[name = tensor("op_4551_cast_fp16")]; + tensor var_4552_to_fp16 = const()[name = tensor("op_4552_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_399_cast_fp16 = mul(x = var_4551_cast_fp16, y = var_4552_to_fp16)[name = tensor("aw_chunk_399_cast_fp16")]; + tensor var_4555_equation_0 = const()[name = tensor("op_4555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4555_cast_fp16 = einsum(equation = var_4555_equation_0, values = (var_4437_cast_fp16, var_4151_cast_fp16))[name = tensor("op_4555_cast_fp16")]; + tensor var_4556_to_fp16 = const()[name = tensor("op_4556_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_401_cast_fp16 = mul(x = var_4555_cast_fp16, y = var_4556_to_fp16)[name = tensor("aw_chunk_401_cast_fp16")]; + tensor var_4559_equation_0 = const()[name = tensor("op_4559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4559_cast_fp16 = einsum(equation = var_4559_equation_0, values = (var_4437_cast_fp16, var_4158_cast_fp16))[name = tensor("op_4559_cast_fp16")]; + tensor var_4560_to_fp16 = const()[name = tensor("op_4560_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_403_cast_fp16 = mul(x = var_4559_cast_fp16, y = var_4560_to_fp16)[name = tensor("aw_chunk_403_cast_fp16")]; + tensor var_4563_equation_0 = const()[name = tensor("op_4563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4563_cast_fp16 = einsum(equation = var_4563_equation_0, values = (var_4437_cast_fp16, var_4165_cast_fp16))[name = tensor("op_4563_cast_fp16")]; + tensor var_4564_to_fp16 = const()[name = tensor("op_4564_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_405_cast_fp16 = mul(x = var_4563_cast_fp16, y = var_4564_to_fp16)[name = tensor("aw_chunk_405_cast_fp16")]; + tensor var_4567_equation_0 = const()[name = tensor("op_4567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4567_cast_fp16 = einsum(equation = var_4567_equation_0, values = (var_4437_cast_fp16, var_4172_cast_fp16))[name = tensor("op_4567_cast_fp16")]; + tensor var_4568_to_fp16 = const()[name = tensor("op_4568_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_407_cast_fp16 = mul(x = var_4567_cast_fp16, y = var_4568_to_fp16)[name = tensor("aw_chunk_407_cast_fp16")]; + tensor var_4571_equation_0 = const()[name = tensor("op_4571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4571_cast_fp16 = einsum(equation = var_4571_equation_0, values = (var_4441_cast_fp16, var_4179_cast_fp16))[name = tensor("op_4571_cast_fp16")]; + tensor var_4572_to_fp16 = const()[name = tensor("op_4572_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_409_cast_fp16 = mul(x = var_4571_cast_fp16, y = var_4572_to_fp16)[name = tensor("aw_chunk_409_cast_fp16")]; + tensor var_4575_equation_0 = const()[name = tensor("op_4575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4575_cast_fp16 = 
einsum(equation = var_4575_equation_0, values = (var_4441_cast_fp16, var_4186_cast_fp16))[name = tensor("op_4575_cast_fp16")]; + tensor var_4576_to_fp16 = const()[name = tensor("op_4576_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_411_cast_fp16 = mul(x = var_4575_cast_fp16, y = var_4576_to_fp16)[name = tensor("aw_chunk_411_cast_fp16")]; + tensor var_4579_equation_0 = const()[name = tensor("op_4579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4579_cast_fp16 = einsum(equation = var_4579_equation_0, values = (var_4441_cast_fp16, var_4193_cast_fp16))[name = tensor("op_4579_cast_fp16")]; + tensor var_4580_to_fp16 = const()[name = tensor("op_4580_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_413_cast_fp16 = mul(x = var_4579_cast_fp16, y = var_4580_to_fp16)[name = tensor("aw_chunk_413_cast_fp16")]; + tensor var_4583_equation_0 = const()[name = tensor("op_4583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4583_cast_fp16 = einsum(equation = var_4583_equation_0, values = (var_4441_cast_fp16, var_4200_cast_fp16))[name = tensor("op_4583_cast_fp16")]; + tensor var_4584_to_fp16 = const()[name = tensor("op_4584_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_415_cast_fp16 = mul(x = var_4583_cast_fp16, y = var_4584_to_fp16)[name = tensor("aw_chunk_415_cast_fp16")]; + tensor var_4587_equation_0 = const()[name = tensor("op_4587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4587_cast_fp16 = einsum(equation = var_4587_equation_0, values = (var_4445_cast_fp16, var_4207_cast_fp16))[name = tensor("op_4587_cast_fp16")]; + tensor var_4588_to_fp16 = const()[name = tensor("op_4588_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_417_cast_fp16 = mul(x = var_4587_cast_fp16, y = var_4588_to_fp16)[name = tensor("aw_chunk_417_cast_fp16")]; + tensor var_4591_equation_0 = const()[name = tensor("op_4591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4591_cast_fp16 = einsum(equation = var_4591_equation_0, values = (var_4445_cast_fp16, var_4214_cast_fp16))[name = tensor("op_4591_cast_fp16")]; + tensor var_4592_to_fp16 = const()[name = tensor("op_4592_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_419_cast_fp16 = mul(x = var_4591_cast_fp16, y = var_4592_to_fp16)[name = tensor("aw_chunk_419_cast_fp16")]; + tensor var_4595_equation_0 = const()[name = tensor("op_4595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4595_cast_fp16 = einsum(equation = var_4595_equation_0, values = (var_4445_cast_fp16, var_4221_cast_fp16))[name = tensor("op_4595_cast_fp16")]; + tensor var_4596_to_fp16 = const()[name = tensor("op_4596_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_421_cast_fp16 = mul(x = var_4595_cast_fp16, y = var_4596_to_fp16)[name = tensor("aw_chunk_421_cast_fp16")]; + tensor var_4599_equation_0 = const()[name = tensor("op_4599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4599_cast_fp16 = einsum(equation = var_4599_equation_0, values = (var_4445_cast_fp16, var_4228_cast_fp16))[name = tensor("op_4599_cast_fp16")]; + tensor var_4600_to_fp16 = const()[name = tensor("op_4600_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_423_cast_fp16 = mul(x = var_4599_cast_fp16, y = var_4600_to_fp16)[name = tensor("aw_chunk_423_cast_fp16")]; + tensor var_4603_equation_0 = const()[name = tensor("op_4603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4603_cast_fp16 = einsum(equation = var_4603_equation_0, values = (var_4449_cast_fp16, var_4235_cast_fp16))[name = tensor("op_4603_cast_fp16")]; + tensor var_4604_to_fp16 
= const()[name = tensor("op_4604_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_425_cast_fp16 = mul(x = var_4603_cast_fp16, y = var_4604_to_fp16)[name = tensor("aw_chunk_425_cast_fp16")]; + tensor var_4607_equation_0 = const()[name = tensor("op_4607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4607_cast_fp16 = einsum(equation = var_4607_equation_0, values = (var_4449_cast_fp16, var_4242_cast_fp16))[name = tensor("op_4607_cast_fp16")]; + tensor var_4608_to_fp16 = const()[name = tensor("op_4608_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_427_cast_fp16 = mul(x = var_4607_cast_fp16, y = var_4608_to_fp16)[name = tensor("aw_chunk_427_cast_fp16")]; + tensor var_4611_equation_0 = const()[name = tensor("op_4611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4611_cast_fp16 = einsum(equation = var_4611_equation_0, values = (var_4449_cast_fp16, var_4249_cast_fp16))[name = tensor("op_4611_cast_fp16")]; + tensor var_4612_to_fp16 = const()[name = tensor("op_4612_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_429_cast_fp16 = mul(x = var_4611_cast_fp16, y = var_4612_to_fp16)[name = tensor("aw_chunk_429_cast_fp16")]; + tensor var_4615_equation_0 = const()[name = tensor("op_4615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4615_cast_fp16 = einsum(equation = var_4615_equation_0, values = (var_4449_cast_fp16, var_4256_cast_fp16))[name = tensor("op_4615_cast_fp16")]; + tensor var_4616_to_fp16 = const()[name = tensor("op_4616_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_431_cast_fp16 = mul(x = var_4615_cast_fp16, y = var_4616_to_fp16)[name = tensor("aw_chunk_431_cast_fp16")]; + tensor var_4619_equation_0 = const()[name = tensor("op_4619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4619_cast_fp16 = einsum(equation = var_4619_equation_0, values = (var_4453_cast_fp16, var_4263_cast_fp16))[name = tensor("op_4619_cast_fp16")]; + tensor var_4620_to_fp16 = const()[name = tensor("op_4620_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_433_cast_fp16 = mul(x = var_4619_cast_fp16, y = var_4620_to_fp16)[name = tensor("aw_chunk_433_cast_fp16")]; + tensor var_4623_equation_0 = const()[name = tensor("op_4623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4623_cast_fp16 = einsum(equation = var_4623_equation_0, values = (var_4453_cast_fp16, var_4270_cast_fp16))[name = tensor("op_4623_cast_fp16")]; + tensor var_4624_to_fp16 = const()[name = tensor("op_4624_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_435_cast_fp16 = mul(x = var_4623_cast_fp16, y = var_4624_to_fp16)[name = tensor("aw_chunk_435_cast_fp16")]; + tensor var_4627_equation_0 = const()[name = tensor("op_4627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4627_cast_fp16 = einsum(equation = var_4627_equation_0, values = (var_4453_cast_fp16, var_4277_cast_fp16))[name = tensor("op_4627_cast_fp16")]; + tensor var_4628_to_fp16 = const()[name = tensor("op_4628_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_437_cast_fp16 = mul(x = var_4627_cast_fp16, y = var_4628_to_fp16)[name = tensor("aw_chunk_437_cast_fp16")]; + tensor var_4631_equation_0 = const()[name = tensor("op_4631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4631_cast_fp16 = einsum(equation = var_4631_equation_0, values = (var_4453_cast_fp16, var_4284_cast_fp16))[name = tensor("op_4631_cast_fp16")]; + tensor var_4632_to_fp16 = const()[name = tensor("op_4632_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_439_cast_fp16 = mul(x = var_4631_cast_fp16, y = 
var_4632_to_fp16)[name = tensor("aw_chunk_439_cast_fp16")]; + tensor var_4635_equation_0 = const()[name = tensor("op_4635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4635_cast_fp16 = einsum(equation = var_4635_equation_0, values = (var_4457_cast_fp16, var_4291_cast_fp16))[name = tensor("op_4635_cast_fp16")]; + tensor var_4636_to_fp16 = const()[name = tensor("op_4636_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_441_cast_fp16 = mul(x = var_4635_cast_fp16, y = var_4636_to_fp16)[name = tensor("aw_chunk_441_cast_fp16")]; + tensor var_4639_equation_0 = const()[name = tensor("op_4639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4639_cast_fp16 = einsum(equation = var_4639_equation_0, values = (var_4457_cast_fp16, var_4298_cast_fp16))[name = tensor("op_4639_cast_fp16")]; + tensor var_4640_to_fp16 = const()[name = tensor("op_4640_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_443_cast_fp16 = mul(x = var_4639_cast_fp16, y = var_4640_to_fp16)[name = tensor("aw_chunk_443_cast_fp16")]; + tensor var_4643_equation_0 = const()[name = tensor("op_4643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4643_cast_fp16 = einsum(equation = var_4643_equation_0, values = (var_4457_cast_fp16, var_4305_cast_fp16))[name = tensor("op_4643_cast_fp16")]; + tensor var_4644_to_fp16 = const()[name = tensor("op_4644_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_445_cast_fp16 = mul(x = var_4643_cast_fp16, y = var_4644_to_fp16)[name = tensor("aw_chunk_445_cast_fp16")]; + tensor var_4647_equation_0 = const()[name = tensor("op_4647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4647_cast_fp16 = einsum(equation = var_4647_equation_0, values = (var_4457_cast_fp16, var_4312_cast_fp16))[name = tensor("op_4647_cast_fp16")]; + tensor var_4648_to_fp16 = const()[name = tensor("op_4648_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_447_cast_fp16 = mul(x = var_4647_cast_fp16, y = var_4648_to_fp16)[name = tensor("aw_chunk_447_cast_fp16")]; + tensor var_4651_equation_0 = const()[name = tensor("op_4651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4651_cast_fp16 = einsum(equation = var_4651_equation_0, values = (var_4461_cast_fp16, var_4319_cast_fp16))[name = tensor("op_4651_cast_fp16")]; + tensor var_4652_to_fp16 = const()[name = tensor("op_4652_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_449_cast_fp16 = mul(x = var_4651_cast_fp16, y = var_4652_to_fp16)[name = tensor("aw_chunk_449_cast_fp16")]; + tensor var_4655_equation_0 = const()[name = tensor("op_4655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4655_cast_fp16 = einsum(equation = var_4655_equation_0, values = (var_4461_cast_fp16, var_4326_cast_fp16))[name = tensor("op_4655_cast_fp16")]; + tensor var_4656_to_fp16 = const()[name = tensor("op_4656_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_451_cast_fp16 = mul(x = var_4655_cast_fp16, y = var_4656_to_fp16)[name = tensor("aw_chunk_451_cast_fp16")]; + tensor var_4659_equation_0 = const()[name = tensor("op_4659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4659_cast_fp16 = einsum(equation = var_4659_equation_0, values = (var_4461_cast_fp16, var_4333_cast_fp16))[name = tensor("op_4659_cast_fp16")]; + tensor var_4660_to_fp16 = const()[name = tensor("op_4660_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_453_cast_fp16 = mul(x = var_4659_cast_fp16, y = var_4660_to_fp16)[name = tensor("aw_chunk_453_cast_fp16")]; + tensor var_4663_equation_0 = const()[name = tensor("op_4663_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_4663_cast_fp16 = einsum(equation = var_4663_equation_0, values = (var_4461_cast_fp16, var_4340_cast_fp16))[name = tensor("op_4663_cast_fp16")]; + tensor var_4664_to_fp16 = const()[name = tensor("op_4664_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_455_cast_fp16 = mul(x = var_4663_cast_fp16, y = var_4664_to_fp16)[name = tensor("aw_chunk_455_cast_fp16")]; + tensor var_4667_equation_0 = const()[name = tensor("op_4667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4667_cast_fp16 = einsum(equation = var_4667_equation_0, values = (var_4465_cast_fp16, var_4347_cast_fp16))[name = tensor("op_4667_cast_fp16")]; + tensor var_4668_to_fp16 = const()[name = tensor("op_4668_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_457_cast_fp16 = mul(x = var_4667_cast_fp16, y = var_4668_to_fp16)[name = tensor("aw_chunk_457_cast_fp16")]; + tensor var_4671_equation_0 = const()[name = tensor("op_4671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4671_cast_fp16 = einsum(equation = var_4671_equation_0, values = (var_4465_cast_fp16, var_4354_cast_fp16))[name = tensor("op_4671_cast_fp16")]; + tensor var_4672_to_fp16 = const()[name = tensor("op_4672_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_459_cast_fp16 = mul(x = var_4671_cast_fp16, y = var_4672_to_fp16)[name = tensor("aw_chunk_459_cast_fp16")]; + tensor var_4675_equation_0 = const()[name = tensor("op_4675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4675_cast_fp16 = einsum(equation = var_4675_equation_0, values = (var_4465_cast_fp16, var_4361_cast_fp16))[name = tensor("op_4675_cast_fp16")]; + tensor var_4676_to_fp16 = const()[name = tensor("op_4676_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_461_cast_fp16 = mul(x = var_4675_cast_fp16, y = var_4676_to_fp16)[name = tensor("aw_chunk_461_cast_fp16")]; + tensor var_4679_equation_0 = const()[name = tensor("op_4679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4679_cast_fp16 = einsum(equation = var_4679_equation_0, values = (var_4465_cast_fp16, var_4368_cast_fp16))[name = tensor("op_4679_cast_fp16")]; + tensor var_4680_to_fp16 = const()[name = tensor("op_4680_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_463_cast_fp16 = mul(x = var_4679_cast_fp16, y = var_4680_to_fp16)[name = tensor("aw_chunk_463_cast_fp16")]; + tensor var_4683_equation_0 = const()[name = tensor("op_4683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4683_cast_fp16 = einsum(equation = var_4683_equation_0, values = (var_4469_cast_fp16, var_4375_cast_fp16))[name = tensor("op_4683_cast_fp16")]; + tensor var_4684_to_fp16 = const()[name = tensor("op_4684_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_465_cast_fp16 = mul(x = var_4683_cast_fp16, y = var_4684_to_fp16)[name = tensor("aw_chunk_465_cast_fp16")]; + tensor var_4687_equation_0 = const()[name = tensor("op_4687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4687_cast_fp16 = einsum(equation = var_4687_equation_0, values = (var_4469_cast_fp16, var_4382_cast_fp16))[name = tensor("op_4687_cast_fp16")]; + tensor var_4688_to_fp16 = const()[name = tensor("op_4688_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_467_cast_fp16 = mul(x = var_4687_cast_fp16, y = var_4688_to_fp16)[name = tensor("aw_chunk_467_cast_fp16")]; + tensor var_4691_equation_0 = const()[name = tensor("op_4691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4691_cast_fp16 = einsum(equation = var_4691_equation_0, values = (var_4469_cast_fp16, var_4389_cast_fp16))[name 
= tensor("op_4691_cast_fp16")]; + tensor var_4692_to_fp16 = const()[name = tensor("op_4692_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_469_cast_fp16 = mul(x = var_4691_cast_fp16, y = var_4692_to_fp16)[name = tensor("aw_chunk_469_cast_fp16")]; + tensor var_4695_equation_0 = const()[name = tensor("op_4695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4695_cast_fp16 = einsum(equation = var_4695_equation_0, values = (var_4469_cast_fp16, var_4396_cast_fp16))[name = tensor("op_4695_cast_fp16")]; + tensor var_4696_to_fp16 = const()[name = tensor("op_4696_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_471_cast_fp16 = mul(x = var_4695_cast_fp16, y = var_4696_to_fp16)[name = tensor("aw_chunk_471_cast_fp16")]; + tensor var_4699_equation_0 = const()[name = tensor("op_4699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4699_cast_fp16 = einsum(equation = var_4699_equation_0, values = (var_4473_cast_fp16, var_4403_cast_fp16))[name = tensor("op_4699_cast_fp16")]; + tensor var_4700_to_fp16 = const()[name = tensor("op_4700_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_473_cast_fp16 = mul(x = var_4699_cast_fp16, y = var_4700_to_fp16)[name = tensor("aw_chunk_473_cast_fp16")]; + tensor var_4703_equation_0 = const()[name = tensor("op_4703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4703_cast_fp16 = einsum(equation = var_4703_equation_0, values = (var_4473_cast_fp16, var_4410_cast_fp16))[name = tensor("op_4703_cast_fp16")]; + tensor var_4704_to_fp16 = const()[name = tensor("op_4704_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_475_cast_fp16 = mul(x = var_4703_cast_fp16, y = var_4704_to_fp16)[name = tensor("aw_chunk_475_cast_fp16")]; + tensor var_4707_equation_0 = const()[name = tensor("op_4707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4707_cast_fp16 = einsum(equation = var_4707_equation_0, values = (var_4473_cast_fp16, var_4417_cast_fp16))[name = tensor("op_4707_cast_fp16")]; + tensor var_4708_to_fp16 = const()[name = tensor("op_4708_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_477_cast_fp16 = mul(x = var_4707_cast_fp16, y = var_4708_to_fp16)[name = tensor("aw_chunk_477_cast_fp16")]; + tensor var_4711_equation_0 = const()[name = tensor("op_4711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4711_cast_fp16 = einsum(equation = var_4711_equation_0, values = (var_4473_cast_fp16, var_4424_cast_fp16))[name = tensor("op_4711_cast_fp16")]; + tensor var_4712_to_fp16 = const()[name = tensor("op_4712_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_479_cast_fp16 = mul(x = var_4711_cast_fp16, y = var_4712_to_fp16)[name = tensor("aw_chunk_479_cast_fp16")]; + tensor var_4714_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_385_cast_fp16)[name = tensor("op_4714_cast_fp16")]; + tensor var_4715_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_387_cast_fp16)[name = tensor("op_4715_cast_fp16")]; + tensor var_4716_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_389_cast_fp16)[name = tensor("op_4716_cast_fp16")]; + tensor var_4717_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_391_cast_fp16)[name = tensor("op_4717_cast_fp16")]; + tensor var_4718_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_393_cast_fp16)[name = tensor("op_4718_cast_fp16")]; + tensor var_4719_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_395_cast_fp16)[name = tensor("op_4719_cast_fp16")]; + tensor var_4720_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_397_cast_fp16)[name = tensor("op_4720_cast_fp16")]; + tensor var_4721_cast_fp16 = 
softmax(axis = var_3987, x = aw_chunk_399_cast_fp16)[name = tensor("op_4721_cast_fp16")]; + tensor var_4722_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_401_cast_fp16)[name = tensor("op_4722_cast_fp16")]; + tensor var_4723_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_403_cast_fp16)[name = tensor("op_4723_cast_fp16")]; + tensor var_4724_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_405_cast_fp16)[name = tensor("op_4724_cast_fp16")]; + tensor var_4725_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_407_cast_fp16)[name = tensor("op_4725_cast_fp16")]; + tensor var_4726_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_409_cast_fp16)[name = tensor("op_4726_cast_fp16")]; + tensor var_4727_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_411_cast_fp16)[name = tensor("op_4727_cast_fp16")]; + tensor var_4728_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_413_cast_fp16)[name = tensor("op_4728_cast_fp16")]; + tensor var_4729_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_415_cast_fp16)[name = tensor("op_4729_cast_fp16")]; + tensor var_4730_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_417_cast_fp16)[name = tensor("op_4730_cast_fp16")]; + tensor var_4731_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_419_cast_fp16)[name = tensor("op_4731_cast_fp16")]; + tensor var_4732_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_421_cast_fp16)[name = tensor("op_4732_cast_fp16")]; + tensor var_4733_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_423_cast_fp16)[name = tensor("op_4733_cast_fp16")]; + tensor var_4734_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_425_cast_fp16)[name = tensor("op_4734_cast_fp16")]; + tensor var_4735_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_427_cast_fp16)[name = tensor("op_4735_cast_fp16")]; + tensor var_4736_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_429_cast_fp16)[name = tensor("op_4736_cast_fp16")]; + tensor var_4737_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_431_cast_fp16)[name = tensor("op_4737_cast_fp16")]; + tensor var_4738_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_433_cast_fp16)[name = tensor("op_4738_cast_fp16")]; + tensor var_4739_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_435_cast_fp16)[name = tensor("op_4739_cast_fp16")]; + tensor var_4740_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_437_cast_fp16)[name = tensor("op_4740_cast_fp16")]; + tensor var_4741_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_439_cast_fp16)[name = tensor("op_4741_cast_fp16")]; + tensor var_4742_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_441_cast_fp16)[name = tensor("op_4742_cast_fp16")]; + tensor var_4743_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_443_cast_fp16)[name = tensor("op_4743_cast_fp16")]; + tensor var_4744_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_445_cast_fp16)[name = tensor("op_4744_cast_fp16")]; + tensor var_4745_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_447_cast_fp16)[name = tensor("op_4745_cast_fp16")]; + tensor var_4746_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_449_cast_fp16)[name = tensor("op_4746_cast_fp16")]; + tensor var_4747_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_451_cast_fp16)[name = tensor("op_4747_cast_fp16")]; + tensor var_4748_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_453_cast_fp16)[name = tensor("op_4748_cast_fp16")]; + tensor var_4749_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_455_cast_fp16)[name = tensor("op_4749_cast_fp16")]; + tensor var_4750_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_457_cast_fp16)[name = 
tensor("op_4750_cast_fp16")]; + tensor var_4751_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_459_cast_fp16)[name = tensor("op_4751_cast_fp16")]; + tensor var_4752_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_461_cast_fp16)[name = tensor("op_4752_cast_fp16")]; + tensor var_4753_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_463_cast_fp16)[name = tensor("op_4753_cast_fp16")]; + tensor var_4754_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_465_cast_fp16)[name = tensor("op_4754_cast_fp16")]; + tensor var_4755_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_467_cast_fp16)[name = tensor("op_4755_cast_fp16")]; + tensor var_4756_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_469_cast_fp16)[name = tensor("op_4756_cast_fp16")]; + tensor var_4757_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_471_cast_fp16)[name = tensor("op_4757_cast_fp16")]; + tensor var_4758_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_473_cast_fp16)[name = tensor("op_4758_cast_fp16")]; + tensor var_4759_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_475_cast_fp16)[name = tensor("op_4759_cast_fp16")]; + tensor var_4760_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_477_cast_fp16)[name = tensor("op_4760_cast_fp16")]; + tensor var_4761_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_479_cast_fp16)[name = tensor("op_4761_cast_fp16")]; + tensor var_4763_equation_0 = const()[name = tensor("op_4763_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4763_cast_fp16 = einsum(equation = var_4763_equation_0, values = (var_4475_cast_fp16, var_4714_cast_fp16))[name = tensor("op_4763_cast_fp16")]; + tensor var_4765_equation_0 = const()[name = tensor("op_4765_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4765_cast_fp16 = einsum(equation = var_4765_equation_0, values = (var_4475_cast_fp16, var_4715_cast_fp16))[name = tensor("op_4765_cast_fp16")]; + tensor var_4767_equation_0 = const()[name = tensor("op_4767_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4767_cast_fp16 = einsum(equation = var_4767_equation_0, values = (var_4475_cast_fp16, var_4716_cast_fp16))[name = tensor("op_4767_cast_fp16")]; + tensor var_4769_equation_0 = const()[name = tensor("op_4769_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4769_cast_fp16 = einsum(equation = var_4769_equation_0, values = (var_4475_cast_fp16, var_4717_cast_fp16))[name = tensor("op_4769_cast_fp16")]; + tensor var_4771_equation_0 = const()[name = tensor("op_4771_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4771_cast_fp16 = einsum(equation = var_4771_equation_0, values = (var_4479_cast_fp16, var_4718_cast_fp16))[name = tensor("op_4771_cast_fp16")]; + tensor var_4773_equation_0 = const()[name = tensor("op_4773_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4773_cast_fp16 = einsum(equation = var_4773_equation_0, values = (var_4479_cast_fp16, var_4719_cast_fp16))[name = tensor("op_4773_cast_fp16")]; + tensor var_4775_equation_0 = const()[name = tensor("op_4775_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4775_cast_fp16 = einsum(equation = var_4775_equation_0, values = (var_4479_cast_fp16, var_4720_cast_fp16))[name = tensor("op_4775_cast_fp16")]; + tensor var_4777_equation_0 = const()[name = tensor("op_4777_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4777_cast_fp16 = einsum(equation = var_4777_equation_0, values = (var_4479_cast_fp16, var_4721_cast_fp16))[name = tensor("op_4777_cast_fp16")]; + tensor var_4779_equation_0 = const()[name = 
tensor("op_4779_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4779_cast_fp16 = einsum(equation = var_4779_equation_0, values = (var_4483_cast_fp16, var_4722_cast_fp16))[name = tensor("op_4779_cast_fp16")]; + tensor var_4781_equation_0 = const()[name = tensor("op_4781_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4781_cast_fp16 = einsum(equation = var_4781_equation_0, values = (var_4483_cast_fp16, var_4723_cast_fp16))[name = tensor("op_4781_cast_fp16")]; + tensor var_4783_equation_0 = const()[name = tensor("op_4783_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4783_cast_fp16 = einsum(equation = var_4783_equation_0, values = (var_4483_cast_fp16, var_4724_cast_fp16))[name = tensor("op_4783_cast_fp16")]; + tensor var_4785_equation_0 = const()[name = tensor("op_4785_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4785_cast_fp16 = einsum(equation = var_4785_equation_0, values = (var_4483_cast_fp16, var_4725_cast_fp16))[name = tensor("op_4785_cast_fp16")]; + tensor var_4787_equation_0 = const()[name = tensor("op_4787_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4787_cast_fp16 = einsum(equation = var_4787_equation_0, values = (var_4487_cast_fp16, var_4726_cast_fp16))[name = tensor("op_4787_cast_fp16")]; + tensor var_4789_equation_0 = const()[name = tensor("op_4789_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4789_cast_fp16 = einsum(equation = var_4789_equation_0, values = (var_4487_cast_fp16, var_4727_cast_fp16))[name = tensor("op_4789_cast_fp16")]; + tensor var_4791_equation_0 = const()[name = tensor("op_4791_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4791_cast_fp16 = einsum(equation = var_4791_equation_0, values = (var_4487_cast_fp16, var_4728_cast_fp16))[name = tensor("op_4791_cast_fp16")]; + tensor var_4793_equation_0 = const()[name = tensor("op_4793_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4793_cast_fp16 = einsum(equation = var_4793_equation_0, values = (var_4487_cast_fp16, var_4729_cast_fp16))[name = tensor("op_4793_cast_fp16")]; + tensor var_4795_equation_0 = const()[name = tensor("op_4795_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4795_cast_fp16 = einsum(equation = var_4795_equation_0, values = (var_4491_cast_fp16, var_4730_cast_fp16))[name = tensor("op_4795_cast_fp16")]; + tensor var_4797_equation_0 = const()[name = tensor("op_4797_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4797_cast_fp16 = einsum(equation = var_4797_equation_0, values = (var_4491_cast_fp16, var_4731_cast_fp16))[name = tensor("op_4797_cast_fp16")]; + tensor var_4799_equation_0 = const()[name = tensor("op_4799_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4799_cast_fp16 = einsum(equation = var_4799_equation_0, values = (var_4491_cast_fp16, var_4732_cast_fp16))[name = tensor("op_4799_cast_fp16")]; + tensor var_4801_equation_0 = const()[name = tensor("op_4801_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4801_cast_fp16 = einsum(equation = var_4801_equation_0, values = (var_4491_cast_fp16, var_4733_cast_fp16))[name = tensor("op_4801_cast_fp16")]; + tensor var_4803_equation_0 = const()[name = tensor("op_4803_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4803_cast_fp16 = einsum(equation = var_4803_equation_0, values = (var_4495_cast_fp16, var_4734_cast_fp16))[name = tensor("op_4803_cast_fp16")]; + tensor var_4805_equation_0 = const()[name = tensor("op_4805_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_4805_cast_fp16 = einsum(equation = var_4805_equation_0, values = (var_4495_cast_fp16, var_4735_cast_fp16))[name = tensor("op_4805_cast_fp16")]; + tensor var_4807_equation_0 = const()[name = tensor("op_4807_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4807_cast_fp16 = einsum(equation = var_4807_equation_0, values = (var_4495_cast_fp16, var_4736_cast_fp16))[name = tensor("op_4807_cast_fp16")]; + tensor var_4809_equation_0 = const()[name = tensor("op_4809_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4809_cast_fp16 = einsum(equation = var_4809_equation_0, values = (var_4495_cast_fp16, var_4737_cast_fp16))[name = tensor("op_4809_cast_fp16")]; + tensor var_4811_equation_0 = const()[name = tensor("op_4811_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4811_cast_fp16 = einsum(equation = var_4811_equation_0, values = (var_4499_cast_fp16, var_4738_cast_fp16))[name = tensor("op_4811_cast_fp16")]; + tensor var_4813_equation_0 = const()[name = tensor("op_4813_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4813_cast_fp16 = einsum(equation = var_4813_equation_0, values = (var_4499_cast_fp16, var_4739_cast_fp16))[name = tensor("op_4813_cast_fp16")]; + tensor var_4815_equation_0 = const()[name = tensor("op_4815_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4815_cast_fp16 = einsum(equation = var_4815_equation_0, values = (var_4499_cast_fp16, var_4740_cast_fp16))[name = tensor("op_4815_cast_fp16")]; + tensor var_4817_equation_0 = const()[name = tensor("op_4817_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4817_cast_fp16 = einsum(equation = var_4817_equation_0, values = (var_4499_cast_fp16, var_4741_cast_fp16))[name = tensor("op_4817_cast_fp16")]; + tensor var_4819_equation_0 = const()[name = tensor("op_4819_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4819_cast_fp16 = einsum(equation = var_4819_equation_0, values = (var_4503_cast_fp16, var_4742_cast_fp16))[name = tensor("op_4819_cast_fp16")]; + tensor var_4821_equation_0 = const()[name = tensor("op_4821_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4821_cast_fp16 = einsum(equation = var_4821_equation_0, values = (var_4503_cast_fp16, var_4743_cast_fp16))[name = tensor("op_4821_cast_fp16")]; + tensor var_4823_equation_0 = const()[name = tensor("op_4823_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4823_cast_fp16 = einsum(equation = var_4823_equation_0, values = (var_4503_cast_fp16, var_4744_cast_fp16))[name = tensor("op_4823_cast_fp16")]; + tensor var_4825_equation_0 = const()[name = tensor("op_4825_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4825_cast_fp16 = einsum(equation = var_4825_equation_0, values = (var_4503_cast_fp16, var_4745_cast_fp16))[name = tensor("op_4825_cast_fp16")]; + tensor var_4827_equation_0 = const()[name = tensor("op_4827_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4827_cast_fp16 = einsum(equation = var_4827_equation_0, values = (var_4507_cast_fp16, var_4746_cast_fp16))[name = tensor("op_4827_cast_fp16")]; + tensor var_4829_equation_0 = const()[name = tensor("op_4829_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4829_cast_fp16 = einsum(equation = var_4829_equation_0, values = (var_4507_cast_fp16, var_4747_cast_fp16))[name = tensor("op_4829_cast_fp16")]; + tensor var_4831_equation_0 = const()[name = tensor("op_4831_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4831_cast_fp16 = einsum(equation = var_4831_equation_0, values = 
(var_4507_cast_fp16, var_4748_cast_fp16))[name = tensor("op_4831_cast_fp16")]; + tensor var_4833_equation_0 = const()[name = tensor("op_4833_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4833_cast_fp16 = einsum(equation = var_4833_equation_0, values = (var_4507_cast_fp16, var_4749_cast_fp16))[name = tensor("op_4833_cast_fp16")]; + tensor var_4835_equation_0 = const()[name = tensor("op_4835_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4835_cast_fp16 = einsum(equation = var_4835_equation_0, values = (var_4511_cast_fp16, var_4750_cast_fp16))[name = tensor("op_4835_cast_fp16")]; + tensor var_4837_equation_0 = const()[name = tensor("op_4837_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4837_cast_fp16 = einsum(equation = var_4837_equation_0, values = (var_4511_cast_fp16, var_4751_cast_fp16))[name = tensor("op_4837_cast_fp16")]; + tensor var_4839_equation_0 = const()[name = tensor("op_4839_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4839_cast_fp16 = einsum(equation = var_4839_equation_0, values = (var_4511_cast_fp16, var_4752_cast_fp16))[name = tensor("op_4839_cast_fp16")]; + tensor var_4841_equation_0 = const()[name = tensor("op_4841_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4841_cast_fp16 = einsum(equation = var_4841_equation_0, values = (var_4511_cast_fp16, var_4753_cast_fp16))[name = tensor("op_4841_cast_fp16")]; + tensor var_4843_equation_0 = const()[name = tensor("op_4843_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4843_cast_fp16 = einsum(equation = var_4843_equation_0, values = (var_4515_cast_fp16, var_4754_cast_fp16))[name = tensor("op_4843_cast_fp16")]; + tensor var_4845_equation_0 = const()[name = tensor("op_4845_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4845_cast_fp16 = einsum(equation = var_4845_equation_0, values = (var_4515_cast_fp16, var_4755_cast_fp16))[name = tensor("op_4845_cast_fp16")]; + tensor var_4847_equation_0 = const()[name = tensor("op_4847_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4847_cast_fp16 = einsum(equation = var_4847_equation_0, values = (var_4515_cast_fp16, var_4756_cast_fp16))[name = tensor("op_4847_cast_fp16")]; + tensor var_4849_equation_0 = const()[name = tensor("op_4849_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4849_cast_fp16 = einsum(equation = var_4849_equation_0, values = (var_4515_cast_fp16, var_4757_cast_fp16))[name = tensor("op_4849_cast_fp16")]; + tensor var_4851_equation_0 = const()[name = tensor("op_4851_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4851_cast_fp16 = einsum(equation = var_4851_equation_0, values = (var_4519_cast_fp16, var_4758_cast_fp16))[name = tensor("op_4851_cast_fp16")]; + tensor var_4853_equation_0 = const()[name = tensor("op_4853_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4853_cast_fp16 = einsum(equation = var_4853_equation_0, values = (var_4519_cast_fp16, var_4759_cast_fp16))[name = tensor("op_4853_cast_fp16")]; + tensor var_4855_equation_0 = const()[name = tensor("op_4855_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4855_cast_fp16 = einsum(equation = var_4855_equation_0, values = (var_4519_cast_fp16, var_4760_cast_fp16))[name = tensor("op_4855_cast_fp16")]; + tensor var_4857_equation_0 = const()[name = tensor("op_4857_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4857_cast_fp16 = einsum(equation = var_4857_equation_0, values = (var_4519_cast_fp16, var_4761_cast_fp16))[name = tensor("op_4857_cast_fp16")]; + 
tensor var_4859_interleave_0 = const()[name = tensor("op_4859_interleave_0"), val = tensor(false)]; + tensor var_4859_cast_fp16 = concat(axis = var_3970, interleave = var_4859_interleave_0, values = (var_4763_cast_fp16, var_4765_cast_fp16, var_4767_cast_fp16, var_4769_cast_fp16))[name = tensor("op_4859_cast_fp16")]; + tensor var_4861_interleave_0 = const()[name = tensor("op_4861_interleave_0"), val = tensor(false)]; + tensor var_4861_cast_fp16 = concat(axis = var_3970, interleave = var_4861_interleave_0, values = (var_4771_cast_fp16, var_4773_cast_fp16, var_4775_cast_fp16, var_4777_cast_fp16))[name = tensor("op_4861_cast_fp16")]; + tensor var_4863_interleave_0 = const()[name = tensor("op_4863_interleave_0"), val = tensor(false)]; + tensor var_4863_cast_fp16 = concat(axis = var_3970, interleave = var_4863_interleave_0, values = (var_4779_cast_fp16, var_4781_cast_fp16, var_4783_cast_fp16, var_4785_cast_fp16))[name = tensor("op_4863_cast_fp16")]; + tensor var_4865_interleave_0 = const()[name = tensor("op_4865_interleave_0"), val = tensor(false)]; + tensor var_4865_cast_fp16 = concat(axis = var_3970, interleave = var_4865_interleave_0, values = (var_4787_cast_fp16, var_4789_cast_fp16, var_4791_cast_fp16, var_4793_cast_fp16))[name = tensor("op_4865_cast_fp16")]; + tensor var_4867_interleave_0 = const()[name = tensor("op_4867_interleave_0"), val = tensor(false)]; + tensor var_4867_cast_fp16 = concat(axis = var_3970, interleave = var_4867_interleave_0, values = (var_4795_cast_fp16, var_4797_cast_fp16, var_4799_cast_fp16, var_4801_cast_fp16))[name = tensor("op_4867_cast_fp16")]; + tensor var_4869_interleave_0 = const()[name = tensor("op_4869_interleave_0"), val = tensor(false)]; + tensor var_4869_cast_fp16 = concat(axis = var_3970, interleave = var_4869_interleave_0, values = (var_4803_cast_fp16, var_4805_cast_fp16, var_4807_cast_fp16, var_4809_cast_fp16))[name = tensor("op_4869_cast_fp16")]; + tensor var_4871_interleave_0 = const()[name = tensor("op_4871_interleave_0"), val = tensor(false)]; + tensor var_4871_cast_fp16 = concat(axis = var_3970, interleave = var_4871_interleave_0, values = (var_4811_cast_fp16, var_4813_cast_fp16, var_4815_cast_fp16, var_4817_cast_fp16))[name = tensor("op_4871_cast_fp16")]; + tensor var_4873_interleave_0 = const()[name = tensor("op_4873_interleave_0"), val = tensor(false)]; + tensor var_4873_cast_fp16 = concat(axis = var_3970, interleave = var_4873_interleave_0, values = (var_4819_cast_fp16, var_4821_cast_fp16, var_4823_cast_fp16, var_4825_cast_fp16))[name = tensor("op_4873_cast_fp16")]; + tensor var_4875_interleave_0 = const()[name = tensor("op_4875_interleave_0"), val = tensor(false)]; + tensor var_4875_cast_fp16 = concat(axis = var_3970, interleave = var_4875_interleave_0, values = (var_4827_cast_fp16, var_4829_cast_fp16, var_4831_cast_fp16, var_4833_cast_fp16))[name = tensor("op_4875_cast_fp16")]; + tensor var_4877_interleave_0 = const()[name = tensor("op_4877_interleave_0"), val = tensor(false)]; + tensor var_4877_cast_fp16 = concat(axis = var_3970, interleave = var_4877_interleave_0, values = (var_4835_cast_fp16, var_4837_cast_fp16, var_4839_cast_fp16, var_4841_cast_fp16))[name = tensor("op_4877_cast_fp16")]; + tensor var_4879_interleave_0 = const()[name = tensor("op_4879_interleave_0"), val = tensor(false)]; + tensor var_4879_cast_fp16 = concat(axis = var_3970, interleave = var_4879_interleave_0, values = (var_4843_cast_fp16, var_4845_cast_fp16, var_4847_cast_fp16, var_4849_cast_fp16))[name = tensor("op_4879_cast_fp16")]; + tensor var_4881_interleave_0 = 
const()[name = tensor("op_4881_interleave_0"), val = tensor(false)]; + tensor var_4881_cast_fp16 = concat(axis = var_3970, interleave = var_4881_interleave_0, values = (var_4851_cast_fp16, var_4853_cast_fp16, var_4855_cast_fp16, var_4857_cast_fp16))[name = tensor("op_4881_cast_fp16")]; + tensor input_33_interleave_0 = const()[name = tensor("input_33_interleave_0"), val = tensor(false)]; + tensor input_33_cast_fp16 = concat(axis = var_3987, interleave = input_33_interleave_0, values = (var_4859_cast_fp16, var_4861_cast_fp16, var_4863_cast_fp16, var_4865_cast_fp16, var_4867_cast_fp16, var_4869_cast_fp16, var_4871_cast_fp16, var_4873_cast_fp16, var_4875_cast_fp16, var_4877_cast_fp16, var_4879_cast_fp16, var_4881_cast_fp16))[name = tensor("input_33_cast_fp16")]; + tensor var_4886 = const()[name = tensor("op_4886"), val = tensor([1, 1])]; + tensor var_4888 = const()[name = tensor("op_4888"), val = tensor([1, 1])]; + tensor obj_19_pad_type_0 = const()[name = tensor("obj_19_pad_type_0"), val = tensor("custom")]; + tensor obj_19_pad_0 = const()[name = tensor("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66464448)))]; + tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67644160)))]; + tensor obj_19_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = var_4888, groups = var_3987, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = var_4886, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_4894 = const()[name = tensor("op_4894"), val = tensor([1])]; + tensor channels_mean_19_cast_fp16 = reduce_mean(axes = var_4894, keep_dims = var_3988, x = inputs_19_cast_fp16)[name = tensor("channels_mean_19_cast_fp16")]; + tensor zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor("zero_mean_19_cast_fp16")]; + tensor zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor("zero_mean_sq_19_cast_fp16")]; + tensor var_4898 = const()[name = tensor("op_4898"), val = tensor([1])]; + tensor var_4899_cast_fp16 = reduce_mean(axes = var_4898, keep_dims = var_3988, x = zero_mean_sq_19_cast_fp16)[name = tensor("op_4899_cast_fp16")]; + tensor var_4900_to_fp16 = const()[name = tensor("op_4900_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_4901_cast_fp16 = add(x = var_4899_cast_fp16, y = var_4900_to_fp16)[name = tensor("op_4901_cast_fp16")]; + tensor denom_19_epsilon_0_to_fp16 = const()[name = tensor("denom_19_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_4901_cast_fp16)[name = tensor("denom_19_cast_fp16")]; + tensor out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67645760)))]; + tensor input_35_beta_0_to_fp16 = const()[name = 
tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67647360)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_4912 = const()[name = tensor("op_4912"), val = tensor([1, 1])]; + tensor var_4914 = const()[name = tensor("op_4914"), val = tensor([1, 1])]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("custom")]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67648960)))]; + tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72367616)))]; + tensor input_37_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = var_4914, groups = var_3987, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_4912, weight = layers_4_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_4920 = const()[name = tensor("op_4920"), val = tensor([1, 1])]; + tensor var_4922 = const()[name = tensor("op_4922"), val = tensor([1, 1])]; + tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72373824)))]; + tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77092480)))]; + tensor hidden_states_13_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = var_4922, groups = var_3987, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = var_4920, weight = layers_4_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_4929 = const()[name = tensor("op_4929"), val = tensor(3)]; + tensor var_4946 = const()[name = tensor("op_4946"), val = tensor(1)]; + tensor var_4947 = const()[name = tensor("op_4947"), val = tensor(true)]; + tensor var_4957 = const()[name = tensor("op_4957"), val = tensor([1])]; + tensor channels_mean_21_cast_fp16 = reduce_mean(axes = var_4957, keep_dims = var_4947, x = inputs_21_cast_fp16)[name = tensor("channels_mean_21_cast_fp16")]; + tensor zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = 
tensor("zero_mean_21_cast_fp16")]; + tensor zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor("zero_mean_sq_21_cast_fp16")]; + tensor var_4961 = const()[name = tensor("op_4961"), val = tensor([1])]; + tensor var_4962_cast_fp16 = reduce_mean(axes = var_4961, keep_dims = var_4947, x = zero_mean_sq_21_cast_fp16)[name = tensor("op_4962_cast_fp16")]; + tensor var_4963_to_fp16 = const()[name = tensor("op_4963_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_4964_cast_fp16 = add(x = var_4962_cast_fp16, y = var_4963_to_fp16)[name = tensor("op_4964_cast_fp16")]; + tensor denom_21_epsilon_0_to_fp16 = const()[name = tensor("denom_21_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_4964_cast_fp16)[name = tensor("denom_21_cast_fp16")]; + tensor out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77094080)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77095680)))]; + tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor var_4979 = const()[name = tensor("op_4979"), val = tensor([1, 1])]; + tensor var_4981 = const()[name = tensor("op_4981"), val = tensor([1, 1])]; + tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("custom")]; + tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77097280)))]; + tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78276992)))]; + tensor query_11_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = var_4981, groups = var_4946, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_4979, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_4985 = const()[name = tensor("op_4985"), val = tensor([1, 1])]; + tensor var_4987 = const()[name = tensor("op_4987"), val = tensor([1, 1])]; + tensor key_11_pad_type_0 = const()[name = tensor("key_11_pad_type_0"), val = tensor("custom")]; + tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78278592)))]; + tensor key_11_cast_fp16 = conv(dilations = var_4987, groups = var_4946, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_4985, weight = 
layers_5_self_attn_k_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_4992 = const()[name = tensor("op_4992"), val = tensor([1, 1])]; + tensor var_4994 = const()[name = tensor("op_4994"), val = tensor([1, 1])]; + tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("custom")]; + tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79458304)))]; + tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80638016)))]; + tensor value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = var_4994, groups = var_4946, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_4992, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_5001_begin_0 = const()[name = tensor("op_5001_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5001_end_0 = const()[name = tensor("op_5001_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5001_end_mask_0 = const()[name = tensor("op_5001_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5001_cast_fp16 = slice_by_index(begin = var_5001_begin_0, end = var_5001_end_0, end_mask = var_5001_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5001_cast_fp16")]; + tensor var_5005_begin_0 = const()[name = tensor("op_5005_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_5005_end_0 = const()[name = tensor("op_5005_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_5005_end_mask_0 = const()[name = tensor("op_5005_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5005_cast_fp16 = slice_by_index(begin = var_5005_begin_0, end = var_5005_end_0, end_mask = var_5005_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5005_cast_fp16")]; + tensor var_5009_begin_0 = const()[name = tensor("op_5009_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_5009_end_0 = const()[name = tensor("op_5009_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_5009_end_mask_0 = const()[name = tensor("op_5009_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5009_cast_fp16 = slice_by_index(begin = var_5009_begin_0, end = var_5009_end_0, end_mask = var_5009_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5009_cast_fp16")]; + tensor var_5013_begin_0 = const()[name = tensor("op_5013_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_5013_end_0 = const()[name = tensor("op_5013_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_5013_end_mask_0 = const()[name = tensor("op_5013_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5013_cast_fp16 = slice_by_index(begin = var_5013_begin_0, end = var_5013_end_0, end_mask = var_5013_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5013_cast_fp16")]; + tensor var_5017_begin_0 = const()[name = tensor("op_5017_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_5017_end_0 = const()[name = tensor("op_5017_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_5017_end_mask_0 = const()[name = tensor("op_5017_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_5017_cast_fp16 = slice_by_index(begin = var_5017_begin_0, end = var_5017_end_0, end_mask = var_5017_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5017_cast_fp16")]; + tensor var_5021_begin_0 = const()[name = tensor("op_5021_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_5021_end_0 = const()[name = tensor("op_5021_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_5021_end_mask_0 = const()[name = tensor("op_5021_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5021_cast_fp16 = slice_by_index(begin = var_5021_begin_0, end = var_5021_end_0, end_mask = var_5021_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5021_cast_fp16")]; + tensor var_5025_begin_0 = const()[name = tensor("op_5025_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_5025_end_0 = const()[name = tensor("op_5025_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_5025_end_mask_0 = const()[name = tensor("op_5025_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5025_cast_fp16 = slice_by_index(begin = var_5025_begin_0, end = var_5025_end_0, end_mask = var_5025_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5025_cast_fp16")]; + tensor var_5029_begin_0 = const()[name = tensor("op_5029_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_5029_end_0 = const()[name = tensor("op_5029_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_5029_end_mask_0 = const()[name = tensor("op_5029_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5029_cast_fp16 = slice_by_index(begin = var_5029_begin_0, end = var_5029_end_0, end_mask = var_5029_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5029_cast_fp16")]; + tensor var_5033_begin_0 = const()[name = tensor("op_5033_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_5033_end_0 = const()[name = tensor("op_5033_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_5033_end_mask_0 = const()[name = tensor("op_5033_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5033_cast_fp16 = slice_by_index(begin = var_5033_begin_0, end = var_5033_end_0, end_mask = var_5033_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5033_cast_fp16")]; + tensor var_5037_begin_0 = const()[name = tensor("op_5037_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_5037_end_0 = const()[name = tensor("op_5037_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_5037_end_mask_0 = const()[name = tensor("op_5037_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5037_cast_fp16 = slice_by_index(begin = var_5037_begin_0, end = var_5037_end_0, end_mask = var_5037_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5037_cast_fp16")]; + tensor var_5041_begin_0 = const()[name = tensor("op_5041_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_5041_end_0 = const()[name = tensor("op_5041_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_5041_end_mask_0 = const()[name = tensor("op_5041_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5041_cast_fp16 = slice_by_index(begin = var_5041_begin_0, end = var_5041_end_0, end_mask = var_5041_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5041_cast_fp16")]; + tensor var_5045_begin_0 = const()[name = tensor("op_5045_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_5045_end_0 = const()[name = tensor("op_5045_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_5045_end_mask_0 = const()[name = tensor("op_5045_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5045_cast_fp16 
= slice_by_index(begin = var_5045_begin_0, end = var_5045_end_0, end_mask = var_5045_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_5045_cast_fp16")]; + tensor var_5054_begin_0 = const()[name = tensor("op_5054_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5054_end_0 = const()[name = tensor("op_5054_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5054_end_mask_0 = const()[name = tensor("op_5054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5054_cast_fp16 = slice_by_index(begin = var_5054_begin_0, end = var_5054_end_0, end_mask = var_5054_end_mask_0, x = var_5001_cast_fp16)[name = tensor("op_5054_cast_fp16")]; + tensor var_5061_begin_0 = const()[name = tensor("op_5061_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5061_end_0 = const()[name = tensor("op_5061_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5061_end_mask_0 = const()[name = tensor("op_5061_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5061_cast_fp16 = slice_by_index(begin = var_5061_begin_0, end = var_5061_end_0, end_mask = var_5061_end_mask_0, x = var_5001_cast_fp16)[name = tensor("op_5061_cast_fp16")]; + tensor var_5068_begin_0 = const()[name = tensor("op_5068_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5068_end_0 = const()[name = tensor("op_5068_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5068_end_mask_0 = const()[name = tensor("op_5068_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5068_cast_fp16 = slice_by_index(begin = var_5068_begin_0, end = var_5068_end_0, end_mask = var_5068_end_mask_0, x = var_5001_cast_fp16)[name = tensor("op_5068_cast_fp16")]; + tensor var_5075_begin_0 = const()[name = tensor("op_5075_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5075_end_0 = const()[name = tensor("op_5075_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5075_end_mask_0 = const()[name = tensor("op_5075_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5075_cast_fp16 = slice_by_index(begin = var_5075_begin_0, end = var_5075_end_0, end_mask = var_5075_end_mask_0, x = var_5001_cast_fp16)[name = tensor("op_5075_cast_fp16")]; + tensor var_5082_begin_0 = const()[name = tensor("op_5082_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5082_end_0 = const()[name = tensor("op_5082_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5082_end_mask_0 = const()[name = tensor("op_5082_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = var_5005_cast_fp16)[name = tensor("op_5082_cast_fp16")]; + tensor var_5089_begin_0 = const()[name = tensor("op_5089_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5089_end_0 = const()[name = tensor("op_5089_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5089_end_mask_0 = const()[name = tensor("op_5089_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = var_5089_end_0, end_mask = var_5089_end_mask_0, x = var_5005_cast_fp16)[name = tensor("op_5089_cast_fp16")]; + tensor var_5096_begin_0 = const()[name = tensor("op_5096_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5096_end_0 = const()[name = tensor("op_5096_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5096_end_mask_0 = const()[name = tensor("op_5096_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5096_cast_fp16 = slice_by_index(begin = 
var_5096_begin_0, end = var_5096_end_0, end_mask = var_5096_end_mask_0, x = var_5005_cast_fp16)[name = tensor("op_5096_cast_fp16")]; + tensor var_5103_begin_0 = const()[name = tensor("op_5103_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5103_end_0 = const()[name = tensor("op_5103_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5103_end_mask_0 = const()[name = tensor("op_5103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5103_cast_fp16 = slice_by_index(begin = var_5103_begin_0, end = var_5103_end_0, end_mask = var_5103_end_mask_0, x = var_5005_cast_fp16)[name = tensor("op_5103_cast_fp16")]; + tensor var_5110_begin_0 = const()[name = tensor("op_5110_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5110_end_0 = const()[name = tensor("op_5110_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5110_end_mask_0 = const()[name = tensor("op_5110_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5110_cast_fp16 = slice_by_index(begin = var_5110_begin_0, end = var_5110_end_0, end_mask = var_5110_end_mask_0, x = var_5009_cast_fp16)[name = tensor("op_5110_cast_fp16")]; + tensor var_5117_begin_0 = const()[name = tensor("op_5117_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5117_end_0 = const()[name = tensor("op_5117_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5117_end_mask_0 = const()[name = tensor("op_5117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5117_cast_fp16 = slice_by_index(begin = var_5117_begin_0, end = var_5117_end_0, end_mask = var_5117_end_mask_0, x = var_5009_cast_fp16)[name = tensor("op_5117_cast_fp16")]; + tensor var_5124_begin_0 = const()[name = tensor("op_5124_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5124_end_0 = const()[name = tensor("op_5124_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5124_end_mask_0 = const()[name = tensor("op_5124_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5124_cast_fp16 = slice_by_index(begin = var_5124_begin_0, end = var_5124_end_0, end_mask = var_5124_end_mask_0, x = var_5009_cast_fp16)[name = tensor("op_5124_cast_fp16")]; + tensor var_5131_begin_0 = const()[name = tensor("op_5131_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5131_end_0 = const()[name = tensor("op_5131_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5131_end_mask_0 = const()[name = tensor("op_5131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5131_cast_fp16 = slice_by_index(begin = var_5131_begin_0, end = var_5131_end_0, end_mask = var_5131_end_mask_0, x = var_5009_cast_fp16)[name = tensor("op_5131_cast_fp16")]; + tensor var_5138_begin_0 = const()[name = tensor("op_5138_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5138_end_0 = const()[name = tensor("op_5138_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5138_end_mask_0 = const()[name = tensor("op_5138_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5138_cast_fp16 = slice_by_index(begin = var_5138_begin_0, end = var_5138_end_0, end_mask = var_5138_end_mask_0, x = var_5013_cast_fp16)[name = tensor("op_5138_cast_fp16")]; + tensor var_5145_begin_0 = const()[name = tensor("op_5145_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5145_end_0 = const()[name = tensor("op_5145_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5145_end_mask_0 = const()[name = tensor("op_5145_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5145_cast_fp16 = slice_by_index(begin = var_5145_begin_0, end = 
var_5145_end_0, end_mask = var_5145_end_mask_0, x = var_5013_cast_fp16)[name = tensor("op_5145_cast_fp16")]; + tensor var_5152_begin_0 = const()[name = tensor("op_5152_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5152_end_0 = const()[name = tensor("op_5152_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5152_end_mask_0 = const()[name = tensor("op_5152_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5152_cast_fp16 = slice_by_index(begin = var_5152_begin_0, end = var_5152_end_0, end_mask = var_5152_end_mask_0, x = var_5013_cast_fp16)[name = tensor("op_5152_cast_fp16")]; + tensor var_5159_begin_0 = const()[name = tensor("op_5159_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5159_end_0 = const()[name = tensor("op_5159_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5159_end_mask_0 = const()[name = tensor("op_5159_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5159_cast_fp16 = slice_by_index(begin = var_5159_begin_0, end = var_5159_end_0, end_mask = var_5159_end_mask_0, x = var_5013_cast_fp16)[name = tensor("op_5159_cast_fp16")]; + tensor var_5166_begin_0 = const()[name = tensor("op_5166_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5166_end_0 = const()[name = tensor("op_5166_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5166_end_mask_0 = const()[name = tensor("op_5166_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5166_cast_fp16 = slice_by_index(begin = var_5166_begin_0, end = var_5166_end_0, end_mask = var_5166_end_mask_0, x = var_5017_cast_fp16)[name = tensor("op_5166_cast_fp16")]; + tensor var_5173_begin_0 = const()[name = tensor("op_5173_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5173_end_0 = const()[name = tensor("op_5173_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5173_end_mask_0 = const()[name = tensor("op_5173_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5173_cast_fp16 = slice_by_index(begin = var_5173_begin_0, end = var_5173_end_0, end_mask = var_5173_end_mask_0, x = var_5017_cast_fp16)[name = tensor("op_5173_cast_fp16")]; + tensor var_5180_begin_0 = const()[name = tensor("op_5180_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5180_end_0 = const()[name = tensor("op_5180_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5180_end_mask_0 = const()[name = tensor("op_5180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5180_cast_fp16 = slice_by_index(begin = var_5180_begin_0, end = var_5180_end_0, end_mask = var_5180_end_mask_0, x = var_5017_cast_fp16)[name = tensor("op_5180_cast_fp16")]; + tensor var_5187_begin_0 = const()[name = tensor("op_5187_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5187_end_0 = const()[name = tensor("op_5187_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5187_end_mask_0 = const()[name = tensor("op_5187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5187_cast_fp16 = slice_by_index(begin = var_5187_begin_0, end = var_5187_end_0, end_mask = var_5187_end_mask_0, x = var_5017_cast_fp16)[name = tensor("op_5187_cast_fp16")]; + tensor var_5194_begin_0 = const()[name = tensor("op_5194_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5194_end_0 = const()[name = tensor("op_5194_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5194_end_mask_0 = const()[name = tensor("op_5194_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5194_cast_fp16 = slice_by_index(begin = var_5194_begin_0, end = var_5194_end_0, end_mask = 
var_5194_end_mask_0, x = var_5021_cast_fp16)[name = tensor("op_5194_cast_fp16")]; + tensor var_5201_begin_0 = const()[name = tensor("op_5201_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5201_end_0 = const()[name = tensor("op_5201_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5201_end_mask_0 = const()[name = tensor("op_5201_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5201_cast_fp16 = slice_by_index(begin = var_5201_begin_0, end = var_5201_end_0, end_mask = var_5201_end_mask_0, x = var_5021_cast_fp16)[name = tensor("op_5201_cast_fp16")]; + tensor var_5208_begin_0 = const()[name = tensor("op_5208_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5208_end_0 = const()[name = tensor("op_5208_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5208_end_mask_0 = const()[name = tensor("op_5208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5208_cast_fp16 = slice_by_index(begin = var_5208_begin_0, end = var_5208_end_0, end_mask = var_5208_end_mask_0, x = var_5021_cast_fp16)[name = tensor("op_5208_cast_fp16")]; + tensor var_5215_begin_0 = const()[name = tensor("op_5215_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5215_end_0 = const()[name = tensor("op_5215_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5215_end_mask_0 = const()[name = tensor("op_5215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5215_cast_fp16 = slice_by_index(begin = var_5215_begin_0, end = var_5215_end_0, end_mask = var_5215_end_mask_0, x = var_5021_cast_fp16)[name = tensor("op_5215_cast_fp16")]; + tensor var_5222_begin_0 = const()[name = tensor("op_5222_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5222_end_0 = const()[name = tensor("op_5222_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5222_end_mask_0 = const()[name = tensor("op_5222_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5222_cast_fp16 = slice_by_index(begin = var_5222_begin_0, end = var_5222_end_0, end_mask = var_5222_end_mask_0, x = var_5025_cast_fp16)[name = tensor("op_5222_cast_fp16")]; + tensor var_5229_begin_0 = const()[name = tensor("op_5229_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5229_end_0 = const()[name = tensor("op_5229_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5229_end_mask_0 = const()[name = tensor("op_5229_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5229_cast_fp16 = slice_by_index(begin = var_5229_begin_0, end = var_5229_end_0, end_mask = var_5229_end_mask_0, x = var_5025_cast_fp16)[name = tensor("op_5229_cast_fp16")]; + tensor var_5236_begin_0 = const()[name = tensor("op_5236_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5236_end_0 = const()[name = tensor("op_5236_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5236_end_mask_0 = const()[name = tensor("op_5236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5236_cast_fp16 = slice_by_index(begin = var_5236_begin_0, end = var_5236_end_0, end_mask = var_5236_end_mask_0, x = var_5025_cast_fp16)[name = tensor("op_5236_cast_fp16")]; + tensor var_5243_begin_0 = const()[name = tensor("op_5243_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5243_end_0 = const()[name = tensor("op_5243_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5243_end_mask_0 = const()[name = tensor("op_5243_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5243_cast_fp16 = slice_by_index(begin = var_5243_begin_0, end = var_5243_end_0, end_mask = var_5243_end_mask_0, x = 
var_5025_cast_fp16)[name = tensor("op_5243_cast_fp16")]; + tensor var_5250_begin_0 = const()[name = tensor("op_5250_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5250_end_0 = const()[name = tensor("op_5250_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5250_end_mask_0 = const()[name = tensor("op_5250_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5250_cast_fp16 = slice_by_index(begin = var_5250_begin_0, end = var_5250_end_0, end_mask = var_5250_end_mask_0, x = var_5029_cast_fp16)[name = tensor("op_5250_cast_fp16")]; + tensor var_5257_begin_0 = const()[name = tensor("op_5257_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5257_end_0 = const()[name = tensor("op_5257_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5257_end_mask_0 = const()[name = tensor("op_5257_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5257_cast_fp16 = slice_by_index(begin = var_5257_begin_0, end = var_5257_end_0, end_mask = var_5257_end_mask_0, x = var_5029_cast_fp16)[name = tensor("op_5257_cast_fp16")]; + tensor var_5264_begin_0 = const()[name = tensor("op_5264_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5264_end_0 = const()[name = tensor("op_5264_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5264_end_mask_0 = const()[name = tensor("op_5264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5264_cast_fp16 = slice_by_index(begin = var_5264_begin_0, end = var_5264_end_0, end_mask = var_5264_end_mask_0, x = var_5029_cast_fp16)[name = tensor("op_5264_cast_fp16")]; + tensor var_5271_begin_0 = const()[name = tensor("op_5271_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5271_end_0 = const()[name = tensor("op_5271_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5271_end_mask_0 = const()[name = tensor("op_5271_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5271_cast_fp16 = slice_by_index(begin = var_5271_begin_0, end = var_5271_end_0, end_mask = var_5271_end_mask_0, x = var_5029_cast_fp16)[name = tensor("op_5271_cast_fp16")]; + tensor var_5278_begin_0 = const()[name = tensor("op_5278_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5278_end_0 = const()[name = tensor("op_5278_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5278_end_mask_0 = const()[name = tensor("op_5278_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5278_cast_fp16 = slice_by_index(begin = var_5278_begin_0, end = var_5278_end_0, end_mask = var_5278_end_mask_0, x = var_5033_cast_fp16)[name = tensor("op_5278_cast_fp16")]; + tensor var_5285_begin_0 = const()[name = tensor("op_5285_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5285_end_0 = const()[name = tensor("op_5285_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5285_end_mask_0 = const()[name = tensor("op_5285_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5285_cast_fp16 = slice_by_index(begin = var_5285_begin_0, end = var_5285_end_0, end_mask = var_5285_end_mask_0, x = var_5033_cast_fp16)[name = tensor("op_5285_cast_fp16")]; + tensor var_5292_begin_0 = const()[name = tensor("op_5292_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5292_end_0 = const()[name = tensor("op_5292_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5292_end_mask_0 = const()[name = tensor("op_5292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5292_cast_fp16 = slice_by_index(begin = var_5292_begin_0, end = var_5292_end_0, end_mask = var_5292_end_mask_0, x = var_5033_cast_fp16)[name = 
tensor("op_5292_cast_fp16")]; + tensor var_5299_begin_0 = const()[name = tensor("op_5299_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5299_end_0 = const()[name = tensor("op_5299_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5299_end_mask_0 = const()[name = tensor("op_5299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5299_cast_fp16 = slice_by_index(begin = var_5299_begin_0, end = var_5299_end_0, end_mask = var_5299_end_mask_0, x = var_5033_cast_fp16)[name = tensor("op_5299_cast_fp16")]; + tensor var_5306_begin_0 = const()[name = tensor("op_5306_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5306_end_0 = const()[name = tensor("op_5306_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5306_end_mask_0 = const()[name = tensor("op_5306_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5306_cast_fp16 = slice_by_index(begin = var_5306_begin_0, end = var_5306_end_0, end_mask = var_5306_end_mask_0, x = var_5037_cast_fp16)[name = tensor("op_5306_cast_fp16")]; + tensor var_5313_begin_0 = const()[name = tensor("op_5313_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5313_end_0 = const()[name = tensor("op_5313_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5313_end_mask_0 = const()[name = tensor("op_5313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5313_cast_fp16 = slice_by_index(begin = var_5313_begin_0, end = var_5313_end_0, end_mask = var_5313_end_mask_0, x = var_5037_cast_fp16)[name = tensor("op_5313_cast_fp16")]; + tensor var_5320_begin_0 = const()[name = tensor("op_5320_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5320_end_0 = const()[name = tensor("op_5320_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5320_end_mask_0 = const()[name = tensor("op_5320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5320_cast_fp16 = slice_by_index(begin = var_5320_begin_0, end = var_5320_end_0, end_mask = var_5320_end_mask_0, x = var_5037_cast_fp16)[name = tensor("op_5320_cast_fp16")]; + tensor var_5327_begin_0 = const()[name = tensor("op_5327_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5327_end_0 = const()[name = tensor("op_5327_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5327_end_mask_0 = const()[name = tensor("op_5327_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5327_cast_fp16 = slice_by_index(begin = var_5327_begin_0, end = var_5327_end_0, end_mask = var_5327_end_mask_0, x = var_5037_cast_fp16)[name = tensor("op_5327_cast_fp16")]; + tensor var_5334_begin_0 = const()[name = tensor("op_5334_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5334_end_0 = const()[name = tensor("op_5334_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5334_end_mask_0 = const()[name = tensor("op_5334_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5334_cast_fp16 = slice_by_index(begin = var_5334_begin_0, end = var_5334_end_0, end_mask = var_5334_end_mask_0, x = var_5041_cast_fp16)[name = tensor("op_5334_cast_fp16")]; + tensor var_5341_begin_0 = const()[name = tensor("op_5341_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5341_end_0 = const()[name = tensor("op_5341_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5341_end_mask_0 = const()[name = tensor("op_5341_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5341_cast_fp16 = slice_by_index(begin = var_5341_begin_0, end = var_5341_end_0, end_mask = var_5341_end_mask_0, x = var_5041_cast_fp16)[name = tensor("op_5341_cast_fp16")]; + 
tensor var_5348_begin_0 = const()[name = tensor("op_5348_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5348_end_0 = const()[name = tensor("op_5348_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5348_end_mask_0 = const()[name = tensor("op_5348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5348_cast_fp16 = slice_by_index(begin = var_5348_begin_0, end = var_5348_end_0, end_mask = var_5348_end_mask_0, x = var_5041_cast_fp16)[name = tensor("op_5348_cast_fp16")]; + tensor var_5355_begin_0 = const()[name = tensor("op_5355_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5355_end_0 = const()[name = tensor("op_5355_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5355_end_mask_0 = const()[name = tensor("op_5355_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5355_cast_fp16 = slice_by_index(begin = var_5355_begin_0, end = var_5355_end_0, end_mask = var_5355_end_mask_0, x = var_5041_cast_fp16)[name = tensor("op_5355_cast_fp16")]; + tensor var_5362_begin_0 = const()[name = tensor("op_5362_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5362_end_0 = const()[name = tensor("op_5362_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5362_end_mask_0 = const()[name = tensor("op_5362_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5362_cast_fp16 = slice_by_index(begin = var_5362_begin_0, end = var_5362_end_0, end_mask = var_5362_end_mask_0, x = var_5045_cast_fp16)[name = tensor("op_5362_cast_fp16")]; + tensor var_5369_begin_0 = const()[name = tensor("op_5369_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5369_end_0 = const()[name = tensor("op_5369_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5369_end_mask_0 = const()[name = tensor("op_5369_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5369_cast_fp16 = slice_by_index(begin = var_5369_begin_0, end = var_5369_end_0, end_mask = var_5369_end_mask_0, x = var_5045_cast_fp16)[name = tensor("op_5369_cast_fp16")]; + tensor var_5376_begin_0 = const()[name = tensor("op_5376_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5376_end_0 = const()[name = tensor("op_5376_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5376_end_mask_0 = const()[name = tensor("op_5376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5376_cast_fp16 = slice_by_index(begin = var_5376_begin_0, end = var_5376_end_0, end_mask = var_5376_end_mask_0, x = var_5045_cast_fp16)[name = tensor("op_5376_cast_fp16")]; + tensor var_5383_begin_0 = const()[name = tensor("op_5383_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5383_end_0 = const()[name = tensor("op_5383_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5383_end_mask_0 = const()[name = tensor("op_5383_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5383_cast_fp16 = slice_by_index(begin = var_5383_begin_0, end = var_5383_end_0, end_mask = var_5383_end_mask_0, x = var_5045_cast_fp16)[name = tensor("op_5383_cast_fp16")]; + tensor k_11_perm_0 = const()[name = tensor("k_11_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_5388_begin_0 = const()[name = tensor("op_5388_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5388_end_0 = const()[name = tensor("op_5388_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_5388_end_mask_0 = const()[name = tensor("op_5388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_6 = transpose(perm = k_11_perm_0, x = key_11_cast_fp16)[name = tensor("transpose_6")]; + tensor 
var_5388_cast_fp16 = slice_by_index(begin = var_5388_begin_0, end = var_5388_end_0, end_mask = var_5388_end_mask_0, x = transpose_6)[name = tensor("op_5388_cast_fp16")]; + tensor var_5392_begin_0 = const()[name = tensor("op_5392_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_5392_end_0 = const()[name = tensor("op_5392_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_5392_end_mask_0 = const()[name = tensor("op_5392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5392_cast_fp16 = slice_by_index(begin = var_5392_begin_0, end = var_5392_end_0, end_mask = var_5392_end_mask_0, x = transpose_6)[name = tensor("op_5392_cast_fp16")]; + tensor var_5396_begin_0 = const()[name = tensor("op_5396_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_5396_end_0 = const()[name = tensor("op_5396_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_5396_end_mask_0 = const()[name = tensor("op_5396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5396_cast_fp16 = slice_by_index(begin = var_5396_begin_0, end = var_5396_end_0, end_mask = var_5396_end_mask_0, x = transpose_6)[name = tensor("op_5396_cast_fp16")]; + tensor var_5400_begin_0 = const()[name = tensor("op_5400_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_5400_end_0 = const()[name = tensor("op_5400_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_5400_end_mask_0 = const()[name = tensor("op_5400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5400_cast_fp16 = slice_by_index(begin = var_5400_begin_0, end = var_5400_end_0, end_mask = var_5400_end_mask_0, x = transpose_6)[name = tensor("op_5400_cast_fp16")]; + tensor var_5404_begin_0 = const()[name = tensor("op_5404_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_5404_end_0 = const()[name = tensor("op_5404_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_5404_end_mask_0 = const()[name = tensor("op_5404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5404_cast_fp16 = slice_by_index(begin = var_5404_begin_0, end = var_5404_end_0, end_mask = var_5404_end_mask_0, x = transpose_6)[name = tensor("op_5404_cast_fp16")]; + tensor var_5408_begin_0 = const()[name = tensor("op_5408_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_5408_end_0 = const()[name = tensor("op_5408_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_5408_end_mask_0 = const()[name = tensor("op_5408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5408_cast_fp16 = slice_by_index(begin = var_5408_begin_0, end = var_5408_end_0, end_mask = var_5408_end_mask_0, x = transpose_6)[name = tensor("op_5408_cast_fp16")]; + tensor var_5412_begin_0 = const()[name = tensor("op_5412_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_5412_end_0 = const()[name = tensor("op_5412_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_5412_end_mask_0 = const()[name = tensor("op_5412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5412_cast_fp16 = slice_by_index(begin = var_5412_begin_0, end = var_5412_end_0, end_mask = var_5412_end_mask_0, x = transpose_6)[name = tensor("op_5412_cast_fp16")]; + tensor var_5416_begin_0 = const()[name = tensor("op_5416_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_5416_end_0 = const()[name = tensor("op_5416_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_5416_end_mask_0 = const()[name = tensor("op_5416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5416_cast_fp16 = slice_by_index(begin = var_5416_begin_0, end = 
var_5416_end_0, end_mask = var_5416_end_mask_0, x = transpose_6)[name = tensor("op_5416_cast_fp16")]; + tensor var_5420_begin_0 = const()[name = tensor("op_5420_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_5420_end_0 = const()[name = tensor("op_5420_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_5420_end_mask_0 = const()[name = tensor("op_5420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5420_cast_fp16 = slice_by_index(begin = var_5420_begin_0, end = var_5420_end_0, end_mask = var_5420_end_mask_0, x = transpose_6)[name = tensor("op_5420_cast_fp16")]; + tensor var_5424_begin_0 = const()[name = tensor("op_5424_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_5424_end_0 = const()[name = tensor("op_5424_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_5424_end_mask_0 = const()[name = tensor("op_5424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5424_cast_fp16 = slice_by_index(begin = var_5424_begin_0, end = var_5424_end_0, end_mask = var_5424_end_mask_0, x = transpose_6)[name = tensor("op_5424_cast_fp16")]; + tensor var_5428_begin_0 = const()[name = tensor("op_5428_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_5428_end_0 = const()[name = tensor("op_5428_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_5428_end_mask_0 = const()[name = tensor("op_5428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5428_cast_fp16 = slice_by_index(begin = var_5428_begin_0, end = var_5428_end_0, end_mask = var_5428_end_mask_0, x = transpose_6)[name = tensor("op_5428_cast_fp16")]; + tensor var_5432_begin_0 = const()[name = tensor("op_5432_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_5432_end_0 = const()[name = tensor("op_5432_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_5432_end_mask_0 = const()[name = tensor("op_5432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5432_cast_fp16 = slice_by_index(begin = var_5432_begin_0, end = var_5432_end_0, end_mask = var_5432_end_mask_0, x = transpose_6)[name = tensor("op_5432_cast_fp16")]; + tensor var_5434_begin_0 = const()[name = tensor("op_5434_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5434_end_0 = const()[name = tensor("op_5434_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5434_end_mask_0 = const()[name = tensor("op_5434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5434_cast_fp16 = slice_by_index(begin = var_5434_begin_0, end = var_5434_end_0, end_mask = var_5434_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5434_cast_fp16")]; + tensor var_5438_begin_0 = const()[name = tensor("op_5438_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_5438_end_0 = const()[name = tensor("op_5438_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_5438_end_mask_0 = const()[name = tensor("op_5438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5438_cast_fp16 = slice_by_index(begin = var_5438_begin_0, end = var_5438_end_0, end_mask = var_5438_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5438_cast_fp16")]; + tensor var_5442_begin_0 = const()[name = tensor("op_5442_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_5442_end_0 = const()[name = tensor("op_5442_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_5442_end_mask_0 = const()[name = tensor("op_5442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5442_cast_fp16 = slice_by_index(begin = var_5442_begin_0, end = var_5442_end_0, end_mask = var_5442_end_mask_0, x = 
value_11_cast_fp16)[name = tensor("op_5442_cast_fp16")]; + tensor var_5446_begin_0 = const()[name = tensor("op_5446_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_5446_end_0 = const()[name = tensor("op_5446_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_5446_end_mask_0 = const()[name = tensor("op_5446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5446_cast_fp16 = slice_by_index(begin = var_5446_begin_0, end = var_5446_end_0, end_mask = var_5446_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5446_cast_fp16")]; + tensor var_5450_begin_0 = const()[name = tensor("op_5450_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_5450_end_0 = const()[name = tensor("op_5450_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_5450_end_mask_0 = const()[name = tensor("op_5450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5450_cast_fp16 = slice_by_index(begin = var_5450_begin_0, end = var_5450_end_0, end_mask = var_5450_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5450_cast_fp16")]; + tensor var_5454_begin_0 = const()[name = tensor("op_5454_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_5454_end_0 = const()[name = tensor("op_5454_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_5454_end_mask_0 = const()[name = tensor("op_5454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5454_cast_fp16 = slice_by_index(begin = var_5454_begin_0, end = var_5454_end_0, end_mask = var_5454_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5454_cast_fp16")]; + tensor var_5458_begin_0 = const()[name = tensor("op_5458_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_5458_end_0 = const()[name = tensor("op_5458_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_5458_end_mask_0 = const()[name = tensor("op_5458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5458_cast_fp16 = slice_by_index(begin = var_5458_begin_0, end = var_5458_end_0, end_mask = var_5458_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5458_cast_fp16")]; + tensor var_5462_begin_0 = const()[name = tensor("op_5462_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_5462_end_0 = const()[name = tensor("op_5462_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_5462_end_mask_0 = const()[name = tensor("op_5462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5462_cast_fp16 = slice_by_index(begin = var_5462_begin_0, end = var_5462_end_0, end_mask = var_5462_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5462_cast_fp16")]; + tensor var_5466_begin_0 = const()[name = tensor("op_5466_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_5466_end_0 = const()[name = tensor("op_5466_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_5466_end_mask_0 = const()[name = tensor("op_5466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5466_cast_fp16 = slice_by_index(begin = var_5466_begin_0, end = var_5466_end_0, end_mask = var_5466_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5466_cast_fp16")]; + tensor var_5470_begin_0 = const()[name = tensor("op_5470_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_5470_end_0 = const()[name = tensor("op_5470_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_5470_end_mask_0 = const()[name = tensor("op_5470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5470_cast_fp16 = slice_by_index(begin = var_5470_begin_0, end = var_5470_end_0, end_mask = var_5470_end_mask_0, x = 
value_11_cast_fp16)[name = tensor("op_5470_cast_fp16")]; + tensor var_5474_begin_0 = const()[name = tensor("op_5474_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_5474_end_0 = const()[name = tensor("op_5474_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_5474_end_mask_0 = const()[name = tensor("op_5474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5474_cast_fp16 = slice_by_index(begin = var_5474_begin_0, end = var_5474_end_0, end_mask = var_5474_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5474_cast_fp16")]; + tensor var_5478_begin_0 = const()[name = tensor("op_5478_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_5478_end_0 = const()[name = tensor("op_5478_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_5478_end_mask_0 = const()[name = tensor("op_5478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5478_cast_fp16 = slice_by_index(begin = var_5478_begin_0, end = var_5478_end_0, end_mask = var_5478_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_5478_cast_fp16")]; + tensor var_5482_equation_0 = const()[name = tensor("op_5482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5482_cast_fp16 = einsum(equation = var_5482_equation_0, values = (var_5388_cast_fp16, var_5054_cast_fp16))[name = tensor("op_5482_cast_fp16")]; + tensor var_5483_to_fp16 = const()[name = tensor("op_5483_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_481_cast_fp16 = mul(x = var_5482_cast_fp16, y = var_5483_to_fp16)[name = tensor("aw_chunk_481_cast_fp16")]; + tensor var_5486_equation_0 = const()[name = tensor("op_5486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5486_cast_fp16 = einsum(equation = var_5486_equation_0, values = (var_5388_cast_fp16, var_5061_cast_fp16))[name = tensor("op_5486_cast_fp16")]; + tensor var_5487_to_fp16 = const()[name = tensor("op_5487_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_483_cast_fp16 = mul(x = var_5486_cast_fp16, y = var_5487_to_fp16)[name = tensor("aw_chunk_483_cast_fp16")]; + tensor var_5490_equation_0 = const()[name = tensor("op_5490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5490_cast_fp16 = einsum(equation = var_5490_equation_0, values = (var_5388_cast_fp16, var_5068_cast_fp16))[name = tensor("op_5490_cast_fp16")]; + tensor var_5491_to_fp16 = const()[name = tensor("op_5491_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_485_cast_fp16 = mul(x = var_5490_cast_fp16, y = var_5491_to_fp16)[name = tensor("aw_chunk_485_cast_fp16")]; + tensor var_5494_equation_0 = const()[name = tensor("op_5494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5494_cast_fp16 = einsum(equation = var_5494_equation_0, values = (var_5388_cast_fp16, var_5075_cast_fp16))[name = tensor("op_5494_cast_fp16")]; + tensor var_5495_to_fp16 = const()[name = tensor("op_5495_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_487_cast_fp16 = mul(x = var_5494_cast_fp16, y = var_5495_to_fp16)[name = tensor("aw_chunk_487_cast_fp16")]; + tensor var_5498_equation_0 = const()[name = tensor("op_5498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5498_cast_fp16 = einsum(equation = var_5498_equation_0, values = (var_5392_cast_fp16, var_5082_cast_fp16))[name = tensor("op_5498_cast_fp16")]; + tensor var_5499_to_fp16 = const()[name = tensor("op_5499_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_489_cast_fp16 = mul(x = var_5498_cast_fp16, y = var_5499_to_fp16)[name = tensor("aw_chunk_489_cast_fp16")]; + tensor var_5502_equation_0 = const()[name = 
tensor("op_5502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5502_cast_fp16 = einsum(equation = var_5502_equation_0, values = (var_5392_cast_fp16, var_5089_cast_fp16))[name = tensor("op_5502_cast_fp16")]; + tensor var_5503_to_fp16 = const()[name = tensor("op_5503_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_491_cast_fp16 = mul(x = var_5502_cast_fp16, y = var_5503_to_fp16)[name = tensor("aw_chunk_491_cast_fp16")]; + tensor var_5506_equation_0 = const()[name = tensor("op_5506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5506_cast_fp16 = einsum(equation = var_5506_equation_0, values = (var_5392_cast_fp16, var_5096_cast_fp16))[name = tensor("op_5506_cast_fp16")]; + tensor var_5507_to_fp16 = const()[name = tensor("op_5507_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_493_cast_fp16 = mul(x = var_5506_cast_fp16, y = var_5507_to_fp16)[name = tensor("aw_chunk_493_cast_fp16")]; + tensor var_5510_equation_0 = const()[name = tensor("op_5510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5510_cast_fp16 = einsum(equation = var_5510_equation_0, values = (var_5392_cast_fp16, var_5103_cast_fp16))[name = tensor("op_5510_cast_fp16")]; + tensor var_5511_to_fp16 = const()[name = tensor("op_5511_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_495_cast_fp16 = mul(x = var_5510_cast_fp16, y = var_5511_to_fp16)[name = tensor("aw_chunk_495_cast_fp16")]; + tensor var_5514_equation_0 = const()[name = tensor("op_5514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5514_cast_fp16 = einsum(equation = var_5514_equation_0, values = (var_5396_cast_fp16, var_5110_cast_fp16))[name = tensor("op_5514_cast_fp16")]; + tensor var_5515_to_fp16 = const()[name = tensor("op_5515_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_497_cast_fp16 = mul(x = var_5514_cast_fp16, y = var_5515_to_fp16)[name = tensor("aw_chunk_497_cast_fp16")]; + tensor var_5518_equation_0 = const()[name = tensor("op_5518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5518_cast_fp16 = einsum(equation = var_5518_equation_0, values = (var_5396_cast_fp16, var_5117_cast_fp16))[name = tensor("op_5518_cast_fp16")]; + tensor var_5519_to_fp16 = const()[name = tensor("op_5519_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_499_cast_fp16 = mul(x = var_5518_cast_fp16, y = var_5519_to_fp16)[name = tensor("aw_chunk_499_cast_fp16")]; + tensor var_5522_equation_0 = const()[name = tensor("op_5522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5522_cast_fp16 = einsum(equation = var_5522_equation_0, values = (var_5396_cast_fp16, var_5124_cast_fp16))[name = tensor("op_5522_cast_fp16")]; + tensor var_5523_to_fp16 = const()[name = tensor("op_5523_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_501_cast_fp16 = mul(x = var_5522_cast_fp16, y = var_5523_to_fp16)[name = tensor("aw_chunk_501_cast_fp16")]; + tensor var_5526_equation_0 = const()[name = tensor("op_5526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5526_cast_fp16 = einsum(equation = var_5526_equation_0, values = (var_5396_cast_fp16, var_5131_cast_fp16))[name = tensor("op_5526_cast_fp16")]; + tensor var_5527_to_fp16 = const()[name = tensor("op_5527_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_503_cast_fp16 = mul(x = var_5526_cast_fp16, y = var_5527_to_fp16)[name = tensor("aw_chunk_503_cast_fp16")]; + tensor var_5530_equation_0 = const()[name = tensor("op_5530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5530_cast_fp16 = einsum(equation = var_5530_equation_0, values = 
(var_5400_cast_fp16, var_5138_cast_fp16))[name = tensor("op_5530_cast_fp16")]; + tensor var_5531_to_fp16 = const()[name = tensor("op_5531_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_505_cast_fp16 = mul(x = var_5530_cast_fp16, y = var_5531_to_fp16)[name = tensor("aw_chunk_505_cast_fp16")]; + tensor var_5534_equation_0 = const()[name = tensor("op_5534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5534_cast_fp16 = einsum(equation = var_5534_equation_0, values = (var_5400_cast_fp16, var_5145_cast_fp16))[name = tensor("op_5534_cast_fp16")]; + tensor var_5535_to_fp16 = const()[name = tensor("op_5535_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_507_cast_fp16 = mul(x = var_5534_cast_fp16, y = var_5535_to_fp16)[name = tensor("aw_chunk_507_cast_fp16")]; + tensor var_5538_equation_0 = const()[name = tensor("op_5538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5538_cast_fp16 = einsum(equation = var_5538_equation_0, values = (var_5400_cast_fp16, var_5152_cast_fp16))[name = tensor("op_5538_cast_fp16")]; + tensor var_5539_to_fp16 = const()[name = tensor("op_5539_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_509_cast_fp16 = mul(x = var_5538_cast_fp16, y = var_5539_to_fp16)[name = tensor("aw_chunk_509_cast_fp16")]; + tensor var_5542_equation_0 = const()[name = tensor("op_5542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5542_cast_fp16 = einsum(equation = var_5542_equation_0, values = (var_5400_cast_fp16, var_5159_cast_fp16))[name = tensor("op_5542_cast_fp16")]; + tensor var_5543_to_fp16 = const()[name = tensor("op_5543_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_511_cast_fp16 = mul(x = var_5542_cast_fp16, y = var_5543_to_fp16)[name = tensor("aw_chunk_511_cast_fp16")]; + tensor var_5546_equation_0 = const()[name = tensor("op_5546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5546_cast_fp16 = einsum(equation = var_5546_equation_0, values = (var_5404_cast_fp16, var_5166_cast_fp16))[name = tensor("op_5546_cast_fp16")]; + tensor var_5547_to_fp16 = const()[name = tensor("op_5547_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_513_cast_fp16 = mul(x = var_5546_cast_fp16, y = var_5547_to_fp16)[name = tensor("aw_chunk_513_cast_fp16")]; + tensor var_5550_equation_0 = const()[name = tensor("op_5550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5550_cast_fp16 = einsum(equation = var_5550_equation_0, values = (var_5404_cast_fp16, var_5173_cast_fp16))[name = tensor("op_5550_cast_fp16")]; + tensor var_5551_to_fp16 = const()[name = tensor("op_5551_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_515_cast_fp16 = mul(x = var_5550_cast_fp16, y = var_5551_to_fp16)[name = tensor("aw_chunk_515_cast_fp16")]; + tensor var_5554_equation_0 = const()[name = tensor("op_5554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5554_cast_fp16 = einsum(equation = var_5554_equation_0, values = (var_5404_cast_fp16, var_5180_cast_fp16))[name = tensor("op_5554_cast_fp16")]; + tensor var_5555_to_fp16 = const()[name = tensor("op_5555_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_517_cast_fp16 = mul(x = var_5554_cast_fp16, y = var_5555_to_fp16)[name = tensor("aw_chunk_517_cast_fp16")]; + tensor var_5558_equation_0 = const()[name = tensor("op_5558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5558_cast_fp16 = einsum(equation = var_5558_equation_0, values = (var_5404_cast_fp16, var_5187_cast_fp16))[name = tensor("op_5558_cast_fp16")]; + tensor var_5559_to_fp16 = const()[name = tensor("op_5559_to_fp16"), val 
= tensor(0x1p-3)]; + tensor aw_chunk_519_cast_fp16 = mul(x = var_5558_cast_fp16, y = var_5559_to_fp16)[name = tensor("aw_chunk_519_cast_fp16")]; + tensor var_5562_equation_0 = const()[name = tensor("op_5562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5562_cast_fp16 = einsum(equation = var_5562_equation_0, values = (var_5408_cast_fp16, var_5194_cast_fp16))[name = tensor("op_5562_cast_fp16")]; + tensor var_5563_to_fp16 = const()[name = tensor("op_5563_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_521_cast_fp16 = mul(x = var_5562_cast_fp16, y = var_5563_to_fp16)[name = tensor("aw_chunk_521_cast_fp16")]; + tensor var_5566_equation_0 = const()[name = tensor("op_5566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5566_cast_fp16 = einsum(equation = var_5566_equation_0, values = (var_5408_cast_fp16, var_5201_cast_fp16))[name = tensor("op_5566_cast_fp16")]; + tensor var_5567_to_fp16 = const()[name = tensor("op_5567_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_523_cast_fp16 = mul(x = var_5566_cast_fp16, y = var_5567_to_fp16)[name = tensor("aw_chunk_523_cast_fp16")]; + tensor var_5570_equation_0 = const()[name = tensor("op_5570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5570_cast_fp16 = einsum(equation = var_5570_equation_0, values = (var_5408_cast_fp16, var_5208_cast_fp16))[name = tensor("op_5570_cast_fp16")]; + tensor var_5571_to_fp16 = const()[name = tensor("op_5571_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_525_cast_fp16 = mul(x = var_5570_cast_fp16, y = var_5571_to_fp16)[name = tensor("aw_chunk_525_cast_fp16")]; + tensor var_5574_equation_0 = const()[name = tensor("op_5574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5574_cast_fp16 = einsum(equation = var_5574_equation_0, values = (var_5408_cast_fp16, var_5215_cast_fp16))[name = tensor("op_5574_cast_fp16")]; + tensor var_5575_to_fp16 = const()[name = tensor("op_5575_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_527_cast_fp16 = mul(x = var_5574_cast_fp16, y = var_5575_to_fp16)[name = tensor("aw_chunk_527_cast_fp16")]; + tensor var_5578_equation_0 = const()[name = tensor("op_5578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5578_cast_fp16 = einsum(equation = var_5578_equation_0, values = (var_5412_cast_fp16, var_5222_cast_fp16))[name = tensor("op_5578_cast_fp16")]; + tensor var_5579_to_fp16 = const()[name = tensor("op_5579_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_529_cast_fp16 = mul(x = var_5578_cast_fp16, y = var_5579_to_fp16)[name = tensor("aw_chunk_529_cast_fp16")]; + tensor var_5582_equation_0 = const()[name = tensor("op_5582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5582_cast_fp16 = einsum(equation = var_5582_equation_0, values = (var_5412_cast_fp16, var_5229_cast_fp16))[name = tensor("op_5582_cast_fp16")]; + tensor var_5583_to_fp16 = const()[name = tensor("op_5583_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_531_cast_fp16 = mul(x = var_5582_cast_fp16, y = var_5583_to_fp16)[name = tensor("aw_chunk_531_cast_fp16")]; + tensor var_5586_equation_0 = const()[name = tensor("op_5586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5586_cast_fp16 = einsum(equation = var_5586_equation_0, values = (var_5412_cast_fp16, var_5236_cast_fp16))[name = tensor("op_5586_cast_fp16")]; + tensor var_5587_to_fp16 = const()[name = tensor("op_5587_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_533_cast_fp16 = mul(x = var_5586_cast_fp16, y = var_5587_to_fp16)[name = tensor("aw_chunk_533_cast_fp16")]; + 
tensor var_5590_equation_0 = const()[name = tensor("op_5590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5590_cast_fp16 = einsum(equation = var_5590_equation_0, values = (var_5412_cast_fp16, var_5243_cast_fp16))[name = tensor("op_5590_cast_fp16")]; + tensor var_5591_to_fp16 = const()[name = tensor("op_5591_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_535_cast_fp16 = mul(x = var_5590_cast_fp16, y = var_5591_to_fp16)[name = tensor("aw_chunk_535_cast_fp16")]; + tensor var_5594_equation_0 = const()[name = tensor("op_5594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5594_cast_fp16 = einsum(equation = var_5594_equation_0, values = (var_5416_cast_fp16, var_5250_cast_fp16))[name = tensor("op_5594_cast_fp16")]; + tensor var_5595_to_fp16 = const()[name = tensor("op_5595_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_537_cast_fp16 = mul(x = var_5594_cast_fp16, y = var_5595_to_fp16)[name = tensor("aw_chunk_537_cast_fp16")]; + tensor var_5598_equation_0 = const()[name = tensor("op_5598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5598_cast_fp16 = einsum(equation = var_5598_equation_0, values = (var_5416_cast_fp16, var_5257_cast_fp16))[name = tensor("op_5598_cast_fp16")]; + tensor var_5599_to_fp16 = const()[name = tensor("op_5599_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_539_cast_fp16 = mul(x = var_5598_cast_fp16, y = var_5599_to_fp16)[name = tensor("aw_chunk_539_cast_fp16")]; + tensor var_5602_equation_0 = const()[name = tensor("op_5602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5602_cast_fp16 = einsum(equation = var_5602_equation_0, values = (var_5416_cast_fp16, var_5264_cast_fp16))[name = tensor("op_5602_cast_fp16")]; + tensor var_5603_to_fp16 = const()[name = tensor("op_5603_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_541_cast_fp16 = mul(x = var_5602_cast_fp16, y = var_5603_to_fp16)[name = tensor("aw_chunk_541_cast_fp16")]; + tensor var_5606_equation_0 = const()[name = tensor("op_5606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5606_cast_fp16 = einsum(equation = var_5606_equation_0, values = (var_5416_cast_fp16, var_5271_cast_fp16))[name = tensor("op_5606_cast_fp16")]; + tensor var_5607_to_fp16 = const()[name = tensor("op_5607_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_543_cast_fp16 = mul(x = var_5606_cast_fp16, y = var_5607_to_fp16)[name = tensor("aw_chunk_543_cast_fp16")]; + tensor var_5610_equation_0 = const()[name = tensor("op_5610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5610_cast_fp16 = einsum(equation = var_5610_equation_0, values = (var_5420_cast_fp16, var_5278_cast_fp16))[name = tensor("op_5610_cast_fp16")]; + tensor var_5611_to_fp16 = const()[name = tensor("op_5611_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_545_cast_fp16 = mul(x = var_5610_cast_fp16, y = var_5611_to_fp16)[name = tensor("aw_chunk_545_cast_fp16")]; + tensor var_5614_equation_0 = const()[name = tensor("op_5614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5614_cast_fp16 = einsum(equation = var_5614_equation_0, values = (var_5420_cast_fp16, var_5285_cast_fp16))[name = tensor("op_5614_cast_fp16")]; + tensor var_5615_to_fp16 = const()[name = tensor("op_5615_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_547_cast_fp16 = mul(x = var_5614_cast_fp16, y = var_5615_to_fp16)[name = tensor("aw_chunk_547_cast_fp16")]; + tensor var_5618_equation_0 = const()[name = tensor("op_5618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5618_cast_fp16 = 
einsum(equation = var_5618_equation_0, values = (var_5420_cast_fp16, var_5292_cast_fp16))[name = tensor("op_5618_cast_fp16")]; + tensor var_5619_to_fp16 = const()[name = tensor("op_5619_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_549_cast_fp16 = mul(x = var_5618_cast_fp16, y = var_5619_to_fp16)[name = tensor("aw_chunk_549_cast_fp16")]; + tensor var_5622_equation_0 = const()[name = tensor("op_5622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5622_cast_fp16 = einsum(equation = var_5622_equation_0, values = (var_5420_cast_fp16, var_5299_cast_fp16))[name = tensor("op_5622_cast_fp16")]; + tensor var_5623_to_fp16 = const()[name = tensor("op_5623_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_551_cast_fp16 = mul(x = var_5622_cast_fp16, y = var_5623_to_fp16)[name = tensor("aw_chunk_551_cast_fp16")]; + tensor var_5626_equation_0 = const()[name = tensor("op_5626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5626_cast_fp16 = einsum(equation = var_5626_equation_0, values = (var_5424_cast_fp16, var_5306_cast_fp16))[name = tensor("op_5626_cast_fp16")]; + tensor var_5627_to_fp16 = const()[name = tensor("op_5627_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_553_cast_fp16 = mul(x = var_5626_cast_fp16, y = var_5627_to_fp16)[name = tensor("aw_chunk_553_cast_fp16")]; + tensor var_5630_equation_0 = const()[name = tensor("op_5630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5630_cast_fp16 = einsum(equation = var_5630_equation_0, values = (var_5424_cast_fp16, var_5313_cast_fp16))[name = tensor("op_5630_cast_fp16")]; + tensor var_5631_to_fp16 = const()[name = tensor("op_5631_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_555_cast_fp16 = mul(x = var_5630_cast_fp16, y = var_5631_to_fp16)[name = tensor("aw_chunk_555_cast_fp16")]; + tensor var_5634_equation_0 = const()[name = tensor("op_5634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5634_cast_fp16 = einsum(equation = var_5634_equation_0, values = (var_5424_cast_fp16, var_5320_cast_fp16))[name = tensor("op_5634_cast_fp16")]; + tensor var_5635_to_fp16 = const()[name = tensor("op_5635_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_557_cast_fp16 = mul(x = var_5634_cast_fp16, y = var_5635_to_fp16)[name = tensor("aw_chunk_557_cast_fp16")]; + tensor var_5638_equation_0 = const()[name = tensor("op_5638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5638_cast_fp16 = einsum(equation = var_5638_equation_0, values = (var_5424_cast_fp16, var_5327_cast_fp16))[name = tensor("op_5638_cast_fp16")]; + tensor var_5639_to_fp16 = const()[name = tensor("op_5639_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_559_cast_fp16 = mul(x = var_5638_cast_fp16, y = var_5639_to_fp16)[name = tensor("aw_chunk_559_cast_fp16")]; + tensor var_5642_equation_0 = const()[name = tensor("op_5642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5642_cast_fp16 = einsum(equation = var_5642_equation_0, values = (var_5428_cast_fp16, var_5334_cast_fp16))[name = tensor("op_5642_cast_fp16")]; + tensor var_5643_to_fp16 = const()[name = tensor("op_5643_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_561_cast_fp16 = mul(x = var_5642_cast_fp16, y = var_5643_to_fp16)[name = tensor("aw_chunk_561_cast_fp16")]; + tensor var_5646_equation_0 = const()[name = tensor("op_5646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5646_cast_fp16 = einsum(equation = var_5646_equation_0, values = (var_5428_cast_fp16, var_5341_cast_fp16))[name = tensor("op_5646_cast_fp16")]; + tensor var_5647_to_fp16 
= const()[name = tensor("op_5647_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_563_cast_fp16 = mul(x = var_5646_cast_fp16, y = var_5647_to_fp16)[name = tensor("aw_chunk_563_cast_fp16")]; + tensor var_5650_equation_0 = const()[name = tensor("op_5650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5650_cast_fp16 = einsum(equation = var_5650_equation_0, values = (var_5428_cast_fp16, var_5348_cast_fp16))[name = tensor("op_5650_cast_fp16")]; + tensor var_5651_to_fp16 = const()[name = tensor("op_5651_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_565_cast_fp16 = mul(x = var_5650_cast_fp16, y = var_5651_to_fp16)[name = tensor("aw_chunk_565_cast_fp16")]; + tensor var_5654_equation_0 = const()[name = tensor("op_5654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5654_cast_fp16 = einsum(equation = var_5654_equation_0, values = (var_5428_cast_fp16, var_5355_cast_fp16))[name = tensor("op_5654_cast_fp16")]; + tensor var_5655_to_fp16 = const()[name = tensor("op_5655_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_567_cast_fp16 = mul(x = var_5654_cast_fp16, y = var_5655_to_fp16)[name = tensor("aw_chunk_567_cast_fp16")]; + tensor var_5658_equation_0 = const()[name = tensor("op_5658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5658_cast_fp16 = einsum(equation = var_5658_equation_0, values = (var_5432_cast_fp16, var_5362_cast_fp16))[name = tensor("op_5658_cast_fp16")]; + tensor var_5659_to_fp16 = const()[name = tensor("op_5659_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_569_cast_fp16 = mul(x = var_5658_cast_fp16, y = var_5659_to_fp16)[name = tensor("aw_chunk_569_cast_fp16")]; + tensor var_5662_equation_0 = const()[name = tensor("op_5662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5662_cast_fp16 = einsum(equation = var_5662_equation_0, values = (var_5432_cast_fp16, var_5369_cast_fp16))[name = tensor("op_5662_cast_fp16")]; + tensor var_5663_to_fp16 = const()[name = tensor("op_5663_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_571_cast_fp16 = mul(x = var_5662_cast_fp16, y = var_5663_to_fp16)[name = tensor("aw_chunk_571_cast_fp16")]; + tensor var_5666_equation_0 = const()[name = tensor("op_5666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5666_cast_fp16 = einsum(equation = var_5666_equation_0, values = (var_5432_cast_fp16, var_5376_cast_fp16))[name = tensor("op_5666_cast_fp16")]; + tensor var_5667_to_fp16 = const()[name = tensor("op_5667_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_573_cast_fp16 = mul(x = var_5666_cast_fp16, y = var_5667_to_fp16)[name = tensor("aw_chunk_573_cast_fp16")]; + tensor var_5670_equation_0 = const()[name = tensor("op_5670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5670_cast_fp16 = einsum(equation = var_5670_equation_0, values = (var_5432_cast_fp16, var_5383_cast_fp16))[name = tensor("op_5670_cast_fp16")]; + tensor var_5671_to_fp16 = const()[name = tensor("op_5671_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_575_cast_fp16 = mul(x = var_5670_cast_fp16, y = var_5671_to_fp16)[name = tensor("aw_chunk_575_cast_fp16")]; + tensor var_5673_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_481_cast_fp16)[name = tensor("op_5673_cast_fp16")]; + tensor var_5674_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_483_cast_fp16)[name = tensor("op_5674_cast_fp16")]; + tensor var_5675_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_485_cast_fp16)[name = tensor("op_5675_cast_fp16")]; + tensor var_5676_cast_fp16 = softmax(axis = var_4946, x = 
aw_chunk_487_cast_fp16)[name = tensor("op_5676_cast_fp16")]; + tensor var_5677_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_489_cast_fp16)[name = tensor("op_5677_cast_fp16")]; + tensor var_5678_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_491_cast_fp16)[name = tensor("op_5678_cast_fp16")]; + tensor var_5679_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_493_cast_fp16)[name = tensor("op_5679_cast_fp16")]; + tensor var_5680_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_495_cast_fp16)[name = tensor("op_5680_cast_fp16")]; + tensor var_5681_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_497_cast_fp16)[name = tensor("op_5681_cast_fp16")]; + tensor var_5682_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_499_cast_fp16)[name = tensor("op_5682_cast_fp16")]; + tensor var_5683_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_501_cast_fp16)[name = tensor("op_5683_cast_fp16")]; + tensor var_5684_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_503_cast_fp16)[name = tensor("op_5684_cast_fp16")]; + tensor var_5685_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_505_cast_fp16)[name = tensor("op_5685_cast_fp16")]; + tensor var_5686_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_507_cast_fp16)[name = tensor("op_5686_cast_fp16")]; + tensor var_5687_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_509_cast_fp16)[name = tensor("op_5687_cast_fp16")]; + tensor var_5688_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_511_cast_fp16)[name = tensor("op_5688_cast_fp16")]; + tensor var_5689_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_513_cast_fp16)[name = tensor("op_5689_cast_fp16")]; + tensor var_5690_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_515_cast_fp16)[name = tensor("op_5690_cast_fp16")]; + tensor var_5691_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_517_cast_fp16)[name = tensor("op_5691_cast_fp16")]; + tensor var_5692_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_519_cast_fp16)[name = tensor("op_5692_cast_fp16")]; + tensor var_5693_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_521_cast_fp16)[name = tensor("op_5693_cast_fp16")]; + tensor var_5694_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_523_cast_fp16)[name = tensor("op_5694_cast_fp16")]; + tensor var_5695_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_525_cast_fp16)[name = tensor("op_5695_cast_fp16")]; + tensor var_5696_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_527_cast_fp16)[name = tensor("op_5696_cast_fp16")]; + tensor var_5697_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_529_cast_fp16)[name = tensor("op_5697_cast_fp16")]; + tensor var_5698_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_531_cast_fp16)[name = tensor("op_5698_cast_fp16")]; + tensor var_5699_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_533_cast_fp16)[name = tensor("op_5699_cast_fp16")]; + tensor var_5700_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_535_cast_fp16)[name = tensor("op_5700_cast_fp16")]; + tensor var_5701_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_537_cast_fp16)[name = tensor("op_5701_cast_fp16")]; + tensor var_5702_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_539_cast_fp16)[name = tensor("op_5702_cast_fp16")]; + tensor var_5703_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_541_cast_fp16)[name = tensor("op_5703_cast_fp16")]; + tensor var_5704_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_543_cast_fp16)[name = tensor("op_5704_cast_fp16")]; + tensor var_5705_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_545_cast_fp16)[name = tensor("op_5705_cast_fp16")]; + tensor 
var_5706_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_547_cast_fp16)[name = tensor("op_5706_cast_fp16")]; + tensor var_5707_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_549_cast_fp16)[name = tensor("op_5707_cast_fp16")]; + tensor var_5708_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_551_cast_fp16)[name = tensor("op_5708_cast_fp16")]; + tensor var_5709_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_553_cast_fp16)[name = tensor("op_5709_cast_fp16")]; + tensor var_5710_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_555_cast_fp16)[name = tensor("op_5710_cast_fp16")]; + tensor var_5711_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_557_cast_fp16)[name = tensor("op_5711_cast_fp16")]; + tensor var_5712_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_559_cast_fp16)[name = tensor("op_5712_cast_fp16")]; + tensor var_5713_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_561_cast_fp16)[name = tensor("op_5713_cast_fp16")]; + tensor var_5714_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_563_cast_fp16)[name = tensor("op_5714_cast_fp16")]; + tensor var_5715_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_565_cast_fp16)[name = tensor("op_5715_cast_fp16")]; + tensor var_5716_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_567_cast_fp16)[name = tensor("op_5716_cast_fp16")]; + tensor var_5717_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_569_cast_fp16)[name = tensor("op_5717_cast_fp16")]; + tensor var_5718_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_571_cast_fp16)[name = tensor("op_5718_cast_fp16")]; + tensor var_5719_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_573_cast_fp16)[name = tensor("op_5719_cast_fp16")]; + tensor var_5720_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_575_cast_fp16)[name = tensor("op_5720_cast_fp16")]; + tensor var_5722_equation_0 = const()[name = tensor("op_5722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5722_cast_fp16 = einsum(equation = var_5722_equation_0, values = (var_5434_cast_fp16, var_5673_cast_fp16))[name = tensor("op_5722_cast_fp16")]; + tensor var_5724_equation_0 = const()[name = tensor("op_5724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5724_cast_fp16 = einsum(equation = var_5724_equation_0, values = (var_5434_cast_fp16, var_5674_cast_fp16))[name = tensor("op_5724_cast_fp16")]; + tensor var_5726_equation_0 = const()[name = tensor("op_5726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5726_cast_fp16 = einsum(equation = var_5726_equation_0, values = (var_5434_cast_fp16, var_5675_cast_fp16))[name = tensor("op_5726_cast_fp16")]; + tensor var_5728_equation_0 = const()[name = tensor("op_5728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5728_cast_fp16 = einsum(equation = var_5728_equation_0, values = (var_5434_cast_fp16, var_5676_cast_fp16))[name = tensor("op_5728_cast_fp16")]; + tensor var_5730_equation_0 = const()[name = tensor("op_5730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5730_cast_fp16 = einsum(equation = var_5730_equation_0, values = (var_5438_cast_fp16, var_5677_cast_fp16))[name = tensor("op_5730_cast_fp16")]; + tensor var_5732_equation_0 = const()[name = tensor("op_5732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5732_cast_fp16 = einsum(equation = var_5732_equation_0, values = (var_5438_cast_fp16, var_5678_cast_fp16))[name = tensor("op_5732_cast_fp16")]; + tensor var_5734_equation_0 = const()[name = tensor("op_5734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5734_cast_fp16 = einsum(equation = 
var_5734_equation_0, values = (var_5438_cast_fp16, var_5679_cast_fp16))[name = tensor("op_5734_cast_fp16")]; + tensor var_5736_equation_0 = const()[name = tensor("op_5736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5736_cast_fp16 = einsum(equation = var_5736_equation_0, values = (var_5438_cast_fp16, var_5680_cast_fp16))[name = tensor("op_5736_cast_fp16")]; + tensor var_5738_equation_0 = const()[name = tensor("op_5738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5738_cast_fp16 = einsum(equation = var_5738_equation_0, values = (var_5442_cast_fp16, var_5681_cast_fp16))[name = tensor("op_5738_cast_fp16")]; + tensor var_5740_equation_0 = const()[name = tensor("op_5740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5740_cast_fp16 = einsum(equation = var_5740_equation_0, values = (var_5442_cast_fp16, var_5682_cast_fp16))[name = tensor("op_5740_cast_fp16")]; + tensor var_5742_equation_0 = const()[name = tensor("op_5742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5742_cast_fp16 = einsum(equation = var_5742_equation_0, values = (var_5442_cast_fp16, var_5683_cast_fp16))[name = tensor("op_5742_cast_fp16")]; + tensor var_5744_equation_0 = const()[name = tensor("op_5744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5744_cast_fp16 = einsum(equation = var_5744_equation_0, values = (var_5442_cast_fp16, var_5684_cast_fp16))[name = tensor("op_5744_cast_fp16")]; + tensor var_5746_equation_0 = const()[name = tensor("op_5746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5746_cast_fp16 = einsum(equation = var_5746_equation_0, values = (var_5446_cast_fp16, var_5685_cast_fp16))[name = tensor("op_5746_cast_fp16")]; + tensor var_5748_equation_0 = const()[name = tensor("op_5748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5748_cast_fp16 = einsum(equation = var_5748_equation_0, values = (var_5446_cast_fp16, var_5686_cast_fp16))[name = tensor("op_5748_cast_fp16")]; + tensor var_5750_equation_0 = const()[name = tensor("op_5750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5750_cast_fp16 = einsum(equation = var_5750_equation_0, values = (var_5446_cast_fp16, var_5687_cast_fp16))[name = tensor("op_5750_cast_fp16")]; + tensor var_5752_equation_0 = const()[name = tensor("op_5752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5752_cast_fp16 = einsum(equation = var_5752_equation_0, values = (var_5446_cast_fp16, var_5688_cast_fp16))[name = tensor("op_5752_cast_fp16")]; + tensor var_5754_equation_0 = const()[name = tensor("op_5754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5754_cast_fp16 = einsum(equation = var_5754_equation_0, values = (var_5450_cast_fp16, var_5689_cast_fp16))[name = tensor("op_5754_cast_fp16")]; + tensor var_5756_equation_0 = const()[name = tensor("op_5756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5756_cast_fp16 = einsum(equation = var_5756_equation_0, values = (var_5450_cast_fp16, var_5690_cast_fp16))[name = tensor("op_5756_cast_fp16")]; + tensor var_5758_equation_0 = const()[name = tensor("op_5758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5758_cast_fp16 = einsum(equation = var_5758_equation_0, values = (var_5450_cast_fp16, var_5691_cast_fp16))[name = tensor("op_5758_cast_fp16")]; + tensor var_5760_equation_0 = const()[name = tensor("op_5760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5760_cast_fp16 = einsum(equation = var_5760_equation_0, values = (var_5450_cast_fp16, var_5692_cast_fp16))[name = 
tensor("op_5760_cast_fp16")]; + tensor var_5762_equation_0 = const()[name = tensor("op_5762_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5762_cast_fp16 = einsum(equation = var_5762_equation_0, values = (var_5454_cast_fp16, var_5693_cast_fp16))[name = tensor("op_5762_cast_fp16")]; + tensor var_5764_equation_0 = const()[name = tensor("op_5764_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5764_cast_fp16 = einsum(equation = var_5764_equation_0, values = (var_5454_cast_fp16, var_5694_cast_fp16))[name = tensor("op_5764_cast_fp16")]; + tensor var_5766_equation_0 = const()[name = tensor("op_5766_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5766_cast_fp16 = einsum(equation = var_5766_equation_0, values = (var_5454_cast_fp16, var_5695_cast_fp16))[name = tensor("op_5766_cast_fp16")]; + tensor var_5768_equation_0 = const()[name = tensor("op_5768_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5768_cast_fp16 = einsum(equation = var_5768_equation_0, values = (var_5454_cast_fp16, var_5696_cast_fp16))[name = tensor("op_5768_cast_fp16")]; + tensor var_5770_equation_0 = const()[name = tensor("op_5770_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5770_cast_fp16 = einsum(equation = var_5770_equation_0, values = (var_5458_cast_fp16, var_5697_cast_fp16))[name = tensor("op_5770_cast_fp16")]; + tensor var_5772_equation_0 = const()[name = tensor("op_5772_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5772_cast_fp16 = einsum(equation = var_5772_equation_0, values = (var_5458_cast_fp16, var_5698_cast_fp16))[name = tensor("op_5772_cast_fp16")]; + tensor var_5774_equation_0 = const()[name = tensor("op_5774_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5774_cast_fp16 = einsum(equation = var_5774_equation_0, values = (var_5458_cast_fp16, var_5699_cast_fp16))[name = tensor("op_5774_cast_fp16")]; + tensor var_5776_equation_0 = const()[name = tensor("op_5776_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5776_cast_fp16 = einsum(equation = var_5776_equation_0, values = (var_5458_cast_fp16, var_5700_cast_fp16))[name = tensor("op_5776_cast_fp16")]; + tensor var_5778_equation_0 = const()[name = tensor("op_5778_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5778_cast_fp16 = einsum(equation = var_5778_equation_0, values = (var_5462_cast_fp16, var_5701_cast_fp16))[name = tensor("op_5778_cast_fp16")]; + tensor var_5780_equation_0 = const()[name = tensor("op_5780_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5780_cast_fp16 = einsum(equation = var_5780_equation_0, values = (var_5462_cast_fp16, var_5702_cast_fp16))[name = tensor("op_5780_cast_fp16")]; + tensor var_5782_equation_0 = const()[name = tensor("op_5782_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5782_cast_fp16 = einsum(equation = var_5782_equation_0, values = (var_5462_cast_fp16, var_5703_cast_fp16))[name = tensor("op_5782_cast_fp16")]; + tensor var_5784_equation_0 = const()[name = tensor("op_5784_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5784_cast_fp16 = einsum(equation = var_5784_equation_0, values = (var_5462_cast_fp16, var_5704_cast_fp16))[name = tensor("op_5784_cast_fp16")]; + tensor var_5786_equation_0 = const()[name = tensor("op_5786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5786_cast_fp16 = einsum(equation = var_5786_equation_0, values = (var_5466_cast_fp16, var_5705_cast_fp16))[name = tensor("op_5786_cast_fp16")]; + tensor var_5788_equation_0 = const()[name = 
tensor("op_5788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5788_cast_fp16 = einsum(equation = var_5788_equation_0, values = (var_5466_cast_fp16, var_5706_cast_fp16))[name = tensor("op_5788_cast_fp16")]; + tensor var_5790_equation_0 = const()[name = tensor("op_5790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5790_cast_fp16 = einsum(equation = var_5790_equation_0, values = (var_5466_cast_fp16, var_5707_cast_fp16))[name = tensor("op_5790_cast_fp16")]; + tensor var_5792_equation_0 = const()[name = tensor("op_5792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5792_cast_fp16 = einsum(equation = var_5792_equation_0, values = (var_5466_cast_fp16, var_5708_cast_fp16))[name = tensor("op_5792_cast_fp16")]; + tensor var_5794_equation_0 = const()[name = tensor("op_5794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5794_cast_fp16 = einsum(equation = var_5794_equation_0, values = (var_5470_cast_fp16, var_5709_cast_fp16))[name = tensor("op_5794_cast_fp16")]; + tensor var_5796_equation_0 = const()[name = tensor("op_5796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5796_cast_fp16 = einsum(equation = var_5796_equation_0, values = (var_5470_cast_fp16, var_5710_cast_fp16))[name = tensor("op_5796_cast_fp16")]; + tensor var_5798_equation_0 = const()[name = tensor("op_5798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5798_cast_fp16 = einsum(equation = var_5798_equation_0, values = (var_5470_cast_fp16, var_5711_cast_fp16))[name = tensor("op_5798_cast_fp16")]; + tensor var_5800_equation_0 = const()[name = tensor("op_5800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5800_cast_fp16 = einsum(equation = var_5800_equation_0, values = (var_5470_cast_fp16, var_5712_cast_fp16))[name = tensor("op_5800_cast_fp16")]; + tensor var_5802_equation_0 = const()[name = tensor("op_5802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5802_cast_fp16 = einsum(equation = var_5802_equation_0, values = (var_5474_cast_fp16, var_5713_cast_fp16))[name = tensor("op_5802_cast_fp16")]; + tensor var_5804_equation_0 = const()[name = tensor("op_5804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5804_cast_fp16 = einsum(equation = var_5804_equation_0, values = (var_5474_cast_fp16, var_5714_cast_fp16))[name = tensor("op_5804_cast_fp16")]; + tensor var_5806_equation_0 = const()[name = tensor("op_5806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5806_cast_fp16 = einsum(equation = var_5806_equation_0, values = (var_5474_cast_fp16, var_5715_cast_fp16))[name = tensor("op_5806_cast_fp16")]; + tensor var_5808_equation_0 = const()[name = tensor("op_5808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5808_cast_fp16 = einsum(equation = var_5808_equation_0, values = (var_5474_cast_fp16, var_5716_cast_fp16))[name = tensor("op_5808_cast_fp16")]; + tensor var_5810_equation_0 = const()[name = tensor("op_5810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5810_cast_fp16 = einsum(equation = var_5810_equation_0, values = (var_5478_cast_fp16, var_5717_cast_fp16))[name = tensor("op_5810_cast_fp16")]; + tensor var_5812_equation_0 = const()[name = tensor("op_5812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5812_cast_fp16 = einsum(equation = var_5812_equation_0, values = (var_5478_cast_fp16, var_5718_cast_fp16))[name = tensor("op_5812_cast_fp16")]; + tensor var_5814_equation_0 = const()[name = tensor("op_5814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_5814_cast_fp16 = einsum(equation = var_5814_equation_0, values = (var_5478_cast_fp16, var_5719_cast_fp16))[name = tensor("op_5814_cast_fp16")]; + tensor var_5816_equation_0 = const()[name = tensor("op_5816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_5816_cast_fp16 = einsum(equation = var_5816_equation_0, values = (var_5478_cast_fp16, var_5720_cast_fp16))[name = tensor("op_5816_cast_fp16")]; + tensor var_5818_interleave_0 = const()[name = tensor("op_5818_interleave_0"), val = tensor(false)]; + tensor var_5818_cast_fp16 = concat(axis = var_4929, interleave = var_5818_interleave_0, values = (var_5722_cast_fp16, var_5724_cast_fp16, var_5726_cast_fp16, var_5728_cast_fp16))[name = tensor("op_5818_cast_fp16")]; + tensor var_5820_interleave_0 = const()[name = tensor("op_5820_interleave_0"), val = tensor(false)]; + tensor var_5820_cast_fp16 = concat(axis = var_4929, interleave = var_5820_interleave_0, values = (var_5730_cast_fp16, var_5732_cast_fp16, var_5734_cast_fp16, var_5736_cast_fp16))[name = tensor("op_5820_cast_fp16")]; + tensor var_5822_interleave_0 = const()[name = tensor("op_5822_interleave_0"), val = tensor(false)]; + tensor var_5822_cast_fp16 = concat(axis = var_4929, interleave = var_5822_interleave_0, values = (var_5738_cast_fp16, var_5740_cast_fp16, var_5742_cast_fp16, var_5744_cast_fp16))[name = tensor("op_5822_cast_fp16")]; + tensor var_5824_interleave_0 = const()[name = tensor("op_5824_interleave_0"), val = tensor(false)]; + tensor var_5824_cast_fp16 = concat(axis = var_4929, interleave = var_5824_interleave_0, values = (var_5746_cast_fp16, var_5748_cast_fp16, var_5750_cast_fp16, var_5752_cast_fp16))[name = tensor("op_5824_cast_fp16")]; + tensor var_5826_interleave_0 = const()[name = tensor("op_5826_interleave_0"), val = tensor(false)]; + tensor var_5826_cast_fp16 = concat(axis = var_4929, interleave = var_5826_interleave_0, values = (var_5754_cast_fp16, var_5756_cast_fp16, var_5758_cast_fp16, var_5760_cast_fp16))[name = tensor("op_5826_cast_fp16")]; + tensor var_5828_interleave_0 = const()[name = tensor("op_5828_interleave_0"), val = tensor(false)]; + tensor var_5828_cast_fp16 = concat(axis = var_4929, interleave = var_5828_interleave_0, values = (var_5762_cast_fp16, var_5764_cast_fp16, var_5766_cast_fp16, var_5768_cast_fp16))[name = tensor("op_5828_cast_fp16")]; + tensor var_5830_interleave_0 = const()[name = tensor("op_5830_interleave_0"), val = tensor(false)]; + tensor var_5830_cast_fp16 = concat(axis = var_4929, interleave = var_5830_interleave_0, values = (var_5770_cast_fp16, var_5772_cast_fp16, var_5774_cast_fp16, var_5776_cast_fp16))[name = tensor("op_5830_cast_fp16")]; + tensor var_5832_interleave_0 = const()[name = tensor("op_5832_interleave_0"), val = tensor(false)]; + tensor var_5832_cast_fp16 = concat(axis = var_4929, interleave = var_5832_interleave_0, values = (var_5778_cast_fp16, var_5780_cast_fp16, var_5782_cast_fp16, var_5784_cast_fp16))[name = tensor("op_5832_cast_fp16")]; + tensor var_5834_interleave_0 = const()[name = tensor("op_5834_interleave_0"), val = tensor(false)]; + tensor var_5834_cast_fp16 = concat(axis = var_4929, interleave = var_5834_interleave_0, values = (var_5786_cast_fp16, var_5788_cast_fp16, var_5790_cast_fp16, var_5792_cast_fp16))[name = tensor("op_5834_cast_fp16")]; + tensor var_5836_interleave_0 = const()[name = tensor("op_5836_interleave_0"), val = tensor(false)]; + tensor var_5836_cast_fp16 = concat(axis = var_4929, interleave = var_5836_interleave_0, values = (var_5794_cast_fp16, var_5796_cast_fp16, 
var_5798_cast_fp16, var_5800_cast_fp16))[name = tensor("op_5836_cast_fp16")]; + tensor var_5838_interleave_0 = const()[name = tensor("op_5838_interleave_0"), val = tensor(false)]; + tensor var_5838_cast_fp16 = concat(axis = var_4929, interleave = var_5838_interleave_0, values = (var_5802_cast_fp16, var_5804_cast_fp16, var_5806_cast_fp16, var_5808_cast_fp16))[name = tensor("op_5838_cast_fp16")]; + tensor var_5840_interleave_0 = const()[name = tensor("op_5840_interleave_0"), val = tensor(false)]; + tensor var_5840_cast_fp16 = concat(axis = var_4929, interleave = var_5840_interleave_0, values = (var_5810_cast_fp16, var_5812_cast_fp16, var_5814_cast_fp16, var_5816_cast_fp16))[name = tensor("op_5840_cast_fp16")]; + tensor input_41_interleave_0 = const()[name = tensor("input_41_interleave_0"), val = tensor(false)]; + tensor input_41_cast_fp16 = concat(axis = var_4946, interleave = input_41_interleave_0, values = (var_5818_cast_fp16, var_5820_cast_fp16, var_5822_cast_fp16, var_5824_cast_fp16, var_5826_cast_fp16, var_5828_cast_fp16, var_5830_cast_fp16, var_5832_cast_fp16, var_5834_cast_fp16, var_5836_cast_fp16, var_5838_cast_fp16, var_5840_cast_fp16))[name = tensor("input_41_cast_fp16")]; + tensor var_5845 = const()[name = tensor("op_5845"), val = tensor([1, 1])]; + tensor var_5847 = const()[name = tensor("op_5847"), val = tensor([1, 1])]; + tensor obj_23_pad_type_0 = const()[name = tensor("obj_23_pad_type_0"), val = tensor("custom")]; + tensor obj_23_pad_0 = const()[name = tensor("obj_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80639616)))]; + tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81819328)))]; + tensor obj_23_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = var_5847, groups = var_4946, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = var_5845, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor var_5853 = const()[name = tensor("op_5853"), val = tensor([1])]; + tensor channels_mean_23_cast_fp16 = reduce_mean(axes = var_5853, keep_dims = var_4947, x = inputs_23_cast_fp16)[name = tensor("channels_mean_23_cast_fp16")]; + tensor zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor("zero_mean_23_cast_fp16")]; + tensor zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = tensor("zero_mean_sq_23_cast_fp16")]; + tensor var_5857 = const()[name = tensor("op_5857"), val = tensor([1])]; + tensor var_5858_cast_fp16 = reduce_mean(axes = var_5857, keep_dims = var_4947, x = zero_mean_sq_23_cast_fp16)[name = tensor("op_5858_cast_fp16")]; + tensor var_5859_to_fp16 = const()[name = tensor("op_5859_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_5860_cast_fp16 = add(x = var_5858_cast_fp16, y = var_5859_to_fp16)[name = tensor("op_5860_cast_fp16")]; + tensor denom_23_epsilon_0_to_fp16 = const()[name = tensor("denom_23_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = 
var_5860_cast_fp16)[name = tensor("denom_23_cast_fp16")]; + tensor out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81820928)))]; + tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81822528)))]; + tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_5871 = const()[name = tensor("op_5871"), val = tensor([1, 1])]; + tensor var_5873 = const()[name = tensor("op_5873"), val = tensor([1, 1])]; + tensor input_45_pad_type_0 = const()[name = tensor("input_45_pad_type_0"), val = tensor("custom")]; + tensor input_45_pad_0 = const()[name = tensor("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81824128)))]; + tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86542784)))]; + tensor input_45_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = var_5873, groups = var_4946, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = var_5871, weight = layers_5_fc1_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; + tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_5879 = const()[name = tensor("op_5879"), val = tensor([1, 1])]; + tensor var_5881 = const()[name = tensor("op_5881"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_type_0 = const()[name = tensor("hidden_states_15_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_15_pad_0 = const()[name = tensor("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86548992)))]; + tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91267648)))]; + tensor hidden_states_15_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = var_5881, groups = var_4946, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = var_5879, weight = layers_5_fc2_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_5888 = const()[name = tensor("op_5888"), val = tensor(3)]; + tensor var_5905 = const()[name = tensor("op_5905"), val = tensor(1)]; + tensor var_5906 = 
const()[name = tensor("op_5906"), val = tensor(true)]; + tensor var_5916 = const()[name = tensor("op_5916"), val = tensor([1])]; + tensor channels_mean_25_cast_fp16 = reduce_mean(axes = var_5916, keep_dims = var_5906, x = inputs_25_cast_fp16)[name = tensor("channels_mean_25_cast_fp16")]; + tensor zero_mean_25_cast_fp16 = sub(x = inputs_25_cast_fp16, y = channels_mean_25_cast_fp16)[name = tensor("zero_mean_25_cast_fp16")]; + tensor zero_mean_sq_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = zero_mean_25_cast_fp16)[name = tensor("zero_mean_sq_25_cast_fp16")]; + tensor var_5920 = const()[name = tensor("op_5920"), val = tensor([1])]; + tensor var_5921_cast_fp16 = reduce_mean(axes = var_5920, keep_dims = var_5906, x = zero_mean_sq_25_cast_fp16)[name = tensor("op_5921_cast_fp16")]; + tensor var_5922_to_fp16 = const()[name = tensor("op_5922_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_5923_cast_fp16 = add(x = var_5921_cast_fp16, y = var_5922_to_fp16)[name = tensor("op_5923_cast_fp16")]; + tensor denom_25_epsilon_0_to_fp16 = const()[name = tensor("denom_25_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0_to_fp16, x = var_5923_cast_fp16)[name = tensor("denom_25_cast_fp16")]; + tensor out_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = denom_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91269248)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91270848)))]; + tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor var_5938 = const()[name = tensor("op_5938"), val = tensor([1, 1])]; + tensor var_5940 = const()[name = tensor("op_5940"), val = tensor([1, 1])]; + tensor query_13_pad_type_0 = const()[name = tensor("query_13_pad_type_0"), val = tensor("custom")]; + tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91272448)))]; + tensor layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92452160)))]; + tensor query_13_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = var_5940, groups = var_5905, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_5938, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_5944 = const()[name = tensor("op_5944"), val = tensor([1, 1])]; + tensor var_5946 = const()[name = tensor("op_5946"), val = tensor([1, 1])]; + tensor key_13_pad_type_0 = const()[name = tensor("key_13_pad_type_0"), val = tensor("custom")]; + tensor key_13_pad_0 = const()[name = tensor("key_13_pad_0"), val = tensor([0, 0, 0, 0])]; 
+ tensor layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92453760)))]; + tensor key_13_cast_fp16 = conv(dilations = var_5946, groups = var_5905, pad = key_13_pad_0, pad_type = key_13_pad_type_0, strides = var_5944, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_5951 = const()[name = tensor("op_5951"), val = tensor([1, 1])]; + tensor var_5953 = const()[name = tensor("op_5953"), val = tensor([1, 1])]; + tensor value_13_pad_type_0 = const()[name = tensor("value_13_pad_type_0"), val = tensor("custom")]; + tensor value_13_pad_0 = const()[name = tensor("value_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93633472)))]; + tensor layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94813184)))]; + tensor value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = var_5953, groups = var_5905, pad = value_13_pad_0, pad_type = value_13_pad_type_0, strides = var_5951, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_5960_begin_0 = const()[name = tensor("op_5960_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5960_end_0 = const()[name = tensor("op_5960_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5960_end_mask_0 = const()[name = tensor("op_5960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5960_cast_fp16 = slice_by_index(begin = var_5960_begin_0, end = var_5960_end_0, end_mask = var_5960_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5960_cast_fp16")]; + tensor var_5964_begin_0 = const()[name = tensor("op_5964_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_5964_end_0 = const()[name = tensor("op_5964_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_5964_end_mask_0 = const()[name = tensor("op_5964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5964_cast_fp16 = slice_by_index(begin = var_5964_begin_0, end = var_5964_end_0, end_mask = var_5964_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5964_cast_fp16")]; + tensor var_5968_begin_0 = const()[name = tensor("op_5968_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_5968_end_0 = const()[name = tensor("op_5968_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_5968_end_mask_0 = const()[name = tensor("op_5968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5968_cast_fp16 = slice_by_index(begin = var_5968_begin_0, end = var_5968_end_0, end_mask = var_5968_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5968_cast_fp16")]; + tensor var_5972_begin_0 = const()[name = tensor("op_5972_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_5972_end_0 = const()[name = tensor("op_5972_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_5972_end_mask_0 = const()[name = tensor("op_5972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5972_cast_fp16 = slice_by_index(begin = var_5972_begin_0, end = var_5972_end_0, end_mask = var_5972_end_mask_0, x = query_13_cast_fp16)[name = 
tensor("op_5972_cast_fp16")]; + tensor var_5976_begin_0 = const()[name = tensor("op_5976_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_5976_end_0 = const()[name = tensor("op_5976_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_5976_end_mask_0 = const()[name = tensor("op_5976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5976_cast_fp16 = slice_by_index(begin = var_5976_begin_0, end = var_5976_end_0, end_mask = var_5976_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5976_cast_fp16")]; + tensor var_5980_begin_0 = const()[name = tensor("op_5980_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_5980_end_0 = const()[name = tensor("op_5980_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_5980_end_mask_0 = const()[name = tensor("op_5980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5980_cast_fp16 = slice_by_index(begin = var_5980_begin_0, end = var_5980_end_0, end_mask = var_5980_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5980_cast_fp16")]; + tensor var_5984_begin_0 = const()[name = tensor("op_5984_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_5984_end_0 = const()[name = tensor("op_5984_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_5984_end_mask_0 = const()[name = tensor("op_5984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5984_cast_fp16 = slice_by_index(begin = var_5984_begin_0, end = var_5984_end_0, end_mask = var_5984_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5984_cast_fp16")]; + tensor var_5988_begin_0 = const()[name = tensor("op_5988_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_5988_end_0 = const()[name = tensor("op_5988_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_5988_end_mask_0 = const()[name = tensor("op_5988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5988_cast_fp16 = slice_by_index(begin = var_5988_begin_0, end = var_5988_end_0, end_mask = var_5988_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5988_cast_fp16")]; + tensor var_5992_begin_0 = const()[name = tensor("op_5992_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_5992_end_0 = const()[name = tensor("op_5992_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_5992_end_mask_0 = const()[name = tensor("op_5992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5992_cast_fp16 = slice_by_index(begin = var_5992_begin_0, end = var_5992_end_0, end_mask = var_5992_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5992_cast_fp16")]; + tensor var_5996_begin_0 = const()[name = tensor("op_5996_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_5996_end_0 = const()[name = tensor("op_5996_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_5996_end_mask_0 = const()[name = tensor("op_5996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5996_cast_fp16 = slice_by_index(begin = var_5996_begin_0, end = var_5996_end_0, end_mask = var_5996_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5996_cast_fp16")]; + tensor var_6000_begin_0 = const()[name = tensor("op_6000_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_6000_end_0 = const()[name = tensor("op_6000_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_6000_end_mask_0 = const()[name = tensor("op_6000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6000_cast_fp16 = slice_by_index(begin = var_6000_begin_0, end = var_6000_end_0, end_mask = var_6000_end_mask_0, x = query_13_cast_fp16)[name = 
tensor("op_6000_cast_fp16")]; + tensor var_6004_begin_0 = const()[name = tensor("op_6004_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_6004_end_0 = const()[name = tensor("op_6004_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_6004_end_mask_0 = const()[name = tensor("op_6004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6004_cast_fp16 = slice_by_index(begin = var_6004_begin_0, end = var_6004_end_0, end_mask = var_6004_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_6004_cast_fp16")]; + tensor var_6013_begin_0 = const()[name = tensor("op_6013_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6013_end_0 = const()[name = tensor("op_6013_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6013_end_mask_0 = const()[name = tensor("op_6013_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6013_cast_fp16 = slice_by_index(begin = var_6013_begin_0, end = var_6013_end_0, end_mask = var_6013_end_mask_0, x = var_5960_cast_fp16)[name = tensor("op_6013_cast_fp16")]; + tensor var_6020_begin_0 = const()[name = tensor("op_6020_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6020_end_0 = const()[name = tensor("op_6020_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6020_end_mask_0 = const()[name = tensor("op_6020_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6020_cast_fp16 = slice_by_index(begin = var_6020_begin_0, end = var_6020_end_0, end_mask = var_6020_end_mask_0, x = var_5960_cast_fp16)[name = tensor("op_6020_cast_fp16")]; + tensor var_6027_begin_0 = const()[name = tensor("op_6027_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6027_end_0 = const()[name = tensor("op_6027_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6027_end_mask_0 = const()[name = tensor("op_6027_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6027_cast_fp16 = slice_by_index(begin = var_6027_begin_0, end = var_6027_end_0, end_mask = var_6027_end_mask_0, x = var_5960_cast_fp16)[name = tensor("op_6027_cast_fp16")]; + tensor var_6034_begin_0 = const()[name = tensor("op_6034_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6034_end_0 = const()[name = tensor("op_6034_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6034_end_mask_0 = const()[name = tensor("op_6034_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6034_cast_fp16 = slice_by_index(begin = var_6034_begin_0, end = var_6034_end_0, end_mask = var_6034_end_mask_0, x = var_5960_cast_fp16)[name = tensor("op_6034_cast_fp16")]; + tensor var_6041_begin_0 = const()[name = tensor("op_6041_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6041_end_0 = const()[name = tensor("op_6041_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6041_end_mask_0 = const()[name = tensor("op_6041_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6041_cast_fp16 = slice_by_index(begin = var_6041_begin_0, end = var_6041_end_0, end_mask = var_6041_end_mask_0, x = var_5964_cast_fp16)[name = tensor("op_6041_cast_fp16")]; + tensor var_6048_begin_0 = const()[name = tensor("op_6048_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6048_end_0 = const()[name = tensor("op_6048_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6048_end_mask_0 = const()[name = tensor("op_6048_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6048_cast_fp16 = slice_by_index(begin = var_6048_begin_0, end = var_6048_end_0, end_mask = var_6048_end_mask_0, x = var_5964_cast_fp16)[name = tensor("op_6048_cast_fp16")]; + 
tensor var_6055_begin_0 = const()[name = tensor("op_6055_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6055_end_0 = const()[name = tensor("op_6055_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6055_end_mask_0 = const()[name = tensor("op_6055_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6055_cast_fp16 = slice_by_index(begin = var_6055_begin_0, end = var_6055_end_0, end_mask = var_6055_end_mask_0, x = var_5964_cast_fp16)[name = tensor("op_6055_cast_fp16")]; + tensor var_6062_begin_0 = const()[name = tensor("op_6062_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6062_end_0 = const()[name = tensor("op_6062_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6062_end_mask_0 = const()[name = tensor("op_6062_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6062_cast_fp16 = slice_by_index(begin = var_6062_begin_0, end = var_6062_end_0, end_mask = var_6062_end_mask_0, x = var_5964_cast_fp16)[name = tensor("op_6062_cast_fp16")]; + tensor var_6069_begin_0 = const()[name = tensor("op_6069_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6069_end_0 = const()[name = tensor("op_6069_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6069_end_mask_0 = const()[name = tensor("op_6069_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6069_cast_fp16 = slice_by_index(begin = var_6069_begin_0, end = var_6069_end_0, end_mask = var_6069_end_mask_0, x = var_5968_cast_fp16)[name = tensor("op_6069_cast_fp16")]; + tensor var_6076_begin_0 = const()[name = tensor("op_6076_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6076_end_0 = const()[name = tensor("op_6076_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6076_end_mask_0 = const()[name = tensor("op_6076_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6076_cast_fp16 = slice_by_index(begin = var_6076_begin_0, end = var_6076_end_0, end_mask = var_6076_end_mask_0, x = var_5968_cast_fp16)[name = tensor("op_6076_cast_fp16")]; + tensor var_6083_begin_0 = const()[name = tensor("op_6083_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6083_end_0 = const()[name = tensor("op_6083_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6083_end_mask_0 = const()[name = tensor("op_6083_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6083_cast_fp16 = slice_by_index(begin = var_6083_begin_0, end = var_6083_end_0, end_mask = var_6083_end_mask_0, x = var_5968_cast_fp16)[name = tensor("op_6083_cast_fp16")]; + tensor var_6090_begin_0 = const()[name = tensor("op_6090_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6090_end_0 = const()[name = tensor("op_6090_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6090_end_mask_0 = const()[name = tensor("op_6090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6090_cast_fp16 = slice_by_index(begin = var_6090_begin_0, end = var_6090_end_0, end_mask = var_6090_end_mask_0, x = var_5968_cast_fp16)[name = tensor("op_6090_cast_fp16")]; + tensor var_6097_begin_0 = const()[name = tensor("op_6097_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6097_end_0 = const()[name = tensor("op_6097_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6097_end_mask_0 = const()[name = tensor("op_6097_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6097_cast_fp16 = slice_by_index(begin = var_6097_begin_0, end = var_6097_end_0, end_mask = var_6097_end_mask_0, x = var_5972_cast_fp16)[name = tensor("op_6097_cast_fp16")]; + tensor var_6104_begin_0 = 
const()[name = tensor("op_6104_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6104_end_0 = const()[name = tensor("op_6104_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6104_end_mask_0 = const()[name = tensor("op_6104_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6104_cast_fp16 = slice_by_index(begin = var_6104_begin_0, end = var_6104_end_0, end_mask = var_6104_end_mask_0, x = var_5972_cast_fp16)[name = tensor("op_6104_cast_fp16")]; + tensor var_6111_begin_0 = const()[name = tensor("op_6111_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6111_end_0 = const()[name = tensor("op_6111_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6111_end_mask_0 = const()[name = tensor("op_6111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6111_cast_fp16 = slice_by_index(begin = var_6111_begin_0, end = var_6111_end_0, end_mask = var_6111_end_mask_0, x = var_5972_cast_fp16)[name = tensor("op_6111_cast_fp16")]; + tensor var_6118_begin_0 = const()[name = tensor("op_6118_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6118_end_0 = const()[name = tensor("op_6118_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6118_end_mask_0 = const()[name = tensor("op_6118_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6118_cast_fp16 = slice_by_index(begin = var_6118_begin_0, end = var_6118_end_0, end_mask = var_6118_end_mask_0, x = var_5972_cast_fp16)[name = tensor("op_6118_cast_fp16")]; + tensor var_6125_begin_0 = const()[name = tensor("op_6125_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6125_end_0 = const()[name = tensor("op_6125_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6125_end_mask_0 = const()[name = tensor("op_6125_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6125_cast_fp16 = slice_by_index(begin = var_6125_begin_0, end = var_6125_end_0, end_mask = var_6125_end_mask_0, x = var_5976_cast_fp16)[name = tensor("op_6125_cast_fp16")]; + tensor var_6132_begin_0 = const()[name = tensor("op_6132_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6132_end_0 = const()[name = tensor("op_6132_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6132_end_mask_0 = const()[name = tensor("op_6132_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6132_cast_fp16 = slice_by_index(begin = var_6132_begin_0, end = var_6132_end_0, end_mask = var_6132_end_mask_0, x = var_5976_cast_fp16)[name = tensor("op_6132_cast_fp16")]; + tensor var_6139_begin_0 = const()[name = tensor("op_6139_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6139_end_0 = const()[name = tensor("op_6139_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6139_end_mask_0 = const()[name = tensor("op_6139_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6139_cast_fp16 = slice_by_index(begin = var_6139_begin_0, end = var_6139_end_0, end_mask = var_6139_end_mask_0, x = var_5976_cast_fp16)[name = tensor("op_6139_cast_fp16")]; + tensor var_6146_begin_0 = const()[name = tensor("op_6146_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6146_end_0 = const()[name = tensor("op_6146_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6146_end_mask_0 = const()[name = tensor("op_6146_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6146_cast_fp16 = slice_by_index(begin = var_6146_begin_0, end = var_6146_end_0, end_mask = var_6146_end_mask_0, x = var_5976_cast_fp16)[name = tensor("op_6146_cast_fp16")]; + tensor var_6153_begin_0 = const()[name = 
tensor("op_6153_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6153_end_0 = const()[name = tensor("op_6153_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6153_end_mask_0 = const()[name = tensor("op_6153_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6153_cast_fp16 = slice_by_index(begin = var_6153_begin_0, end = var_6153_end_0, end_mask = var_6153_end_mask_0, x = var_5980_cast_fp16)[name = tensor("op_6153_cast_fp16")]; + tensor var_6160_begin_0 = const()[name = tensor("op_6160_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6160_end_0 = const()[name = tensor("op_6160_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6160_end_mask_0 = const()[name = tensor("op_6160_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6160_cast_fp16 = slice_by_index(begin = var_6160_begin_0, end = var_6160_end_0, end_mask = var_6160_end_mask_0, x = var_5980_cast_fp16)[name = tensor("op_6160_cast_fp16")]; + tensor var_6167_begin_0 = const()[name = tensor("op_6167_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6167_end_0 = const()[name = tensor("op_6167_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6167_end_mask_0 = const()[name = tensor("op_6167_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6167_cast_fp16 = slice_by_index(begin = var_6167_begin_0, end = var_6167_end_0, end_mask = var_6167_end_mask_0, x = var_5980_cast_fp16)[name = tensor("op_6167_cast_fp16")]; + tensor var_6174_begin_0 = const()[name = tensor("op_6174_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6174_end_0 = const()[name = tensor("op_6174_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6174_end_mask_0 = const()[name = tensor("op_6174_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6174_cast_fp16 = slice_by_index(begin = var_6174_begin_0, end = var_6174_end_0, end_mask = var_6174_end_mask_0, x = var_5980_cast_fp16)[name = tensor("op_6174_cast_fp16")]; + tensor var_6181_begin_0 = const()[name = tensor("op_6181_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6181_end_0 = const()[name = tensor("op_6181_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6181_end_mask_0 = const()[name = tensor("op_6181_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6181_cast_fp16 = slice_by_index(begin = var_6181_begin_0, end = var_6181_end_0, end_mask = var_6181_end_mask_0, x = var_5984_cast_fp16)[name = tensor("op_6181_cast_fp16")]; + tensor var_6188_begin_0 = const()[name = tensor("op_6188_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6188_end_0 = const()[name = tensor("op_6188_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6188_end_mask_0 = const()[name = tensor("op_6188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6188_cast_fp16 = slice_by_index(begin = var_6188_begin_0, end = var_6188_end_0, end_mask = var_6188_end_mask_0, x = var_5984_cast_fp16)[name = tensor("op_6188_cast_fp16")]; + tensor var_6195_begin_0 = const()[name = tensor("op_6195_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6195_end_0 = const()[name = tensor("op_6195_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6195_end_mask_0 = const()[name = tensor("op_6195_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6195_cast_fp16 = slice_by_index(begin = var_6195_begin_0, end = var_6195_end_0, end_mask = var_6195_end_mask_0, x = var_5984_cast_fp16)[name = tensor("op_6195_cast_fp16")]; + tensor var_6202_begin_0 = const()[name = tensor("op_6202_begin_0"), val = 
tensor([0, 0, 0, 1125])]; + tensor var_6202_end_0 = const()[name = tensor("op_6202_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6202_end_mask_0 = const()[name = tensor("op_6202_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6202_cast_fp16 = slice_by_index(begin = var_6202_begin_0, end = var_6202_end_0, end_mask = var_6202_end_mask_0, x = var_5984_cast_fp16)[name = tensor("op_6202_cast_fp16")]; + tensor var_6209_begin_0 = const()[name = tensor("op_6209_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6209_end_0 = const()[name = tensor("op_6209_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6209_end_mask_0 = const()[name = tensor("op_6209_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6209_cast_fp16 = slice_by_index(begin = var_6209_begin_0, end = var_6209_end_0, end_mask = var_6209_end_mask_0, x = var_5988_cast_fp16)[name = tensor("op_6209_cast_fp16")]; + tensor var_6216_begin_0 = const()[name = tensor("op_6216_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6216_end_0 = const()[name = tensor("op_6216_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6216_end_mask_0 = const()[name = tensor("op_6216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6216_cast_fp16 = slice_by_index(begin = var_6216_begin_0, end = var_6216_end_0, end_mask = var_6216_end_mask_0, x = var_5988_cast_fp16)[name = tensor("op_6216_cast_fp16")]; + tensor var_6223_begin_0 = const()[name = tensor("op_6223_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6223_end_0 = const()[name = tensor("op_6223_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6223_end_mask_0 = const()[name = tensor("op_6223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6223_cast_fp16 = slice_by_index(begin = var_6223_begin_0, end = var_6223_end_0, end_mask = var_6223_end_mask_0, x = var_5988_cast_fp16)[name = tensor("op_6223_cast_fp16")]; + tensor var_6230_begin_0 = const()[name = tensor("op_6230_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6230_end_0 = const()[name = tensor("op_6230_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6230_end_mask_0 = const()[name = tensor("op_6230_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6230_cast_fp16 = slice_by_index(begin = var_6230_begin_0, end = var_6230_end_0, end_mask = var_6230_end_mask_0, x = var_5988_cast_fp16)[name = tensor("op_6230_cast_fp16")]; + tensor var_6237_begin_0 = const()[name = tensor("op_6237_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6237_end_0 = const()[name = tensor("op_6237_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6237_end_mask_0 = const()[name = tensor("op_6237_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6237_cast_fp16 = slice_by_index(begin = var_6237_begin_0, end = var_6237_end_0, end_mask = var_6237_end_mask_0, x = var_5992_cast_fp16)[name = tensor("op_6237_cast_fp16")]; + tensor var_6244_begin_0 = const()[name = tensor("op_6244_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6244_end_0 = const()[name = tensor("op_6244_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6244_end_mask_0 = const()[name = tensor("op_6244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6244_cast_fp16 = slice_by_index(begin = var_6244_begin_0, end = var_6244_end_0, end_mask = var_6244_end_mask_0, x = var_5992_cast_fp16)[name = tensor("op_6244_cast_fp16")]; + tensor var_6251_begin_0 = const()[name = tensor("op_6251_begin_0"), val = tensor([0, 0, 0, 750])]; + 
tensor var_6251_end_0 = const()[name = tensor("op_6251_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6251_end_mask_0 = const()[name = tensor("op_6251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6251_cast_fp16 = slice_by_index(begin = var_6251_begin_0, end = var_6251_end_0, end_mask = var_6251_end_mask_0, x = var_5992_cast_fp16)[name = tensor("op_6251_cast_fp16")]; + tensor var_6258_begin_0 = const()[name = tensor("op_6258_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6258_end_0 = const()[name = tensor("op_6258_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6258_end_mask_0 = const()[name = tensor("op_6258_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6258_cast_fp16 = slice_by_index(begin = var_6258_begin_0, end = var_6258_end_0, end_mask = var_6258_end_mask_0, x = var_5992_cast_fp16)[name = tensor("op_6258_cast_fp16")]; + tensor var_6265_begin_0 = const()[name = tensor("op_6265_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6265_end_0 = const()[name = tensor("op_6265_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6265_end_mask_0 = const()[name = tensor("op_6265_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6265_cast_fp16 = slice_by_index(begin = var_6265_begin_0, end = var_6265_end_0, end_mask = var_6265_end_mask_0, x = var_5996_cast_fp16)[name = tensor("op_6265_cast_fp16")]; + tensor var_6272_begin_0 = const()[name = tensor("op_6272_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6272_end_0 = const()[name = tensor("op_6272_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6272_end_mask_0 = const()[name = tensor("op_6272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6272_cast_fp16 = slice_by_index(begin = var_6272_begin_0, end = var_6272_end_0, end_mask = var_6272_end_mask_0, x = var_5996_cast_fp16)[name = tensor("op_6272_cast_fp16")]; + tensor var_6279_begin_0 = const()[name = tensor("op_6279_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6279_end_0 = const()[name = tensor("op_6279_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6279_end_mask_0 = const()[name = tensor("op_6279_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6279_cast_fp16 = slice_by_index(begin = var_6279_begin_0, end = var_6279_end_0, end_mask = var_6279_end_mask_0, x = var_5996_cast_fp16)[name = tensor("op_6279_cast_fp16")]; + tensor var_6286_begin_0 = const()[name = tensor("op_6286_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6286_end_0 = const()[name = tensor("op_6286_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6286_end_mask_0 = const()[name = tensor("op_6286_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6286_cast_fp16 = slice_by_index(begin = var_6286_begin_0, end = var_6286_end_0, end_mask = var_6286_end_mask_0, x = var_5996_cast_fp16)[name = tensor("op_6286_cast_fp16")]; + tensor var_6293_begin_0 = const()[name = tensor("op_6293_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6293_end_0 = const()[name = tensor("op_6293_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6293_end_mask_0 = const()[name = tensor("op_6293_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6293_cast_fp16 = slice_by_index(begin = var_6293_begin_0, end = var_6293_end_0, end_mask = var_6293_end_mask_0, x = var_6000_cast_fp16)[name = tensor("op_6293_cast_fp16")]; + tensor var_6300_begin_0 = const()[name = tensor("op_6300_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6300_end_0 = 
const()[name = tensor("op_6300_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6300_end_mask_0 = const()[name = tensor("op_6300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6300_cast_fp16 = slice_by_index(begin = var_6300_begin_0, end = var_6300_end_0, end_mask = var_6300_end_mask_0, x = var_6000_cast_fp16)[name = tensor("op_6300_cast_fp16")]; + tensor var_6307_begin_0 = const()[name = tensor("op_6307_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6307_end_0 = const()[name = tensor("op_6307_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6307_end_mask_0 = const()[name = tensor("op_6307_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6307_cast_fp16 = slice_by_index(begin = var_6307_begin_0, end = var_6307_end_0, end_mask = var_6307_end_mask_0, x = var_6000_cast_fp16)[name = tensor("op_6307_cast_fp16")]; + tensor var_6314_begin_0 = const()[name = tensor("op_6314_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6314_end_0 = const()[name = tensor("op_6314_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6314_end_mask_0 = const()[name = tensor("op_6314_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6314_cast_fp16 = slice_by_index(begin = var_6314_begin_0, end = var_6314_end_0, end_mask = var_6314_end_mask_0, x = var_6000_cast_fp16)[name = tensor("op_6314_cast_fp16")]; + tensor var_6321_begin_0 = const()[name = tensor("op_6321_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6321_end_0 = const()[name = tensor("op_6321_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6321_end_mask_0 = const()[name = tensor("op_6321_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6321_cast_fp16 = slice_by_index(begin = var_6321_begin_0, end = var_6321_end_0, end_mask = var_6321_end_mask_0, x = var_6004_cast_fp16)[name = tensor("op_6321_cast_fp16")]; + tensor var_6328_begin_0 = const()[name = tensor("op_6328_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6328_end_0 = const()[name = tensor("op_6328_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6328_end_mask_0 = const()[name = tensor("op_6328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6328_cast_fp16 = slice_by_index(begin = var_6328_begin_0, end = var_6328_end_0, end_mask = var_6328_end_mask_0, x = var_6004_cast_fp16)[name = tensor("op_6328_cast_fp16")]; + tensor var_6335_begin_0 = const()[name = tensor("op_6335_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6335_end_0 = const()[name = tensor("op_6335_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6335_end_mask_0 = const()[name = tensor("op_6335_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6335_cast_fp16 = slice_by_index(begin = var_6335_begin_0, end = var_6335_end_0, end_mask = var_6335_end_mask_0, x = var_6004_cast_fp16)[name = tensor("op_6335_cast_fp16")]; + tensor var_6342_begin_0 = const()[name = tensor("op_6342_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6342_end_0 = const()[name = tensor("op_6342_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6342_end_mask_0 = const()[name = tensor("op_6342_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6342_cast_fp16 = slice_by_index(begin = var_6342_begin_0, end = var_6342_end_0, end_mask = var_6342_end_mask_0, x = var_6004_cast_fp16)[name = tensor("op_6342_cast_fp16")]; + tensor k_13_perm_0 = const()[name = tensor("k_13_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_6347_begin_0 = const()[name = 
tensor("op_6347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6347_end_0 = const()[name = tensor("op_6347_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_6347_end_mask_0 = const()[name = tensor("op_6347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_5 = transpose(perm = k_13_perm_0, x = key_13_cast_fp16)[name = tensor("transpose_5")]; + tensor var_6347_cast_fp16 = slice_by_index(begin = var_6347_begin_0, end = var_6347_end_0, end_mask = var_6347_end_mask_0, x = transpose_5)[name = tensor("op_6347_cast_fp16")]; + tensor var_6351_begin_0 = const()[name = tensor("op_6351_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_6351_end_0 = const()[name = tensor("op_6351_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_6351_end_mask_0 = const()[name = tensor("op_6351_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6351_cast_fp16 = slice_by_index(begin = var_6351_begin_0, end = var_6351_end_0, end_mask = var_6351_end_mask_0, x = transpose_5)[name = tensor("op_6351_cast_fp16")]; + tensor var_6355_begin_0 = const()[name = tensor("op_6355_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_6355_end_0 = const()[name = tensor("op_6355_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_6355_end_mask_0 = const()[name = tensor("op_6355_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = var_6355_end_0, end_mask = var_6355_end_mask_0, x = transpose_5)[name = tensor("op_6355_cast_fp16")]; + tensor var_6359_begin_0 = const()[name = tensor("op_6359_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_6359_end_0 = const()[name = tensor("op_6359_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_6359_end_mask_0 = const()[name = tensor("op_6359_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6359_cast_fp16 = slice_by_index(begin = var_6359_begin_0, end = var_6359_end_0, end_mask = var_6359_end_mask_0, x = transpose_5)[name = tensor("op_6359_cast_fp16")]; + tensor var_6363_begin_0 = const()[name = tensor("op_6363_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_6363_end_0 = const()[name = tensor("op_6363_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_6363_end_mask_0 = const()[name = tensor("op_6363_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6363_cast_fp16 = slice_by_index(begin = var_6363_begin_0, end = var_6363_end_0, end_mask = var_6363_end_mask_0, x = transpose_5)[name = tensor("op_6363_cast_fp16")]; + tensor var_6367_begin_0 = const()[name = tensor("op_6367_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_6367_end_0 = const()[name = tensor("op_6367_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_6367_end_mask_0 = const()[name = tensor("op_6367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6367_cast_fp16 = slice_by_index(begin = var_6367_begin_0, end = var_6367_end_0, end_mask = var_6367_end_mask_0, x = transpose_5)[name = tensor("op_6367_cast_fp16")]; + tensor var_6371_begin_0 = const()[name = tensor("op_6371_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_6371_end_0 = const()[name = tensor("op_6371_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_6371_end_mask_0 = const()[name = tensor("op_6371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6371_cast_fp16 = slice_by_index(begin = var_6371_begin_0, end = var_6371_end_0, end_mask = var_6371_end_mask_0, x = transpose_5)[name = tensor("op_6371_cast_fp16")]; + tensor 
var_6375_begin_0 = const()[name = tensor("op_6375_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_6375_end_0 = const()[name = tensor("op_6375_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_6375_end_mask_0 = const()[name = tensor("op_6375_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6375_cast_fp16 = slice_by_index(begin = var_6375_begin_0, end = var_6375_end_0, end_mask = var_6375_end_mask_0, x = transpose_5)[name = tensor("op_6375_cast_fp16")]; + tensor var_6379_begin_0 = const()[name = tensor("op_6379_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_6379_end_0 = const()[name = tensor("op_6379_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_6379_end_mask_0 = const()[name = tensor("op_6379_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6379_cast_fp16 = slice_by_index(begin = var_6379_begin_0, end = var_6379_end_0, end_mask = var_6379_end_mask_0, x = transpose_5)[name = tensor("op_6379_cast_fp16")]; + tensor var_6383_begin_0 = const()[name = tensor("op_6383_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_6383_end_0 = const()[name = tensor("op_6383_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_6383_end_mask_0 = const()[name = tensor("op_6383_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6383_cast_fp16 = slice_by_index(begin = var_6383_begin_0, end = var_6383_end_0, end_mask = var_6383_end_mask_0, x = transpose_5)[name = tensor("op_6383_cast_fp16")]; + tensor var_6387_begin_0 = const()[name = tensor("op_6387_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_6387_end_0 = const()[name = tensor("op_6387_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_6387_end_mask_0 = const()[name = tensor("op_6387_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = var_6387_end_0, end_mask = var_6387_end_mask_0, x = transpose_5)[name = tensor("op_6387_cast_fp16")]; + tensor var_6391_begin_0 = const()[name = tensor("op_6391_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_6391_end_0 = const()[name = tensor("op_6391_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_6391_end_mask_0 = const()[name = tensor("op_6391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6391_cast_fp16 = slice_by_index(begin = var_6391_begin_0, end = var_6391_end_0, end_mask = var_6391_end_mask_0, x = transpose_5)[name = tensor("op_6391_cast_fp16")]; + tensor var_6393_begin_0 = const()[name = tensor("op_6393_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6393_end_0 = const()[name = tensor("op_6393_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6393_end_mask_0 = const()[name = tensor("op_6393_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6393_cast_fp16 = slice_by_index(begin = var_6393_begin_0, end = var_6393_end_0, end_mask = var_6393_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6393_cast_fp16")]; + tensor var_6397_begin_0 = const()[name = tensor("op_6397_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_6397_end_0 = const()[name = tensor("op_6397_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_6397_end_mask_0 = const()[name = tensor("op_6397_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6397_cast_fp16 = slice_by_index(begin = var_6397_begin_0, end = var_6397_end_0, end_mask = var_6397_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6397_cast_fp16")]; + tensor var_6401_begin_0 = const()[name = 
tensor("op_6401_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_6401_end_0 = const()[name = tensor("op_6401_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_6401_end_mask_0 = const()[name = tensor("op_6401_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6401_cast_fp16 = slice_by_index(begin = var_6401_begin_0, end = var_6401_end_0, end_mask = var_6401_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6401_cast_fp16")]; + tensor var_6405_begin_0 = const()[name = tensor("op_6405_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_6405_end_0 = const()[name = tensor("op_6405_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_6405_end_mask_0 = const()[name = tensor("op_6405_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6405_cast_fp16 = slice_by_index(begin = var_6405_begin_0, end = var_6405_end_0, end_mask = var_6405_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6405_cast_fp16")]; + tensor var_6409_begin_0 = const()[name = tensor("op_6409_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_6409_end_0 = const()[name = tensor("op_6409_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_6409_end_mask_0 = const()[name = tensor("op_6409_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6409_cast_fp16 = slice_by_index(begin = var_6409_begin_0, end = var_6409_end_0, end_mask = var_6409_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6409_cast_fp16")]; + tensor var_6413_begin_0 = const()[name = tensor("op_6413_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_6413_end_0 = const()[name = tensor("op_6413_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_6413_end_mask_0 = const()[name = tensor("op_6413_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6413_cast_fp16 = slice_by_index(begin = var_6413_begin_0, end = var_6413_end_0, end_mask = var_6413_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6413_cast_fp16")]; + tensor var_6417_begin_0 = const()[name = tensor("op_6417_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_6417_end_0 = const()[name = tensor("op_6417_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_6417_end_mask_0 = const()[name = tensor("op_6417_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6417_cast_fp16 = slice_by_index(begin = var_6417_begin_0, end = var_6417_end_0, end_mask = var_6417_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6417_cast_fp16")]; + tensor var_6421_begin_0 = const()[name = tensor("op_6421_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_6421_end_0 = const()[name = tensor("op_6421_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_6421_end_mask_0 = const()[name = tensor("op_6421_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6421_cast_fp16 = slice_by_index(begin = var_6421_begin_0, end = var_6421_end_0, end_mask = var_6421_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6421_cast_fp16")]; + tensor var_6425_begin_0 = const()[name = tensor("op_6425_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_6425_end_0 = const()[name = tensor("op_6425_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_6425_end_mask_0 = const()[name = tensor("op_6425_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6425_cast_fp16 = slice_by_index(begin = var_6425_begin_0, end = var_6425_end_0, end_mask = var_6425_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6425_cast_fp16")]; + tensor var_6429_begin_0 = const()[name = 
tensor("op_6429_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_6429_end_0 = const()[name = tensor("op_6429_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_6429_end_mask_0 = const()[name = tensor("op_6429_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6429_cast_fp16 = slice_by_index(begin = var_6429_begin_0, end = var_6429_end_0, end_mask = var_6429_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6429_cast_fp16")]; + tensor var_6433_begin_0 = const()[name = tensor("op_6433_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_6433_end_0 = const()[name = tensor("op_6433_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_6433_end_mask_0 = const()[name = tensor("op_6433_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6433_cast_fp16 = slice_by_index(begin = var_6433_begin_0, end = var_6433_end_0, end_mask = var_6433_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6433_cast_fp16")]; + tensor var_6437_begin_0 = const()[name = tensor("op_6437_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_6437_end_0 = const()[name = tensor("op_6437_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_6437_end_mask_0 = const()[name = tensor("op_6437_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_6437_cast_fp16")]; + tensor var_6441_equation_0 = const()[name = tensor("op_6441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6441_cast_fp16 = einsum(equation = var_6441_equation_0, values = (var_6347_cast_fp16, var_6013_cast_fp16))[name = tensor("op_6441_cast_fp16")]; + tensor var_6442_to_fp16 = const()[name = tensor("op_6442_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_577_cast_fp16 = mul(x = var_6441_cast_fp16, y = var_6442_to_fp16)[name = tensor("aw_chunk_577_cast_fp16")]; + tensor var_6445_equation_0 = const()[name = tensor("op_6445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6445_cast_fp16 = einsum(equation = var_6445_equation_0, values = (var_6347_cast_fp16, var_6020_cast_fp16))[name = tensor("op_6445_cast_fp16")]; + tensor var_6446_to_fp16 = const()[name = tensor("op_6446_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_579_cast_fp16 = mul(x = var_6445_cast_fp16, y = var_6446_to_fp16)[name = tensor("aw_chunk_579_cast_fp16")]; + tensor var_6449_equation_0 = const()[name = tensor("op_6449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6449_cast_fp16 = einsum(equation = var_6449_equation_0, values = (var_6347_cast_fp16, var_6027_cast_fp16))[name = tensor("op_6449_cast_fp16")]; + tensor var_6450_to_fp16 = const()[name = tensor("op_6450_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_581_cast_fp16 = mul(x = var_6449_cast_fp16, y = var_6450_to_fp16)[name = tensor("aw_chunk_581_cast_fp16")]; + tensor var_6453_equation_0 = const()[name = tensor("op_6453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6453_cast_fp16 = einsum(equation = var_6453_equation_0, values = (var_6347_cast_fp16, var_6034_cast_fp16))[name = tensor("op_6453_cast_fp16")]; + tensor var_6454_to_fp16 = const()[name = tensor("op_6454_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_583_cast_fp16 = mul(x = var_6453_cast_fp16, y = var_6454_to_fp16)[name = tensor("aw_chunk_583_cast_fp16")]; + tensor var_6457_equation_0 = const()[name = tensor("op_6457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6457_cast_fp16 
= einsum(equation = var_6457_equation_0, values = (var_6351_cast_fp16, var_6041_cast_fp16))[name = tensor("op_6457_cast_fp16")]; + tensor var_6458_to_fp16 = const()[name = tensor("op_6458_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_585_cast_fp16 = mul(x = var_6457_cast_fp16, y = var_6458_to_fp16)[name = tensor("aw_chunk_585_cast_fp16")]; + tensor var_6461_equation_0 = const()[name = tensor("op_6461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6461_cast_fp16 = einsum(equation = var_6461_equation_0, values = (var_6351_cast_fp16, var_6048_cast_fp16))[name = tensor("op_6461_cast_fp16")]; + tensor var_6462_to_fp16 = const()[name = tensor("op_6462_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_587_cast_fp16 = mul(x = var_6461_cast_fp16, y = var_6462_to_fp16)[name = tensor("aw_chunk_587_cast_fp16")]; + tensor var_6465_equation_0 = const()[name = tensor("op_6465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6465_cast_fp16 = einsum(equation = var_6465_equation_0, values = (var_6351_cast_fp16, var_6055_cast_fp16))[name = tensor("op_6465_cast_fp16")]; + tensor var_6466_to_fp16 = const()[name = tensor("op_6466_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_589_cast_fp16 = mul(x = var_6465_cast_fp16, y = var_6466_to_fp16)[name = tensor("aw_chunk_589_cast_fp16")]; + tensor var_6469_equation_0 = const()[name = tensor("op_6469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6469_cast_fp16 = einsum(equation = var_6469_equation_0, values = (var_6351_cast_fp16, var_6062_cast_fp16))[name = tensor("op_6469_cast_fp16")]; + tensor var_6470_to_fp16 = const()[name = tensor("op_6470_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_591_cast_fp16 = mul(x = var_6469_cast_fp16, y = var_6470_to_fp16)[name = tensor("aw_chunk_591_cast_fp16")]; + tensor var_6473_equation_0 = const()[name = tensor("op_6473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6473_cast_fp16 = einsum(equation = var_6473_equation_0, values = (var_6355_cast_fp16, var_6069_cast_fp16))[name = tensor("op_6473_cast_fp16")]; + tensor var_6474_to_fp16 = const()[name = tensor("op_6474_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_593_cast_fp16 = mul(x = var_6473_cast_fp16, y = var_6474_to_fp16)[name = tensor("aw_chunk_593_cast_fp16")]; + tensor var_6477_equation_0 = const()[name = tensor("op_6477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6477_cast_fp16 = einsum(equation = var_6477_equation_0, values = (var_6355_cast_fp16, var_6076_cast_fp16))[name = tensor("op_6477_cast_fp16")]; + tensor var_6478_to_fp16 = const()[name = tensor("op_6478_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_595_cast_fp16 = mul(x = var_6477_cast_fp16, y = var_6478_to_fp16)[name = tensor("aw_chunk_595_cast_fp16")]; + tensor var_6481_equation_0 = const()[name = tensor("op_6481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6481_cast_fp16 = einsum(equation = var_6481_equation_0, values = (var_6355_cast_fp16, var_6083_cast_fp16))[name = tensor("op_6481_cast_fp16")]; + tensor var_6482_to_fp16 = const()[name = tensor("op_6482_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_597_cast_fp16 = mul(x = var_6481_cast_fp16, y = var_6482_to_fp16)[name = tensor("aw_chunk_597_cast_fp16")]; + tensor var_6485_equation_0 = const()[name = tensor("op_6485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6485_cast_fp16 = einsum(equation = var_6485_equation_0, values = (var_6355_cast_fp16, var_6090_cast_fp16))[name = tensor("op_6485_cast_fp16")]; + tensor 
var_6486_to_fp16 = const()[name = tensor("op_6486_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_599_cast_fp16 = mul(x = var_6485_cast_fp16, y = var_6486_to_fp16)[name = tensor("aw_chunk_599_cast_fp16")]; + tensor var_6489_equation_0 = const()[name = tensor("op_6489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6489_cast_fp16 = einsum(equation = var_6489_equation_0, values = (var_6359_cast_fp16, var_6097_cast_fp16))[name = tensor("op_6489_cast_fp16")]; + tensor var_6490_to_fp16 = const()[name = tensor("op_6490_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_601_cast_fp16 = mul(x = var_6489_cast_fp16, y = var_6490_to_fp16)[name = tensor("aw_chunk_601_cast_fp16")]; + tensor var_6493_equation_0 = const()[name = tensor("op_6493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6493_cast_fp16 = einsum(equation = var_6493_equation_0, values = (var_6359_cast_fp16, var_6104_cast_fp16))[name = tensor("op_6493_cast_fp16")]; + tensor var_6494_to_fp16 = const()[name = tensor("op_6494_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_603_cast_fp16 = mul(x = var_6493_cast_fp16, y = var_6494_to_fp16)[name = tensor("aw_chunk_603_cast_fp16")]; + tensor var_6497_equation_0 = const()[name = tensor("op_6497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6497_cast_fp16 = einsum(equation = var_6497_equation_0, values = (var_6359_cast_fp16, var_6111_cast_fp16))[name = tensor("op_6497_cast_fp16")]; + tensor var_6498_to_fp16 = const()[name = tensor("op_6498_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_605_cast_fp16 = mul(x = var_6497_cast_fp16, y = var_6498_to_fp16)[name = tensor("aw_chunk_605_cast_fp16")]; + tensor var_6501_equation_0 = const()[name = tensor("op_6501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6501_cast_fp16 = einsum(equation = var_6501_equation_0, values = (var_6359_cast_fp16, var_6118_cast_fp16))[name = tensor("op_6501_cast_fp16")]; + tensor var_6502_to_fp16 = const()[name = tensor("op_6502_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_607_cast_fp16 = mul(x = var_6501_cast_fp16, y = var_6502_to_fp16)[name = tensor("aw_chunk_607_cast_fp16")]; + tensor var_6505_equation_0 = const()[name = tensor("op_6505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6505_cast_fp16 = einsum(equation = var_6505_equation_0, values = (var_6363_cast_fp16, var_6125_cast_fp16))[name = tensor("op_6505_cast_fp16")]; + tensor var_6506_to_fp16 = const()[name = tensor("op_6506_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_609_cast_fp16 = mul(x = var_6505_cast_fp16, y = var_6506_to_fp16)[name = tensor("aw_chunk_609_cast_fp16")]; + tensor var_6509_equation_0 = const()[name = tensor("op_6509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6509_cast_fp16 = einsum(equation = var_6509_equation_0, values = (var_6363_cast_fp16, var_6132_cast_fp16))[name = tensor("op_6509_cast_fp16")]; + tensor var_6510_to_fp16 = const()[name = tensor("op_6510_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_611_cast_fp16 = mul(x = var_6509_cast_fp16, y = var_6510_to_fp16)[name = tensor("aw_chunk_611_cast_fp16")]; + tensor var_6513_equation_0 = const()[name = tensor("op_6513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6513_cast_fp16 = einsum(equation = var_6513_equation_0, values = (var_6363_cast_fp16, var_6139_cast_fp16))[name = tensor("op_6513_cast_fp16")]; + tensor var_6514_to_fp16 = const()[name = tensor("op_6514_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_613_cast_fp16 = mul(x = var_6513_cast_fp16, y = 
var_6514_to_fp16)[name = tensor("aw_chunk_613_cast_fp16")]; + tensor var_6517_equation_0 = const()[name = tensor("op_6517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6517_cast_fp16 = einsum(equation = var_6517_equation_0, values = (var_6363_cast_fp16, var_6146_cast_fp16))[name = tensor("op_6517_cast_fp16")]; + tensor var_6518_to_fp16 = const()[name = tensor("op_6518_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_615_cast_fp16 = mul(x = var_6517_cast_fp16, y = var_6518_to_fp16)[name = tensor("aw_chunk_615_cast_fp16")]; + tensor var_6521_equation_0 = const()[name = tensor("op_6521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6521_cast_fp16 = einsum(equation = var_6521_equation_0, values = (var_6367_cast_fp16, var_6153_cast_fp16))[name = tensor("op_6521_cast_fp16")]; + tensor var_6522_to_fp16 = const()[name = tensor("op_6522_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_617_cast_fp16 = mul(x = var_6521_cast_fp16, y = var_6522_to_fp16)[name = tensor("aw_chunk_617_cast_fp16")]; + tensor var_6525_equation_0 = const()[name = tensor("op_6525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6525_cast_fp16 = einsum(equation = var_6525_equation_0, values = (var_6367_cast_fp16, var_6160_cast_fp16))[name = tensor("op_6525_cast_fp16")]; + tensor var_6526_to_fp16 = const()[name = tensor("op_6526_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_619_cast_fp16 = mul(x = var_6525_cast_fp16, y = var_6526_to_fp16)[name = tensor("aw_chunk_619_cast_fp16")]; + tensor var_6529_equation_0 = const()[name = tensor("op_6529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6529_cast_fp16 = einsum(equation = var_6529_equation_0, values = (var_6367_cast_fp16, var_6167_cast_fp16))[name = tensor("op_6529_cast_fp16")]; + tensor var_6530_to_fp16 = const()[name = tensor("op_6530_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_621_cast_fp16 = mul(x = var_6529_cast_fp16, y = var_6530_to_fp16)[name = tensor("aw_chunk_621_cast_fp16")]; + tensor var_6533_equation_0 = const()[name = tensor("op_6533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6533_cast_fp16 = einsum(equation = var_6533_equation_0, values = (var_6367_cast_fp16, var_6174_cast_fp16))[name = tensor("op_6533_cast_fp16")]; + tensor var_6534_to_fp16 = const()[name = tensor("op_6534_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_623_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6534_to_fp16)[name = tensor("aw_chunk_623_cast_fp16")]; + tensor var_6537_equation_0 = const()[name = tensor("op_6537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6537_cast_fp16 = einsum(equation = var_6537_equation_0, values = (var_6371_cast_fp16, var_6181_cast_fp16))[name = tensor("op_6537_cast_fp16")]; + tensor var_6538_to_fp16 = const()[name = tensor("op_6538_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_625_cast_fp16 = mul(x = var_6537_cast_fp16, y = var_6538_to_fp16)[name = tensor("aw_chunk_625_cast_fp16")]; + tensor var_6541_equation_0 = const()[name = tensor("op_6541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6541_cast_fp16 = einsum(equation = var_6541_equation_0, values = (var_6371_cast_fp16, var_6188_cast_fp16))[name = tensor("op_6541_cast_fp16")]; + tensor var_6542_to_fp16 = const()[name = tensor("op_6542_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_627_cast_fp16 = mul(x = var_6541_cast_fp16, y = var_6542_to_fp16)[name = tensor("aw_chunk_627_cast_fp16")]; + tensor var_6545_equation_0 = const()[name = tensor("op_6545_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_6545_cast_fp16 = einsum(equation = var_6545_equation_0, values = (var_6371_cast_fp16, var_6195_cast_fp16))[name = tensor("op_6545_cast_fp16")]; + tensor var_6546_to_fp16 = const()[name = tensor("op_6546_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_629_cast_fp16 = mul(x = var_6545_cast_fp16, y = var_6546_to_fp16)[name = tensor("aw_chunk_629_cast_fp16")]; + tensor var_6549_equation_0 = const()[name = tensor("op_6549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6549_cast_fp16 = einsum(equation = var_6549_equation_0, values = (var_6371_cast_fp16, var_6202_cast_fp16))[name = tensor("op_6549_cast_fp16")]; + tensor var_6550_to_fp16 = const()[name = tensor("op_6550_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_631_cast_fp16 = mul(x = var_6549_cast_fp16, y = var_6550_to_fp16)[name = tensor("aw_chunk_631_cast_fp16")]; + tensor var_6553_equation_0 = const()[name = tensor("op_6553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6553_cast_fp16 = einsum(equation = var_6553_equation_0, values = (var_6375_cast_fp16, var_6209_cast_fp16))[name = tensor("op_6553_cast_fp16")]; + tensor var_6554_to_fp16 = const()[name = tensor("op_6554_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_633_cast_fp16 = mul(x = var_6553_cast_fp16, y = var_6554_to_fp16)[name = tensor("aw_chunk_633_cast_fp16")]; + tensor var_6557_equation_0 = const()[name = tensor("op_6557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6557_cast_fp16 = einsum(equation = var_6557_equation_0, values = (var_6375_cast_fp16, var_6216_cast_fp16))[name = tensor("op_6557_cast_fp16")]; + tensor var_6558_to_fp16 = const()[name = tensor("op_6558_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_635_cast_fp16 = mul(x = var_6557_cast_fp16, y = var_6558_to_fp16)[name = tensor("aw_chunk_635_cast_fp16")]; + tensor var_6561_equation_0 = const()[name = tensor("op_6561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6561_cast_fp16 = einsum(equation = var_6561_equation_0, values = (var_6375_cast_fp16, var_6223_cast_fp16))[name = tensor("op_6561_cast_fp16")]; + tensor var_6562_to_fp16 = const()[name = tensor("op_6562_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_637_cast_fp16 = mul(x = var_6561_cast_fp16, y = var_6562_to_fp16)[name = tensor("aw_chunk_637_cast_fp16")]; + tensor var_6565_equation_0 = const()[name = tensor("op_6565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6565_cast_fp16 = einsum(equation = var_6565_equation_0, values = (var_6375_cast_fp16, var_6230_cast_fp16))[name = tensor("op_6565_cast_fp16")]; + tensor var_6566_to_fp16 = const()[name = tensor("op_6566_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_639_cast_fp16 = mul(x = var_6565_cast_fp16, y = var_6566_to_fp16)[name = tensor("aw_chunk_639_cast_fp16")]; + tensor var_6569_equation_0 = const()[name = tensor("op_6569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6569_cast_fp16 = einsum(equation = var_6569_equation_0, values = (var_6379_cast_fp16, var_6237_cast_fp16))[name = tensor("op_6569_cast_fp16")]; + tensor var_6570_to_fp16 = const()[name = tensor("op_6570_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_641_cast_fp16 = mul(x = var_6569_cast_fp16, y = var_6570_to_fp16)[name = tensor("aw_chunk_641_cast_fp16")]; + tensor var_6573_equation_0 = const()[name = tensor("op_6573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6573_cast_fp16 = einsum(equation = var_6573_equation_0, values = (var_6379_cast_fp16, var_6244_cast_fp16))[name 
= tensor("op_6573_cast_fp16")]; + tensor var_6574_to_fp16 = const()[name = tensor("op_6574_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_643_cast_fp16 = mul(x = var_6573_cast_fp16, y = var_6574_to_fp16)[name = tensor("aw_chunk_643_cast_fp16")]; + tensor var_6577_equation_0 = const()[name = tensor("op_6577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6577_cast_fp16 = einsum(equation = var_6577_equation_0, values = (var_6379_cast_fp16, var_6251_cast_fp16))[name = tensor("op_6577_cast_fp16")]; + tensor var_6578_to_fp16 = const()[name = tensor("op_6578_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_645_cast_fp16 = mul(x = var_6577_cast_fp16, y = var_6578_to_fp16)[name = tensor("aw_chunk_645_cast_fp16")]; + tensor var_6581_equation_0 = const()[name = tensor("op_6581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6581_cast_fp16 = einsum(equation = var_6581_equation_0, values = (var_6379_cast_fp16, var_6258_cast_fp16))[name = tensor("op_6581_cast_fp16")]; + tensor var_6582_to_fp16 = const()[name = tensor("op_6582_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_647_cast_fp16 = mul(x = var_6581_cast_fp16, y = var_6582_to_fp16)[name = tensor("aw_chunk_647_cast_fp16")]; + tensor var_6585_equation_0 = const()[name = tensor("op_6585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6585_cast_fp16 = einsum(equation = var_6585_equation_0, values = (var_6383_cast_fp16, var_6265_cast_fp16))[name = tensor("op_6585_cast_fp16")]; + tensor var_6586_to_fp16 = const()[name = tensor("op_6586_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_649_cast_fp16 = mul(x = var_6585_cast_fp16, y = var_6586_to_fp16)[name = tensor("aw_chunk_649_cast_fp16")]; + tensor var_6589_equation_0 = const()[name = tensor("op_6589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6589_cast_fp16 = einsum(equation = var_6589_equation_0, values = (var_6383_cast_fp16, var_6272_cast_fp16))[name = tensor("op_6589_cast_fp16")]; + tensor var_6590_to_fp16 = const()[name = tensor("op_6590_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_651_cast_fp16 = mul(x = var_6589_cast_fp16, y = var_6590_to_fp16)[name = tensor("aw_chunk_651_cast_fp16")]; + tensor var_6593_equation_0 = const()[name = tensor("op_6593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6593_cast_fp16 = einsum(equation = var_6593_equation_0, values = (var_6383_cast_fp16, var_6279_cast_fp16))[name = tensor("op_6593_cast_fp16")]; + tensor var_6594_to_fp16 = const()[name = tensor("op_6594_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_653_cast_fp16 = mul(x = var_6593_cast_fp16, y = var_6594_to_fp16)[name = tensor("aw_chunk_653_cast_fp16")]; + tensor var_6597_equation_0 = const()[name = tensor("op_6597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6597_cast_fp16 = einsum(equation = var_6597_equation_0, values = (var_6383_cast_fp16, var_6286_cast_fp16))[name = tensor("op_6597_cast_fp16")]; + tensor var_6598_to_fp16 = const()[name = tensor("op_6598_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_655_cast_fp16 = mul(x = var_6597_cast_fp16, y = var_6598_to_fp16)[name = tensor("aw_chunk_655_cast_fp16")]; + tensor var_6601_equation_0 = const()[name = tensor("op_6601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6601_cast_fp16 = einsum(equation = var_6601_equation_0, values = (var_6387_cast_fp16, var_6293_cast_fp16))[name = tensor("op_6601_cast_fp16")]; + tensor var_6602_to_fp16 = const()[name = tensor("op_6602_to_fp16"), val = tensor(0x1p-3)]; + tensor 
aw_chunk_657_cast_fp16 = mul(x = var_6601_cast_fp16, y = var_6602_to_fp16)[name = tensor("aw_chunk_657_cast_fp16")]; + tensor var_6605_equation_0 = const()[name = tensor("op_6605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6605_cast_fp16 = einsum(equation = var_6605_equation_0, values = (var_6387_cast_fp16, var_6300_cast_fp16))[name = tensor("op_6605_cast_fp16")]; + tensor var_6606_to_fp16 = const()[name = tensor("op_6606_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_659_cast_fp16 = mul(x = var_6605_cast_fp16, y = var_6606_to_fp16)[name = tensor("aw_chunk_659_cast_fp16")]; + tensor var_6609_equation_0 = const()[name = tensor("op_6609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6609_cast_fp16 = einsum(equation = var_6609_equation_0, values = (var_6387_cast_fp16, var_6307_cast_fp16))[name = tensor("op_6609_cast_fp16")]; + tensor var_6610_to_fp16 = const()[name = tensor("op_6610_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_661_cast_fp16 = mul(x = var_6609_cast_fp16, y = var_6610_to_fp16)[name = tensor("aw_chunk_661_cast_fp16")]; + tensor var_6613_equation_0 = const()[name = tensor("op_6613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6613_cast_fp16 = einsum(equation = var_6613_equation_0, values = (var_6387_cast_fp16, var_6314_cast_fp16))[name = tensor("op_6613_cast_fp16")]; + tensor var_6614_to_fp16 = const()[name = tensor("op_6614_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_663_cast_fp16 = mul(x = var_6613_cast_fp16, y = var_6614_to_fp16)[name = tensor("aw_chunk_663_cast_fp16")]; + tensor var_6617_equation_0 = const()[name = tensor("op_6617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6617_cast_fp16 = einsum(equation = var_6617_equation_0, values = (var_6391_cast_fp16, var_6321_cast_fp16))[name = tensor("op_6617_cast_fp16")]; + tensor var_6618_to_fp16 = const()[name = tensor("op_6618_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_665_cast_fp16 = mul(x = var_6617_cast_fp16, y = var_6618_to_fp16)[name = tensor("aw_chunk_665_cast_fp16")]; + tensor var_6621_equation_0 = const()[name = tensor("op_6621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6621_cast_fp16 = einsum(equation = var_6621_equation_0, values = (var_6391_cast_fp16, var_6328_cast_fp16))[name = tensor("op_6621_cast_fp16")]; + tensor var_6622_to_fp16 = const()[name = tensor("op_6622_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_667_cast_fp16 = mul(x = var_6621_cast_fp16, y = var_6622_to_fp16)[name = tensor("aw_chunk_667_cast_fp16")]; + tensor var_6625_equation_0 = const()[name = tensor("op_6625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6625_cast_fp16 = einsum(equation = var_6625_equation_0, values = (var_6391_cast_fp16, var_6335_cast_fp16))[name = tensor("op_6625_cast_fp16")]; + tensor var_6626_to_fp16 = const()[name = tensor("op_6626_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_669_cast_fp16 = mul(x = var_6625_cast_fp16, y = var_6626_to_fp16)[name = tensor("aw_chunk_669_cast_fp16")]; + tensor var_6629_equation_0 = const()[name = tensor("op_6629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_6629_cast_fp16 = einsum(equation = var_6629_equation_0, values = (var_6391_cast_fp16, var_6342_cast_fp16))[name = tensor("op_6629_cast_fp16")]; + tensor var_6630_to_fp16 = const()[name = tensor("op_6630_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_671_cast_fp16 = mul(x = var_6629_cast_fp16, y = var_6630_to_fp16)[name = tensor("aw_chunk_671_cast_fp16")]; + tensor var_6632_cast_fp16 = 
softmax(axis = var_5905, x = aw_chunk_577_cast_fp16)[name = tensor("op_6632_cast_fp16")]; + tensor var_6633_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_579_cast_fp16)[name = tensor("op_6633_cast_fp16")]; + tensor var_6634_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_581_cast_fp16)[name = tensor("op_6634_cast_fp16")]; + tensor var_6635_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_583_cast_fp16)[name = tensor("op_6635_cast_fp16")]; + tensor var_6636_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_585_cast_fp16)[name = tensor("op_6636_cast_fp16")]; + tensor var_6637_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_587_cast_fp16)[name = tensor("op_6637_cast_fp16")]; + tensor var_6638_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_589_cast_fp16)[name = tensor("op_6638_cast_fp16")]; + tensor var_6639_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_591_cast_fp16)[name = tensor("op_6639_cast_fp16")]; + tensor var_6640_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_593_cast_fp16)[name = tensor("op_6640_cast_fp16")]; + tensor var_6641_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_595_cast_fp16)[name = tensor("op_6641_cast_fp16")]; + tensor var_6642_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_597_cast_fp16)[name = tensor("op_6642_cast_fp16")]; + tensor var_6643_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_599_cast_fp16)[name = tensor("op_6643_cast_fp16")]; + tensor var_6644_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_601_cast_fp16)[name = tensor("op_6644_cast_fp16")]; + tensor var_6645_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_603_cast_fp16)[name = tensor("op_6645_cast_fp16")]; + tensor var_6646_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_605_cast_fp16)[name = tensor("op_6646_cast_fp16")]; + tensor var_6647_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_607_cast_fp16)[name = tensor("op_6647_cast_fp16")]; + tensor var_6648_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_609_cast_fp16)[name = tensor("op_6648_cast_fp16")]; + tensor var_6649_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_611_cast_fp16)[name = tensor("op_6649_cast_fp16")]; + tensor var_6650_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_613_cast_fp16)[name = tensor("op_6650_cast_fp16")]; + tensor var_6651_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_615_cast_fp16)[name = tensor("op_6651_cast_fp16")]; + tensor var_6652_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_617_cast_fp16)[name = tensor("op_6652_cast_fp16")]; + tensor var_6653_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_619_cast_fp16)[name = tensor("op_6653_cast_fp16")]; + tensor var_6654_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_621_cast_fp16)[name = tensor("op_6654_cast_fp16")]; + tensor var_6655_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_623_cast_fp16)[name = tensor("op_6655_cast_fp16")]; + tensor var_6656_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_625_cast_fp16)[name = tensor("op_6656_cast_fp16")]; + tensor var_6657_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_627_cast_fp16)[name = tensor("op_6657_cast_fp16")]; + tensor var_6658_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_629_cast_fp16)[name = tensor("op_6658_cast_fp16")]; + tensor var_6659_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_631_cast_fp16)[name = tensor("op_6659_cast_fp16")]; + tensor var_6660_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_633_cast_fp16)[name = tensor("op_6660_cast_fp16")]; + tensor var_6661_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_635_cast_fp16)[name = 
tensor("op_6661_cast_fp16")]; + tensor var_6662_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_637_cast_fp16)[name = tensor("op_6662_cast_fp16")]; + tensor var_6663_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_639_cast_fp16)[name = tensor("op_6663_cast_fp16")]; + tensor var_6664_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_641_cast_fp16)[name = tensor("op_6664_cast_fp16")]; + tensor var_6665_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_643_cast_fp16)[name = tensor("op_6665_cast_fp16")]; + tensor var_6666_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_645_cast_fp16)[name = tensor("op_6666_cast_fp16")]; + tensor var_6667_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_647_cast_fp16)[name = tensor("op_6667_cast_fp16")]; + tensor var_6668_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_649_cast_fp16)[name = tensor("op_6668_cast_fp16")]; + tensor var_6669_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_651_cast_fp16)[name = tensor("op_6669_cast_fp16")]; + tensor var_6670_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_653_cast_fp16)[name = tensor("op_6670_cast_fp16")]; + tensor var_6671_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_655_cast_fp16)[name = tensor("op_6671_cast_fp16")]; + tensor var_6672_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_657_cast_fp16)[name = tensor("op_6672_cast_fp16")]; + tensor var_6673_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_659_cast_fp16)[name = tensor("op_6673_cast_fp16")]; + tensor var_6674_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_661_cast_fp16)[name = tensor("op_6674_cast_fp16")]; + tensor var_6675_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_663_cast_fp16)[name = tensor("op_6675_cast_fp16")]; + tensor var_6676_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_665_cast_fp16)[name = tensor("op_6676_cast_fp16")]; + tensor var_6677_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_667_cast_fp16)[name = tensor("op_6677_cast_fp16")]; + tensor var_6678_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_669_cast_fp16)[name = tensor("op_6678_cast_fp16")]; + tensor var_6679_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_671_cast_fp16)[name = tensor("op_6679_cast_fp16")]; + tensor var_6681_equation_0 = const()[name = tensor("op_6681_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6681_cast_fp16 = einsum(equation = var_6681_equation_0, values = (var_6393_cast_fp16, var_6632_cast_fp16))[name = tensor("op_6681_cast_fp16")]; + tensor var_6683_equation_0 = const()[name = tensor("op_6683_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6683_cast_fp16 = einsum(equation = var_6683_equation_0, values = (var_6393_cast_fp16, var_6633_cast_fp16))[name = tensor("op_6683_cast_fp16")]; + tensor var_6685_equation_0 = const()[name = tensor("op_6685_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6685_cast_fp16 = einsum(equation = var_6685_equation_0, values = (var_6393_cast_fp16, var_6634_cast_fp16))[name = tensor("op_6685_cast_fp16")]; + tensor var_6687_equation_0 = const()[name = tensor("op_6687_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6687_cast_fp16 = einsum(equation = var_6687_equation_0, values = (var_6393_cast_fp16, var_6635_cast_fp16))[name = tensor("op_6687_cast_fp16")]; + tensor var_6689_equation_0 = const()[name = tensor("op_6689_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6689_cast_fp16 = einsum(equation = var_6689_equation_0, values = (var_6397_cast_fp16, var_6636_cast_fp16))[name = tensor("op_6689_cast_fp16")]; + tensor var_6691_equation_0 
= const()[name = tensor("op_6691_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6691_cast_fp16 = einsum(equation = var_6691_equation_0, values = (var_6397_cast_fp16, var_6637_cast_fp16))[name = tensor("op_6691_cast_fp16")]; + tensor var_6693_equation_0 = const()[name = tensor("op_6693_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6693_cast_fp16 = einsum(equation = var_6693_equation_0, values = (var_6397_cast_fp16, var_6638_cast_fp16))[name = tensor("op_6693_cast_fp16")]; + tensor var_6695_equation_0 = const()[name = tensor("op_6695_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6695_cast_fp16 = einsum(equation = var_6695_equation_0, values = (var_6397_cast_fp16, var_6639_cast_fp16))[name = tensor("op_6695_cast_fp16")]; + tensor var_6697_equation_0 = const()[name = tensor("op_6697_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6697_cast_fp16 = einsum(equation = var_6697_equation_0, values = (var_6401_cast_fp16, var_6640_cast_fp16))[name = tensor("op_6697_cast_fp16")]; + tensor var_6699_equation_0 = const()[name = tensor("op_6699_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6699_cast_fp16 = einsum(equation = var_6699_equation_0, values = (var_6401_cast_fp16, var_6641_cast_fp16))[name = tensor("op_6699_cast_fp16")]; + tensor var_6701_equation_0 = const()[name = tensor("op_6701_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6701_cast_fp16 = einsum(equation = var_6701_equation_0, values = (var_6401_cast_fp16, var_6642_cast_fp16))[name = tensor("op_6701_cast_fp16")]; + tensor var_6703_equation_0 = const()[name = tensor("op_6703_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6703_cast_fp16 = einsum(equation = var_6703_equation_0, values = (var_6401_cast_fp16, var_6643_cast_fp16))[name = tensor("op_6703_cast_fp16")]; + tensor var_6705_equation_0 = const()[name = tensor("op_6705_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6705_cast_fp16 = einsum(equation = var_6705_equation_0, values = (var_6405_cast_fp16, var_6644_cast_fp16))[name = tensor("op_6705_cast_fp16")]; + tensor var_6707_equation_0 = const()[name = tensor("op_6707_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6707_cast_fp16 = einsum(equation = var_6707_equation_0, values = (var_6405_cast_fp16, var_6645_cast_fp16))[name = tensor("op_6707_cast_fp16")]; + tensor var_6709_equation_0 = const()[name = tensor("op_6709_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6709_cast_fp16 = einsum(equation = var_6709_equation_0, values = (var_6405_cast_fp16, var_6646_cast_fp16))[name = tensor("op_6709_cast_fp16")]; + tensor var_6711_equation_0 = const()[name = tensor("op_6711_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6711_cast_fp16 = einsum(equation = var_6711_equation_0, values = (var_6405_cast_fp16, var_6647_cast_fp16))[name = tensor("op_6711_cast_fp16")]; + tensor var_6713_equation_0 = const()[name = tensor("op_6713_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6713_cast_fp16 = einsum(equation = var_6713_equation_0, values = (var_6409_cast_fp16, var_6648_cast_fp16))[name = tensor("op_6713_cast_fp16")]; + tensor var_6715_equation_0 = const()[name = tensor("op_6715_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6715_cast_fp16 = einsum(equation = var_6715_equation_0, values = (var_6409_cast_fp16, var_6649_cast_fp16))[name = tensor("op_6715_cast_fp16")]; + tensor var_6717_equation_0 = const()[name = tensor("op_6717_equation_0"), val = tensor("bchk,bkhq->bchq")]; + 
tensor var_6717_cast_fp16 = einsum(equation = var_6717_equation_0, values = (var_6409_cast_fp16, var_6650_cast_fp16))[name = tensor("op_6717_cast_fp16")]; + tensor var_6719_equation_0 = const()[name = tensor("op_6719_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6719_cast_fp16 = einsum(equation = var_6719_equation_0, values = (var_6409_cast_fp16, var_6651_cast_fp16))[name = tensor("op_6719_cast_fp16")]; + tensor var_6721_equation_0 = const()[name = tensor("op_6721_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6721_cast_fp16 = einsum(equation = var_6721_equation_0, values = (var_6413_cast_fp16, var_6652_cast_fp16))[name = tensor("op_6721_cast_fp16")]; + tensor var_6723_equation_0 = const()[name = tensor("op_6723_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6723_cast_fp16 = einsum(equation = var_6723_equation_0, values = (var_6413_cast_fp16, var_6653_cast_fp16))[name = tensor("op_6723_cast_fp16")]; + tensor var_6725_equation_0 = const()[name = tensor("op_6725_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6725_cast_fp16 = einsum(equation = var_6725_equation_0, values = (var_6413_cast_fp16, var_6654_cast_fp16))[name = tensor("op_6725_cast_fp16")]; + tensor var_6727_equation_0 = const()[name = tensor("op_6727_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6727_cast_fp16 = einsum(equation = var_6727_equation_0, values = (var_6413_cast_fp16, var_6655_cast_fp16))[name = tensor("op_6727_cast_fp16")]; + tensor var_6729_equation_0 = const()[name = tensor("op_6729_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6729_cast_fp16 = einsum(equation = var_6729_equation_0, values = (var_6417_cast_fp16, var_6656_cast_fp16))[name = tensor("op_6729_cast_fp16")]; + tensor var_6731_equation_0 = const()[name = tensor("op_6731_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6731_cast_fp16 = einsum(equation = var_6731_equation_0, values = (var_6417_cast_fp16, var_6657_cast_fp16))[name = tensor("op_6731_cast_fp16")]; + tensor var_6733_equation_0 = const()[name = tensor("op_6733_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6733_cast_fp16 = einsum(equation = var_6733_equation_0, values = (var_6417_cast_fp16, var_6658_cast_fp16))[name = tensor("op_6733_cast_fp16")]; + tensor var_6735_equation_0 = const()[name = tensor("op_6735_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6735_cast_fp16 = einsum(equation = var_6735_equation_0, values = (var_6417_cast_fp16, var_6659_cast_fp16))[name = tensor("op_6735_cast_fp16")]; + tensor var_6737_equation_0 = const()[name = tensor("op_6737_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6737_cast_fp16 = einsum(equation = var_6737_equation_0, values = (var_6421_cast_fp16, var_6660_cast_fp16))[name = tensor("op_6737_cast_fp16")]; + tensor var_6739_equation_0 = const()[name = tensor("op_6739_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6739_cast_fp16 = einsum(equation = var_6739_equation_0, values = (var_6421_cast_fp16, var_6661_cast_fp16))[name = tensor("op_6739_cast_fp16")]; + tensor var_6741_equation_0 = const()[name = tensor("op_6741_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6741_cast_fp16 = einsum(equation = var_6741_equation_0, values = (var_6421_cast_fp16, var_6662_cast_fp16))[name = tensor("op_6741_cast_fp16")]; + tensor var_6743_equation_0 = const()[name = tensor("op_6743_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6743_cast_fp16 = einsum(equation = var_6743_equation_0, values = 
(var_6421_cast_fp16, var_6663_cast_fp16))[name = tensor("op_6743_cast_fp16")]; + tensor var_6745_equation_0 = const()[name = tensor("op_6745_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6745_cast_fp16 = einsum(equation = var_6745_equation_0, values = (var_6425_cast_fp16, var_6664_cast_fp16))[name = tensor("op_6745_cast_fp16")]; + tensor var_6747_equation_0 = const()[name = tensor("op_6747_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6747_cast_fp16 = einsum(equation = var_6747_equation_0, values = (var_6425_cast_fp16, var_6665_cast_fp16))[name = tensor("op_6747_cast_fp16")]; + tensor var_6749_equation_0 = const()[name = tensor("op_6749_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6749_cast_fp16 = einsum(equation = var_6749_equation_0, values = (var_6425_cast_fp16, var_6666_cast_fp16))[name = tensor("op_6749_cast_fp16")]; + tensor var_6751_equation_0 = const()[name = tensor("op_6751_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6751_cast_fp16 = einsum(equation = var_6751_equation_0, values = (var_6425_cast_fp16, var_6667_cast_fp16))[name = tensor("op_6751_cast_fp16")]; + tensor var_6753_equation_0 = const()[name = tensor("op_6753_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6753_cast_fp16 = einsum(equation = var_6753_equation_0, values = (var_6429_cast_fp16, var_6668_cast_fp16))[name = tensor("op_6753_cast_fp16")]; + tensor var_6755_equation_0 = const()[name = tensor("op_6755_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6755_cast_fp16 = einsum(equation = var_6755_equation_0, values = (var_6429_cast_fp16, var_6669_cast_fp16))[name = tensor("op_6755_cast_fp16")]; + tensor var_6757_equation_0 = const()[name = tensor("op_6757_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6757_cast_fp16 = einsum(equation = var_6757_equation_0, values = (var_6429_cast_fp16, var_6670_cast_fp16))[name = tensor("op_6757_cast_fp16")]; + tensor var_6759_equation_0 = const()[name = tensor("op_6759_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6759_cast_fp16 = einsum(equation = var_6759_equation_0, values = (var_6429_cast_fp16, var_6671_cast_fp16))[name = tensor("op_6759_cast_fp16")]; + tensor var_6761_equation_0 = const()[name = tensor("op_6761_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6761_cast_fp16 = einsum(equation = var_6761_equation_0, values = (var_6433_cast_fp16, var_6672_cast_fp16))[name = tensor("op_6761_cast_fp16")]; + tensor var_6763_equation_0 = const()[name = tensor("op_6763_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6763_cast_fp16 = einsum(equation = var_6763_equation_0, values = (var_6433_cast_fp16, var_6673_cast_fp16))[name = tensor("op_6763_cast_fp16")]; + tensor var_6765_equation_0 = const()[name = tensor("op_6765_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6765_cast_fp16 = einsum(equation = var_6765_equation_0, values = (var_6433_cast_fp16, var_6674_cast_fp16))[name = tensor("op_6765_cast_fp16")]; + tensor var_6767_equation_0 = const()[name = tensor("op_6767_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6767_cast_fp16 = einsum(equation = var_6767_equation_0, values = (var_6433_cast_fp16, var_6675_cast_fp16))[name = tensor("op_6767_cast_fp16")]; + tensor var_6769_equation_0 = const()[name = tensor("op_6769_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6769_cast_fp16 = einsum(equation = var_6769_equation_0, values = (var_6437_cast_fp16, var_6676_cast_fp16))[name = tensor("op_6769_cast_fp16")]; + 
tensor var_6771_equation_0 = const()[name = tensor("op_6771_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6771_cast_fp16 = einsum(equation = var_6771_equation_0, values = (var_6437_cast_fp16, var_6677_cast_fp16))[name = tensor("op_6771_cast_fp16")]; + tensor var_6773_equation_0 = const()[name = tensor("op_6773_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6773_cast_fp16 = einsum(equation = var_6773_equation_0, values = (var_6437_cast_fp16, var_6678_cast_fp16))[name = tensor("op_6773_cast_fp16")]; + tensor var_6775_equation_0 = const()[name = tensor("op_6775_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6775_cast_fp16 = einsum(equation = var_6775_equation_0, values = (var_6437_cast_fp16, var_6679_cast_fp16))[name = tensor("op_6775_cast_fp16")]; + tensor var_6777_interleave_0 = const()[name = tensor("op_6777_interleave_0"), val = tensor(false)]; + tensor var_6777_cast_fp16 = concat(axis = var_5888, interleave = var_6777_interleave_0, values = (var_6681_cast_fp16, var_6683_cast_fp16, var_6685_cast_fp16, var_6687_cast_fp16))[name = tensor("op_6777_cast_fp16")]; + tensor var_6779_interleave_0 = const()[name = tensor("op_6779_interleave_0"), val = tensor(false)]; + tensor var_6779_cast_fp16 = concat(axis = var_5888, interleave = var_6779_interleave_0, values = (var_6689_cast_fp16, var_6691_cast_fp16, var_6693_cast_fp16, var_6695_cast_fp16))[name = tensor("op_6779_cast_fp16")]; + tensor var_6781_interleave_0 = const()[name = tensor("op_6781_interleave_0"), val = tensor(false)]; + tensor var_6781_cast_fp16 = concat(axis = var_5888, interleave = var_6781_interleave_0, values = (var_6697_cast_fp16, var_6699_cast_fp16, var_6701_cast_fp16, var_6703_cast_fp16))[name = tensor("op_6781_cast_fp16")]; + tensor var_6783_interleave_0 = const()[name = tensor("op_6783_interleave_0"), val = tensor(false)]; + tensor var_6783_cast_fp16 = concat(axis = var_5888, interleave = var_6783_interleave_0, values = (var_6705_cast_fp16, var_6707_cast_fp16, var_6709_cast_fp16, var_6711_cast_fp16))[name = tensor("op_6783_cast_fp16")]; + tensor var_6785_interleave_0 = const()[name = tensor("op_6785_interleave_0"), val = tensor(false)]; + tensor var_6785_cast_fp16 = concat(axis = var_5888, interleave = var_6785_interleave_0, values = (var_6713_cast_fp16, var_6715_cast_fp16, var_6717_cast_fp16, var_6719_cast_fp16))[name = tensor("op_6785_cast_fp16")]; + tensor var_6787_interleave_0 = const()[name = tensor("op_6787_interleave_0"), val = tensor(false)]; + tensor var_6787_cast_fp16 = concat(axis = var_5888, interleave = var_6787_interleave_0, values = (var_6721_cast_fp16, var_6723_cast_fp16, var_6725_cast_fp16, var_6727_cast_fp16))[name = tensor("op_6787_cast_fp16")]; + tensor var_6789_interleave_0 = const()[name = tensor("op_6789_interleave_0"), val = tensor(false)]; + tensor var_6789_cast_fp16 = concat(axis = var_5888, interleave = var_6789_interleave_0, values = (var_6729_cast_fp16, var_6731_cast_fp16, var_6733_cast_fp16, var_6735_cast_fp16))[name = tensor("op_6789_cast_fp16")]; + tensor var_6791_interleave_0 = const()[name = tensor("op_6791_interleave_0"), val = tensor(false)]; + tensor var_6791_cast_fp16 = concat(axis = var_5888, interleave = var_6791_interleave_0, values = (var_6737_cast_fp16, var_6739_cast_fp16, var_6741_cast_fp16, var_6743_cast_fp16))[name = tensor("op_6791_cast_fp16")]; + tensor var_6793_interleave_0 = const()[name = tensor("op_6793_interleave_0"), val = tensor(false)]; + tensor var_6793_cast_fp16 = concat(axis = var_5888, interleave = var_6793_interleave_0, 
values = (var_6745_cast_fp16, var_6747_cast_fp16, var_6749_cast_fp16, var_6751_cast_fp16))[name = tensor("op_6793_cast_fp16")]; + tensor var_6795_interleave_0 = const()[name = tensor("op_6795_interleave_0"), val = tensor(false)]; + tensor var_6795_cast_fp16 = concat(axis = var_5888, interleave = var_6795_interleave_0, values = (var_6753_cast_fp16, var_6755_cast_fp16, var_6757_cast_fp16, var_6759_cast_fp16))[name = tensor("op_6795_cast_fp16")]; + tensor var_6797_interleave_0 = const()[name = tensor("op_6797_interleave_0"), val = tensor(false)]; + tensor var_6797_cast_fp16 = concat(axis = var_5888, interleave = var_6797_interleave_0, values = (var_6761_cast_fp16, var_6763_cast_fp16, var_6765_cast_fp16, var_6767_cast_fp16))[name = tensor("op_6797_cast_fp16")]; + tensor var_6799_interleave_0 = const()[name = tensor("op_6799_interleave_0"), val = tensor(false)]; + tensor var_6799_cast_fp16 = concat(axis = var_5888, interleave = var_6799_interleave_0, values = (var_6769_cast_fp16, var_6771_cast_fp16, var_6773_cast_fp16, var_6775_cast_fp16))[name = tensor("op_6799_cast_fp16")]; + tensor input_49_interleave_0 = const()[name = tensor("input_49_interleave_0"), val = tensor(false)]; + tensor input_49_cast_fp16 = concat(axis = var_5905, interleave = input_49_interleave_0, values = (var_6777_cast_fp16, var_6779_cast_fp16, var_6781_cast_fp16, var_6783_cast_fp16, var_6785_cast_fp16, var_6787_cast_fp16, var_6789_cast_fp16, var_6791_cast_fp16, var_6793_cast_fp16, var_6795_cast_fp16, var_6797_cast_fp16, var_6799_cast_fp16))[name = tensor("input_49_cast_fp16")]; + tensor var_6804 = const()[name = tensor("op_6804"), val = tensor([1, 1])]; + tensor var_6806 = const()[name = tensor("op_6806"), val = tensor([1, 1])]; + tensor obj_27_pad_type_0 = const()[name = tensor("obj_27_pad_type_0"), val = tensor("custom")]; + tensor obj_27_pad_0 = const()[name = tensor("obj_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94814784)))]; + tensor layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95994496)))]; + tensor obj_27_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = var_6806, groups = var_5905, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = var_6804, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor var_6812 = const()[name = tensor("op_6812"), val = tensor([1])]; + tensor channels_mean_27_cast_fp16 = reduce_mean(axes = var_6812, keep_dims = var_5906, x = inputs_27_cast_fp16)[name = tensor("channels_mean_27_cast_fp16")]; + tensor zero_mean_27_cast_fp16 = sub(x = inputs_27_cast_fp16, y = channels_mean_27_cast_fp16)[name = tensor("zero_mean_27_cast_fp16")]; + tensor zero_mean_sq_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = zero_mean_27_cast_fp16)[name = tensor("zero_mean_sq_27_cast_fp16")]; + tensor var_6816 = const()[name = tensor("op_6816"), val = tensor([1])]; + tensor var_6817_cast_fp16 = reduce_mean(axes = var_6816, keep_dims = var_5906, x = zero_mean_sq_27_cast_fp16)[name = tensor("op_6817_cast_fp16")]; + tensor var_6818_to_fp16 = const()[name = 
tensor("op_6818_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_6819_cast_fp16 = add(x = var_6817_cast_fp16, y = var_6818_to_fp16)[name = tensor("op_6819_cast_fp16")]; + tensor denom_27_epsilon_0_to_fp16 = const()[name = tensor("denom_27_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0_to_fp16, x = var_6819_cast_fp16)[name = tensor("denom_27_cast_fp16")]; + tensor out_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = denom_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95996096)))]; + tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95997696)))]; + tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_6830 = const()[name = tensor("op_6830"), val = tensor([1, 1])]; + tensor var_6832 = const()[name = tensor("op_6832"), val = tensor([1, 1])]; + tensor input_53_pad_type_0 = const()[name = tensor("input_53_pad_type_0"), val = tensor("custom")]; + tensor input_53_pad_0 = const()[name = tensor("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_fc1_weight_to_fp16 = const()[name = tensor("layers_6_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95999296)))]; + tensor layers_6_fc1_bias_to_fp16 = const()[name = tensor("layers_6_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100717952)))]; + tensor input_53_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = var_6832, groups = var_5905, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = var_6830, weight = layers_6_fc1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; + tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_6838 = const()[name = tensor("op_6838"), val = tensor([1, 1])]; + tensor var_6840 = const()[name = tensor("op_6840"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_type_0 = const()[name = tensor("hidden_states_17_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_17_pad_0 = const()[name = tensor("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_fc2_weight_to_fp16 = const()[name = tensor("layers_6_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100724160)))]; + tensor layers_6_fc2_bias_to_fp16 = const()[name = tensor("layers_6_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105442816)))]; + tensor hidden_states_17_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = var_6840, groups = var_5905, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = var_6838, weight = layers_6_fc2_weight_to_fp16, 
x = input_55_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_6847 = const()[name = tensor("op_6847"), val = tensor(3)]; + tensor var_6864 = const()[name = tensor("op_6864"), val = tensor(1)]; + tensor var_6865 = const()[name = tensor("op_6865"), val = tensor(true)]; + tensor var_6875 = const()[name = tensor("op_6875"), val = tensor([1])]; + tensor channels_mean_29_cast_fp16 = reduce_mean(axes = var_6875, keep_dims = var_6865, x = inputs_29_cast_fp16)[name = tensor("channels_mean_29_cast_fp16")]; + tensor zero_mean_29_cast_fp16 = sub(x = inputs_29_cast_fp16, y = channels_mean_29_cast_fp16)[name = tensor("zero_mean_29_cast_fp16")]; + tensor zero_mean_sq_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = zero_mean_29_cast_fp16)[name = tensor("zero_mean_sq_29_cast_fp16")]; + tensor var_6879 = const()[name = tensor("op_6879"), val = tensor([1])]; + tensor var_6880_cast_fp16 = reduce_mean(axes = var_6879, keep_dims = var_6865, x = zero_mean_sq_29_cast_fp16)[name = tensor("op_6880_cast_fp16")]; + tensor var_6881_to_fp16 = const()[name = tensor("op_6881_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_6882_cast_fp16 = add(x = var_6880_cast_fp16, y = var_6881_to_fp16)[name = tensor("op_6882_cast_fp16")]; + tensor denom_29_epsilon_0_to_fp16 = const()[name = tensor("denom_29_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0_to_fp16, x = var_6882_cast_fp16)[name = tensor("denom_29_cast_fp16")]; + tensor out_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = denom_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105444416)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105446016)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_6897 = const()[name = tensor("op_6897"), val = tensor([1, 1])]; + tensor var_6899 = const()[name = tensor("op_6899"), val = tensor([1, 1])]; + tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("custom")]; + tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105447616)))]; + tensor layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106627328)))]; + tensor query_15_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = var_6899, groups = var_6864, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = var_6897, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = 
tensor("query_15_cast_fp16")]; + tensor var_6903 = const()[name = tensor("op_6903"), val = tensor([1, 1])]; + tensor var_6905 = const()[name = tensor("op_6905"), val = tensor([1, 1])]; + tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("custom")]; + tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106628928)))]; + tensor key_15_cast_fp16 = conv(dilations = var_6905, groups = var_6864, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = var_6903, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_6910 = const()[name = tensor("op_6910"), val = tensor([1, 1])]; + tensor var_6912 = const()[name = tensor("op_6912"), val = tensor([1, 1])]; + tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("custom")]; + tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107808640)))]; + tensor layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108988352)))]; + tensor value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = var_6912, groups = var_6864, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = var_6910, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_6919_begin_0 = const()[name = tensor("op_6919_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6919_end_0 = const()[name = tensor("op_6919_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6919_end_mask_0 = const()[name = tensor("op_6919_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6919_cast_fp16 = slice_by_index(begin = var_6919_begin_0, end = var_6919_end_0, end_mask = var_6919_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6919_cast_fp16")]; + tensor var_6923_begin_0 = const()[name = tensor("op_6923_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_6923_end_0 = const()[name = tensor("op_6923_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_6923_end_mask_0 = const()[name = tensor("op_6923_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6923_cast_fp16 = slice_by_index(begin = var_6923_begin_0, end = var_6923_end_0, end_mask = var_6923_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6923_cast_fp16")]; + tensor var_6927_begin_0 = const()[name = tensor("op_6927_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_6927_end_0 = const()[name = tensor("op_6927_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_6927_end_mask_0 = const()[name = tensor("op_6927_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6927_cast_fp16 = slice_by_index(begin = var_6927_begin_0, end = var_6927_end_0, end_mask = var_6927_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6927_cast_fp16")]; + tensor var_6931_begin_0 = const()[name = tensor("op_6931_begin_0"), val = tensor([0, 192, 0, 
0])]; + tensor var_6931_end_0 = const()[name = tensor("op_6931_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_6931_end_mask_0 = const()[name = tensor("op_6931_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6931_cast_fp16 = slice_by_index(begin = var_6931_begin_0, end = var_6931_end_0, end_mask = var_6931_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6931_cast_fp16")]; + tensor var_6935_begin_0 = const()[name = tensor("op_6935_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_6935_end_0 = const()[name = tensor("op_6935_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_6935_end_mask_0 = const()[name = tensor("op_6935_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6935_cast_fp16 = slice_by_index(begin = var_6935_begin_0, end = var_6935_end_0, end_mask = var_6935_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6935_cast_fp16")]; + tensor var_6939_begin_0 = const()[name = tensor("op_6939_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_6939_end_0 = const()[name = tensor("op_6939_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_6939_end_mask_0 = const()[name = tensor("op_6939_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6939_cast_fp16 = slice_by_index(begin = var_6939_begin_0, end = var_6939_end_0, end_mask = var_6939_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6939_cast_fp16")]; + tensor var_6943_begin_0 = const()[name = tensor("op_6943_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_6943_end_0 = const()[name = tensor("op_6943_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_6943_end_mask_0 = const()[name = tensor("op_6943_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6943_cast_fp16 = slice_by_index(begin = var_6943_begin_0, end = var_6943_end_0, end_mask = var_6943_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6943_cast_fp16")]; + tensor var_6947_begin_0 = const()[name = tensor("op_6947_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_6947_end_0 = const()[name = tensor("op_6947_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_6947_end_mask_0 = const()[name = tensor("op_6947_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6947_cast_fp16 = slice_by_index(begin = var_6947_begin_0, end = var_6947_end_0, end_mask = var_6947_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6947_cast_fp16")]; + tensor var_6951_begin_0 = const()[name = tensor("op_6951_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_6951_end_0 = const()[name = tensor("op_6951_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_6951_end_mask_0 = const()[name = tensor("op_6951_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6951_cast_fp16 = slice_by_index(begin = var_6951_begin_0, end = var_6951_end_0, end_mask = var_6951_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6951_cast_fp16")]; + tensor var_6955_begin_0 = const()[name = tensor("op_6955_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_6955_end_0 = const()[name = tensor("op_6955_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_6955_end_mask_0 = const()[name = tensor("op_6955_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6955_cast_fp16 = slice_by_index(begin = var_6955_begin_0, end = var_6955_end_0, end_mask = var_6955_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6955_cast_fp16")]; + tensor var_6959_begin_0 = const()[name = tensor("op_6959_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor 
var_6959_end_0 = const()[name = tensor("op_6959_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_6959_end_mask_0 = const()[name = tensor("op_6959_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6959_cast_fp16 = slice_by_index(begin = var_6959_begin_0, end = var_6959_end_0, end_mask = var_6959_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6959_cast_fp16")]; + tensor var_6963_begin_0 = const()[name = tensor("op_6963_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_6963_end_0 = const()[name = tensor("op_6963_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_6963_end_mask_0 = const()[name = tensor("op_6963_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6963_cast_fp16 = slice_by_index(begin = var_6963_begin_0, end = var_6963_end_0, end_mask = var_6963_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6963_cast_fp16")]; + tensor var_6972_begin_0 = const()[name = tensor("op_6972_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6972_end_0 = const()[name = tensor("op_6972_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6972_end_mask_0 = const()[name = tensor("op_6972_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6972_cast_fp16 = slice_by_index(begin = var_6972_begin_0, end = var_6972_end_0, end_mask = var_6972_end_mask_0, x = var_6919_cast_fp16)[name = tensor("op_6972_cast_fp16")]; + tensor var_6979_begin_0 = const()[name = tensor("op_6979_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6979_end_0 = const()[name = tensor("op_6979_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6979_end_mask_0 = const()[name = tensor("op_6979_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6979_cast_fp16 = slice_by_index(begin = var_6979_begin_0, end = var_6979_end_0, end_mask = var_6979_end_mask_0, x = var_6919_cast_fp16)[name = tensor("op_6979_cast_fp16")]; + tensor var_6986_begin_0 = const()[name = tensor("op_6986_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6986_end_0 = const()[name = tensor("op_6986_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6986_end_mask_0 = const()[name = tensor("op_6986_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6986_cast_fp16 = slice_by_index(begin = var_6986_begin_0, end = var_6986_end_0, end_mask = var_6986_end_mask_0, x = var_6919_cast_fp16)[name = tensor("op_6986_cast_fp16")]; + tensor var_6993_begin_0 = const()[name = tensor("op_6993_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6993_end_0 = const()[name = tensor("op_6993_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6993_end_mask_0 = const()[name = tensor("op_6993_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6993_cast_fp16 = slice_by_index(begin = var_6993_begin_0, end = var_6993_end_0, end_mask = var_6993_end_mask_0, x = var_6919_cast_fp16)[name = tensor("op_6993_cast_fp16")]; + tensor var_7000_begin_0 = const()[name = tensor("op_7000_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7000_end_0 = const()[name = tensor("op_7000_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7000_end_mask_0 = const()[name = tensor("op_7000_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7000_cast_fp16 = slice_by_index(begin = var_7000_begin_0, end = var_7000_end_0, end_mask = var_7000_end_mask_0, x = var_6923_cast_fp16)[name = tensor("op_7000_cast_fp16")]; + tensor var_7007_begin_0 = const()[name = tensor("op_7007_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7007_end_0 = const()[name 
= tensor("op_7007_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7007_end_mask_0 = const()[name = tensor("op_7007_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7007_cast_fp16 = slice_by_index(begin = var_7007_begin_0, end = var_7007_end_0, end_mask = var_7007_end_mask_0, x = var_6923_cast_fp16)[name = tensor("op_7007_cast_fp16")]; + tensor var_7014_begin_0 = const()[name = tensor("op_7014_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7014_end_0 = const()[name = tensor("op_7014_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7014_end_mask_0 = const()[name = tensor("op_7014_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7014_cast_fp16 = slice_by_index(begin = var_7014_begin_0, end = var_7014_end_0, end_mask = var_7014_end_mask_0, x = var_6923_cast_fp16)[name = tensor("op_7014_cast_fp16")]; + tensor var_7021_begin_0 = const()[name = tensor("op_7021_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7021_end_0 = const()[name = tensor("op_7021_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7021_end_mask_0 = const()[name = tensor("op_7021_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7021_cast_fp16 = slice_by_index(begin = var_7021_begin_0, end = var_7021_end_0, end_mask = var_7021_end_mask_0, x = var_6923_cast_fp16)[name = tensor("op_7021_cast_fp16")]; + tensor var_7028_begin_0 = const()[name = tensor("op_7028_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7028_end_0 = const()[name = tensor("op_7028_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7028_end_mask_0 = const()[name = tensor("op_7028_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7028_cast_fp16 = slice_by_index(begin = var_7028_begin_0, end = var_7028_end_0, end_mask = var_7028_end_mask_0, x = var_6927_cast_fp16)[name = tensor("op_7028_cast_fp16")]; + tensor var_7035_begin_0 = const()[name = tensor("op_7035_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7035_end_0 = const()[name = tensor("op_7035_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7035_end_mask_0 = const()[name = tensor("op_7035_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7035_cast_fp16 = slice_by_index(begin = var_7035_begin_0, end = var_7035_end_0, end_mask = var_7035_end_mask_0, x = var_6927_cast_fp16)[name = tensor("op_7035_cast_fp16")]; + tensor var_7042_begin_0 = const()[name = tensor("op_7042_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7042_end_0 = const()[name = tensor("op_7042_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7042_end_mask_0 = const()[name = tensor("op_7042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7042_cast_fp16 = slice_by_index(begin = var_7042_begin_0, end = var_7042_end_0, end_mask = var_7042_end_mask_0, x = var_6927_cast_fp16)[name = tensor("op_7042_cast_fp16")]; + tensor var_7049_begin_0 = const()[name = tensor("op_7049_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7049_end_0 = const()[name = tensor("op_7049_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7049_end_mask_0 = const()[name = tensor("op_7049_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7049_cast_fp16 = slice_by_index(begin = var_7049_begin_0, end = var_7049_end_0, end_mask = var_7049_end_mask_0, x = var_6927_cast_fp16)[name = tensor("op_7049_cast_fp16")]; + tensor var_7056_begin_0 = const()[name = tensor("op_7056_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7056_end_0 = const()[name = tensor("op_7056_end_0"), val 
= tensor([1, 64, 1, 375])]; + tensor var_7056_end_mask_0 = const()[name = tensor("op_7056_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7056_cast_fp16 = slice_by_index(begin = var_7056_begin_0, end = var_7056_end_0, end_mask = var_7056_end_mask_0, x = var_6931_cast_fp16)[name = tensor("op_7056_cast_fp16")]; + tensor var_7063_begin_0 = const()[name = tensor("op_7063_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7063_end_0 = const()[name = tensor("op_7063_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7063_end_mask_0 = const()[name = tensor("op_7063_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7063_cast_fp16 = slice_by_index(begin = var_7063_begin_0, end = var_7063_end_0, end_mask = var_7063_end_mask_0, x = var_6931_cast_fp16)[name = tensor("op_7063_cast_fp16")]; + tensor var_7070_begin_0 = const()[name = tensor("op_7070_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7070_end_0 = const()[name = tensor("op_7070_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7070_end_mask_0 = const()[name = tensor("op_7070_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7070_cast_fp16 = slice_by_index(begin = var_7070_begin_0, end = var_7070_end_0, end_mask = var_7070_end_mask_0, x = var_6931_cast_fp16)[name = tensor("op_7070_cast_fp16")]; + tensor var_7077_begin_0 = const()[name = tensor("op_7077_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7077_end_0 = const()[name = tensor("op_7077_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7077_end_mask_0 = const()[name = tensor("op_7077_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7077_cast_fp16 = slice_by_index(begin = var_7077_begin_0, end = var_7077_end_0, end_mask = var_7077_end_mask_0, x = var_6931_cast_fp16)[name = tensor("op_7077_cast_fp16")]; + tensor var_7084_begin_0 = const()[name = tensor("op_7084_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7084_end_0 = const()[name = tensor("op_7084_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7084_end_mask_0 = const()[name = tensor("op_7084_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7084_cast_fp16 = slice_by_index(begin = var_7084_begin_0, end = var_7084_end_0, end_mask = var_7084_end_mask_0, x = var_6935_cast_fp16)[name = tensor("op_7084_cast_fp16")]; + tensor var_7091_begin_0 = const()[name = tensor("op_7091_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7091_end_0 = const()[name = tensor("op_7091_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7091_end_mask_0 = const()[name = tensor("op_7091_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7091_cast_fp16 = slice_by_index(begin = var_7091_begin_0, end = var_7091_end_0, end_mask = var_7091_end_mask_0, x = var_6935_cast_fp16)[name = tensor("op_7091_cast_fp16")]; + tensor var_7098_begin_0 = const()[name = tensor("op_7098_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7098_end_0 = const()[name = tensor("op_7098_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7098_end_mask_0 = const()[name = tensor("op_7098_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7098_cast_fp16 = slice_by_index(begin = var_7098_begin_0, end = var_7098_end_0, end_mask = var_7098_end_mask_0, x = var_6935_cast_fp16)[name = tensor("op_7098_cast_fp16")]; + tensor var_7105_begin_0 = const()[name = tensor("op_7105_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7105_end_0 = const()[name = tensor("op_7105_end_0"), val = tensor([1, 64, 1, 1500])]; + 
tensor var_7105_end_mask_0 = const()[name = tensor("op_7105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7105_cast_fp16 = slice_by_index(begin = var_7105_begin_0, end = var_7105_end_0, end_mask = var_7105_end_mask_0, x = var_6935_cast_fp16)[name = tensor("op_7105_cast_fp16")]; + tensor var_7112_begin_0 = const()[name = tensor("op_7112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7112_end_0 = const()[name = tensor("op_7112_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7112_end_mask_0 = const()[name = tensor("op_7112_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7112_cast_fp16 = slice_by_index(begin = var_7112_begin_0, end = var_7112_end_0, end_mask = var_7112_end_mask_0, x = var_6939_cast_fp16)[name = tensor("op_7112_cast_fp16")]; + tensor var_7119_begin_0 = const()[name = tensor("op_7119_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7119_end_0 = const()[name = tensor("op_7119_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7119_end_mask_0 = const()[name = tensor("op_7119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7119_cast_fp16 = slice_by_index(begin = var_7119_begin_0, end = var_7119_end_0, end_mask = var_7119_end_mask_0, x = var_6939_cast_fp16)[name = tensor("op_7119_cast_fp16")]; + tensor var_7126_begin_0 = const()[name = tensor("op_7126_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7126_end_0 = const()[name = tensor("op_7126_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7126_end_mask_0 = const()[name = tensor("op_7126_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7126_cast_fp16 = slice_by_index(begin = var_7126_begin_0, end = var_7126_end_0, end_mask = var_7126_end_mask_0, x = var_6939_cast_fp16)[name = tensor("op_7126_cast_fp16")]; + tensor var_7133_begin_0 = const()[name = tensor("op_7133_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7133_end_0 = const()[name = tensor("op_7133_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7133_end_mask_0 = const()[name = tensor("op_7133_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7133_cast_fp16 = slice_by_index(begin = var_7133_begin_0, end = var_7133_end_0, end_mask = var_7133_end_mask_0, x = var_6939_cast_fp16)[name = tensor("op_7133_cast_fp16")]; + tensor var_7140_begin_0 = const()[name = tensor("op_7140_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7140_end_0 = const()[name = tensor("op_7140_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7140_end_mask_0 = const()[name = tensor("op_7140_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7140_cast_fp16 = slice_by_index(begin = var_7140_begin_0, end = var_7140_end_0, end_mask = var_7140_end_mask_0, x = var_6943_cast_fp16)[name = tensor("op_7140_cast_fp16")]; + tensor var_7147_begin_0 = const()[name = tensor("op_7147_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7147_end_0 = const()[name = tensor("op_7147_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7147_end_mask_0 = const()[name = tensor("op_7147_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7147_cast_fp16 = slice_by_index(begin = var_7147_begin_0, end = var_7147_end_0, end_mask = var_7147_end_mask_0, x = var_6943_cast_fp16)[name = tensor("op_7147_cast_fp16")]; + tensor var_7154_begin_0 = const()[name = tensor("op_7154_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7154_end_0 = const()[name = tensor("op_7154_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7154_end_mask_0 = 
const()[name = tensor("op_7154_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7154_cast_fp16 = slice_by_index(begin = var_7154_begin_0, end = var_7154_end_0, end_mask = var_7154_end_mask_0, x = var_6943_cast_fp16)[name = tensor("op_7154_cast_fp16")]; + tensor var_7161_begin_0 = const()[name = tensor("op_7161_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7161_end_0 = const()[name = tensor("op_7161_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7161_end_mask_0 = const()[name = tensor("op_7161_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7161_cast_fp16 = slice_by_index(begin = var_7161_begin_0, end = var_7161_end_0, end_mask = var_7161_end_mask_0, x = var_6943_cast_fp16)[name = tensor("op_7161_cast_fp16")]; + tensor var_7168_begin_0 = const()[name = tensor("op_7168_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7168_end_0 = const()[name = tensor("op_7168_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7168_end_mask_0 = const()[name = tensor("op_7168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7168_cast_fp16 = slice_by_index(begin = var_7168_begin_0, end = var_7168_end_0, end_mask = var_7168_end_mask_0, x = var_6947_cast_fp16)[name = tensor("op_7168_cast_fp16")]; + tensor var_7175_begin_0 = const()[name = tensor("op_7175_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7175_end_0 = const()[name = tensor("op_7175_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7175_end_mask_0 = const()[name = tensor("op_7175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7175_cast_fp16 = slice_by_index(begin = var_7175_begin_0, end = var_7175_end_0, end_mask = var_7175_end_mask_0, x = var_6947_cast_fp16)[name = tensor("op_7175_cast_fp16")]; + tensor var_7182_begin_0 = const()[name = tensor("op_7182_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7182_end_0 = const()[name = tensor("op_7182_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7182_end_mask_0 = const()[name = tensor("op_7182_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7182_cast_fp16 = slice_by_index(begin = var_7182_begin_0, end = var_7182_end_0, end_mask = var_7182_end_mask_0, x = var_6947_cast_fp16)[name = tensor("op_7182_cast_fp16")]; + tensor var_7189_begin_0 = const()[name = tensor("op_7189_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7189_end_0 = const()[name = tensor("op_7189_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7189_end_mask_0 = const()[name = tensor("op_7189_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7189_cast_fp16 = slice_by_index(begin = var_7189_begin_0, end = var_7189_end_0, end_mask = var_7189_end_mask_0, x = var_6947_cast_fp16)[name = tensor("op_7189_cast_fp16")]; + tensor var_7196_begin_0 = const()[name = tensor("op_7196_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7196_end_0 = const()[name = tensor("op_7196_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7196_end_mask_0 = const()[name = tensor("op_7196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7196_cast_fp16 = slice_by_index(begin = var_7196_begin_0, end = var_7196_end_0, end_mask = var_7196_end_mask_0, x = var_6951_cast_fp16)[name = tensor("op_7196_cast_fp16")]; + tensor var_7203_begin_0 = const()[name = tensor("op_7203_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7203_end_0 = const()[name = tensor("op_7203_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7203_end_mask_0 = const()[name = 
tensor("op_7203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7203_cast_fp16 = slice_by_index(begin = var_7203_begin_0, end = var_7203_end_0, end_mask = var_7203_end_mask_0, x = var_6951_cast_fp16)[name = tensor("op_7203_cast_fp16")]; + tensor var_7210_begin_0 = const()[name = tensor("op_7210_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7210_end_0 = const()[name = tensor("op_7210_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7210_end_mask_0 = const()[name = tensor("op_7210_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7210_cast_fp16 = slice_by_index(begin = var_7210_begin_0, end = var_7210_end_0, end_mask = var_7210_end_mask_0, x = var_6951_cast_fp16)[name = tensor("op_7210_cast_fp16")]; + tensor var_7217_begin_0 = const()[name = tensor("op_7217_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7217_end_0 = const()[name = tensor("op_7217_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7217_end_mask_0 = const()[name = tensor("op_7217_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7217_cast_fp16 = slice_by_index(begin = var_7217_begin_0, end = var_7217_end_0, end_mask = var_7217_end_mask_0, x = var_6951_cast_fp16)[name = tensor("op_7217_cast_fp16")]; + tensor var_7224_begin_0 = const()[name = tensor("op_7224_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7224_end_0 = const()[name = tensor("op_7224_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7224_end_mask_0 = const()[name = tensor("op_7224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7224_cast_fp16 = slice_by_index(begin = var_7224_begin_0, end = var_7224_end_0, end_mask = var_7224_end_mask_0, x = var_6955_cast_fp16)[name = tensor("op_7224_cast_fp16")]; + tensor var_7231_begin_0 = const()[name = tensor("op_7231_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7231_end_0 = const()[name = tensor("op_7231_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7231_end_mask_0 = const()[name = tensor("op_7231_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7231_cast_fp16 = slice_by_index(begin = var_7231_begin_0, end = var_7231_end_0, end_mask = var_7231_end_mask_0, x = var_6955_cast_fp16)[name = tensor("op_7231_cast_fp16")]; + tensor var_7238_begin_0 = const()[name = tensor("op_7238_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7238_end_0 = const()[name = tensor("op_7238_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7238_end_mask_0 = const()[name = tensor("op_7238_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7238_cast_fp16 = slice_by_index(begin = var_7238_begin_0, end = var_7238_end_0, end_mask = var_7238_end_mask_0, x = var_6955_cast_fp16)[name = tensor("op_7238_cast_fp16")]; + tensor var_7245_begin_0 = const()[name = tensor("op_7245_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7245_end_0 = const()[name = tensor("op_7245_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7245_end_mask_0 = const()[name = tensor("op_7245_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7245_cast_fp16 = slice_by_index(begin = var_7245_begin_0, end = var_7245_end_0, end_mask = var_7245_end_mask_0, x = var_6955_cast_fp16)[name = tensor("op_7245_cast_fp16")]; + tensor var_7252_begin_0 = const()[name = tensor("op_7252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7252_end_0 = const()[name = tensor("op_7252_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7252_end_mask_0 = const()[name = tensor("op_7252_end_mask_0"), 
val = tensor([true, true, true, false])]; + tensor var_7252_cast_fp16 = slice_by_index(begin = var_7252_begin_0, end = var_7252_end_0, end_mask = var_7252_end_mask_0, x = var_6959_cast_fp16)[name = tensor("op_7252_cast_fp16")]; + tensor var_7259_begin_0 = const()[name = tensor("op_7259_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7259_end_0 = const()[name = tensor("op_7259_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7259_end_mask_0 = const()[name = tensor("op_7259_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7259_cast_fp16 = slice_by_index(begin = var_7259_begin_0, end = var_7259_end_0, end_mask = var_7259_end_mask_0, x = var_6959_cast_fp16)[name = tensor("op_7259_cast_fp16")]; + tensor var_7266_begin_0 = const()[name = tensor("op_7266_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7266_end_0 = const()[name = tensor("op_7266_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7266_end_mask_0 = const()[name = tensor("op_7266_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7266_cast_fp16 = slice_by_index(begin = var_7266_begin_0, end = var_7266_end_0, end_mask = var_7266_end_mask_0, x = var_6959_cast_fp16)[name = tensor("op_7266_cast_fp16")]; + tensor var_7273_begin_0 = const()[name = tensor("op_7273_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7273_end_0 = const()[name = tensor("op_7273_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7273_end_mask_0 = const()[name = tensor("op_7273_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7273_cast_fp16 = slice_by_index(begin = var_7273_begin_0, end = var_7273_end_0, end_mask = var_7273_end_mask_0, x = var_6959_cast_fp16)[name = tensor("op_7273_cast_fp16")]; + tensor var_7280_begin_0 = const()[name = tensor("op_7280_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7280_end_0 = const()[name = tensor("op_7280_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7280_end_mask_0 = const()[name = tensor("op_7280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7280_cast_fp16 = slice_by_index(begin = var_7280_begin_0, end = var_7280_end_0, end_mask = var_7280_end_mask_0, x = var_6963_cast_fp16)[name = tensor("op_7280_cast_fp16")]; + tensor var_7287_begin_0 = const()[name = tensor("op_7287_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7287_end_0 = const()[name = tensor("op_7287_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7287_end_mask_0 = const()[name = tensor("op_7287_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7287_cast_fp16 = slice_by_index(begin = var_7287_begin_0, end = var_7287_end_0, end_mask = var_7287_end_mask_0, x = var_6963_cast_fp16)[name = tensor("op_7287_cast_fp16")]; + tensor var_7294_begin_0 = const()[name = tensor("op_7294_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7294_end_0 = const()[name = tensor("op_7294_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7294_end_mask_0 = const()[name = tensor("op_7294_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7294_cast_fp16 = slice_by_index(begin = var_7294_begin_0, end = var_7294_end_0, end_mask = var_7294_end_mask_0, x = var_6963_cast_fp16)[name = tensor("op_7294_cast_fp16")]; + tensor var_7301_begin_0 = const()[name = tensor("op_7301_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7301_end_0 = const()[name = tensor("op_7301_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7301_end_mask_0 = const()[name = tensor("op_7301_end_mask_0"), val = tensor([true, true, 
true, false])]; + tensor var_7301_cast_fp16 = slice_by_index(begin = var_7301_begin_0, end = var_7301_end_0, end_mask = var_7301_end_mask_0, x = var_6963_cast_fp16)[name = tensor("op_7301_cast_fp16")]; + tensor k_15_perm_0 = const()[name = tensor("k_15_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_7306_begin_0 = const()[name = tensor("op_7306_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7306_end_0 = const()[name = tensor("op_7306_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_7306_end_mask_0 = const()[name = tensor("op_7306_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_4 = transpose(perm = k_15_perm_0, x = key_15_cast_fp16)[name = tensor("transpose_4")]; + tensor var_7306_cast_fp16 = slice_by_index(begin = var_7306_begin_0, end = var_7306_end_0, end_mask = var_7306_end_mask_0, x = transpose_4)[name = tensor("op_7306_cast_fp16")]; + tensor var_7310_begin_0 = const()[name = tensor("op_7310_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_7310_end_0 = const()[name = tensor("op_7310_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_7310_end_mask_0 = const()[name = tensor("op_7310_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7310_cast_fp16 = slice_by_index(begin = var_7310_begin_0, end = var_7310_end_0, end_mask = var_7310_end_mask_0, x = transpose_4)[name = tensor("op_7310_cast_fp16")]; + tensor var_7314_begin_0 = const()[name = tensor("op_7314_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_7314_end_0 = const()[name = tensor("op_7314_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_7314_end_mask_0 = const()[name = tensor("op_7314_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7314_cast_fp16 = slice_by_index(begin = var_7314_begin_0, end = var_7314_end_0, end_mask = var_7314_end_mask_0, x = transpose_4)[name = tensor("op_7314_cast_fp16")]; + tensor var_7318_begin_0 = const()[name = tensor("op_7318_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_7318_end_0 = const()[name = tensor("op_7318_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_7318_end_mask_0 = const()[name = tensor("op_7318_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7318_cast_fp16 = slice_by_index(begin = var_7318_begin_0, end = var_7318_end_0, end_mask = var_7318_end_mask_0, x = transpose_4)[name = tensor("op_7318_cast_fp16")]; + tensor var_7322_begin_0 = const()[name = tensor("op_7322_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_7322_end_0 = const()[name = tensor("op_7322_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_7322_end_mask_0 = const()[name = tensor("op_7322_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7322_cast_fp16 = slice_by_index(begin = var_7322_begin_0, end = var_7322_end_0, end_mask = var_7322_end_mask_0, x = transpose_4)[name = tensor("op_7322_cast_fp16")]; + tensor var_7326_begin_0 = const()[name = tensor("op_7326_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_7326_end_0 = const()[name = tensor("op_7326_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_7326_end_mask_0 = const()[name = tensor("op_7326_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7326_cast_fp16 = slice_by_index(begin = var_7326_begin_0, end = var_7326_end_0, end_mask = var_7326_end_mask_0, x = transpose_4)[name = tensor("op_7326_cast_fp16")]; + tensor var_7330_begin_0 = const()[name = tensor("op_7330_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_7330_end_0 = const()[name = tensor("op_7330_end_0"), val = 
tensor([1, 1500, 1, 448])]; + tensor var_7330_end_mask_0 = const()[name = tensor("op_7330_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7330_cast_fp16 = slice_by_index(begin = var_7330_begin_0, end = var_7330_end_0, end_mask = var_7330_end_mask_0, x = transpose_4)[name = tensor("op_7330_cast_fp16")]; + tensor var_7334_begin_0 = const()[name = tensor("op_7334_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_7334_end_0 = const()[name = tensor("op_7334_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_7334_end_mask_0 = const()[name = tensor("op_7334_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7334_cast_fp16 = slice_by_index(begin = var_7334_begin_0, end = var_7334_end_0, end_mask = var_7334_end_mask_0, x = transpose_4)[name = tensor("op_7334_cast_fp16")]; + tensor var_7338_begin_0 = const()[name = tensor("op_7338_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_7338_end_0 = const()[name = tensor("op_7338_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_7338_end_mask_0 = const()[name = tensor("op_7338_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7338_cast_fp16 = slice_by_index(begin = var_7338_begin_0, end = var_7338_end_0, end_mask = var_7338_end_mask_0, x = transpose_4)[name = tensor("op_7338_cast_fp16")]; + tensor var_7342_begin_0 = const()[name = tensor("op_7342_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_7342_end_0 = const()[name = tensor("op_7342_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_7342_end_mask_0 = const()[name = tensor("op_7342_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7342_cast_fp16 = slice_by_index(begin = var_7342_begin_0, end = var_7342_end_0, end_mask = var_7342_end_mask_0, x = transpose_4)[name = tensor("op_7342_cast_fp16")]; + tensor var_7346_begin_0 = const()[name = tensor("op_7346_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_7346_end_0 = const()[name = tensor("op_7346_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_7346_end_mask_0 = const()[name = tensor("op_7346_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7346_cast_fp16 = slice_by_index(begin = var_7346_begin_0, end = var_7346_end_0, end_mask = var_7346_end_mask_0, x = transpose_4)[name = tensor("op_7346_cast_fp16")]; + tensor var_7350_begin_0 = const()[name = tensor("op_7350_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_7350_end_0 = const()[name = tensor("op_7350_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_7350_end_mask_0 = const()[name = tensor("op_7350_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7350_cast_fp16 = slice_by_index(begin = var_7350_begin_0, end = var_7350_end_0, end_mask = var_7350_end_mask_0, x = transpose_4)[name = tensor("op_7350_cast_fp16")]; + tensor var_7352_begin_0 = const()[name = tensor("op_7352_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7352_end_0 = const()[name = tensor("op_7352_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7352_end_mask_0 = const()[name = tensor("op_7352_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7352_cast_fp16 = slice_by_index(begin = var_7352_begin_0, end = var_7352_end_0, end_mask = var_7352_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7352_cast_fp16")]; + tensor var_7356_begin_0 = const()[name = tensor("op_7356_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_7356_end_0 = const()[name = tensor("op_7356_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_7356_end_mask_0 = 
const()[name = tensor("op_7356_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7356_cast_fp16 = slice_by_index(begin = var_7356_begin_0, end = var_7356_end_0, end_mask = var_7356_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7356_cast_fp16")]; + tensor var_7360_begin_0 = const()[name = tensor("op_7360_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_7360_end_0 = const()[name = tensor("op_7360_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_7360_end_mask_0 = const()[name = tensor("op_7360_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7360_cast_fp16 = slice_by_index(begin = var_7360_begin_0, end = var_7360_end_0, end_mask = var_7360_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7360_cast_fp16")]; + tensor var_7364_begin_0 = const()[name = tensor("op_7364_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_7364_end_0 = const()[name = tensor("op_7364_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_7364_end_mask_0 = const()[name = tensor("op_7364_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7364_cast_fp16 = slice_by_index(begin = var_7364_begin_0, end = var_7364_end_0, end_mask = var_7364_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7364_cast_fp16")]; + tensor var_7368_begin_0 = const()[name = tensor("op_7368_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_7368_end_0 = const()[name = tensor("op_7368_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_7368_end_mask_0 = const()[name = tensor("op_7368_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7368_cast_fp16 = slice_by_index(begin = var_7368_begin_0, end = var_7368_end_0, end_mask = var_7368_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7368_cast_fp16")]; + tensor var_7372_begin_0 = const()[name = tensor("op_7372_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_7372_end_0 = const()[name = tensor("op_7372_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_7372_end_mask_0 = const()[name = tensor("op_7372_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7372_cast_fp16 = slice_by_index(begin = var_7372_begin_0, end = var_7372_end_0, end_mask = var_7372_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7372_cast_fp16")]; + tensor var_7376_begin_0 = const()[name = tensor("op_7376_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_7376_end_0 = const()[name = tensor("op_7376_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_7376_end_mask_0 = const()[name = tensor("op_7376_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7376_cast_fp16 = slice_by_index(begin = var_7376_begin_0, end = var_7376_end_0, end_mask = var_7376_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7376_cast_fp16")]; + tensor var_7380_begin_0 = const()[name = tensor("op_7380_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_7380_end_0 = const()[name = tensor("op_7380_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_7380_end_mask_0 = const()[name = tensor("op_7380_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7380_cast_fp16 = slice_by_index(begin = var_7380_begin_0, end = var_7380_end_0, end_mask = var_7380_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7380_cast_fp16")]; + tensor var_7384_begin_0 = const()[name = tensor("op_7384_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_7384_end_0 = const()[name = tensor("op_7384_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_7384_end_mask_0 = const()[name = 
tensor("op_7384_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7384_cast_fp16 = slice_by_index(begin = var_7384_begin_0, end = var_7384_end_0, end_mask = var_7384_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7384_cast_fp16")]; + tensor var_7388_begin_0 = const()[name = tensor("op_7388_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_7388_end_0 = const()[name = tensor("op_7388_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_7388_end_mask_0 = const()[name = tensor("op_7388_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7388_cast_fp16 = slice_by_index(begin = var_7388_begin_0, end = var_7388_end_0, end_mask = var_7388_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7388_cast_fp16")]; + tensor var_7392_begin_0 = const()[name = tensor("op_7392_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_7392_end_0 = const()[name = tensor("op_7392_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_7392_end_mask_0 = const()[name = tensor("op_7392_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7392_cast_fp16 = slice_by_index(begin = var_7392_begin_0, end = var_7392_end_0, end_mask = var_7392_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7392_cast_fp16")]; + tensor var_7396_begin_0 = const()[name = tensor("op_7396_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_7396_end_0 = const()[name = tensor("op_7396_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_7396_end_mask_0 = const()[name = tensor("op_7396_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7396_cast_fp16 = slice_by_index(begin = var_7396_begin_0, end = var_7396_end_0, end_mask = var_7396_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_7396_cast_fp16")]; + tensor var_7400_equation_0 = const()[name = tensor("op_7400_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7400_cast_fp16 = einsum(equation = var_7400_equation_0, values = (var_7306_cast_fp16, var_6972_cast_fp16))[name = tensor("op_7400_cast_fp16")]; + tensor var_7401_to_fp16 = const()[name = tensor("op_7401_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_673_cast_fp16 = mul(x = var_7400_cast_fp16, y = var_7401_to_fp16)[name = tensor("aw_chunk_673_cast_fp16")]; + tensor var_7404_equation_0 = const()[name = tensor("op_7404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7404_cast_fp16 = einsum(equation = var_7404_equation_0, values = (var_7306_cast_fp16, var_6979_cast_fp16))[name = tensor("op_7404_cast_fp16")]; + tensor var_7405_to_fp16 = const()[name = tensor("op_7405_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_675_cast_fp16 = mul(x = var_7404_cast_fp16, y = var_7405_to_fp16)[name = tensor("aw_chunk_675_cast_fp16")]; + tensor var_7408_equation_0 = const()[name = tensor("op_7408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7408_cast_fp16 = einsum(equation = var_7408_equation_0, values = (var_7306_cast_fp16, var_6986_cast_fp16))[name = tensor("op_7408_cast_fp16")]; + tensor var_7409_to_fp16 = const()[name = tensor("op_7409_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_677_cast_fp16 = mul(x = var_7408_cast_fp16, y = var_7409_to_fp16)[name = tensor("aw_chunk_677_cast_fp16")]; + tensor var_7412_equation_0 = const()[name = tensor("op_7412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7412_cast_fp16 = einsum(equation = var_7412_equation_0, values = (var_7306_cast_fp16, var_6993_cast_fp16))[name = tensor("op_7412_cast_fp16")]; + tensor var_7413_to_fp16 = const()[name = 
tensor("op_7413_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_679_cast_fp16 = mul(x = var_7412_cast_fp16, y = var_7413_to_fp16)[name = tensor("aw_chunk_679_cast_fp16")]; + tensor var_7416_equation_0 = const()[name = tensor("op_7416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7416_cast_fp16 = einsum(equation = var_7416_equation_0, values = (var_7310_cast_fp16, var_7000_cast_fp16))[name = tensor("op_7416_cast_fp16")]; + tensor var_7417_to_fp16 = const()[name = tensor("op_7417_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_681_cast_fp16 = mul(x = var_7416_cast_fp16, y = var_7417_to_fp16)[name = tensor("aw_chunk_681_cast_fp16")]; + tensor var_7420_equation_0 = const()[name = tensor("op_7420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7420_cast_fp16 = einsum(equation = var_7420_equation_0, values = (var_7310_cast_fp16, var_7007_cast_fp16))[name = tensor("op_7420_cast_fp16")]; + tensor var_7421_to_fp16 = const()[name = tensor("op_7421_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_683_cast_fp16 = mul(x = var_7420_cast_fp16, y = var_7421_to_fp16)[name = tensor("aw_chunk_683_cast_fp16")]; + tensor var_7424_equation_0 = const()[name = tensor("op_7424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7424_cast_fp16 = einsum(equation = var_7424_equation_0, values = (var_7310_cast_fp16, var_7014_cast_fp16))[name = tensor("op_7424_cast_fp16")]; + tensor var_7425_to_fp16 = const()[name = tensor("op_7425_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_685_cast_fp16 = mul(x = var_7424_cast_fp16, y = var_7425_to_fp16)[name = tensor("aw_chunk_685_cast_fp16")]; + tensor var_7428_equation_0 = const()[name = tensor("op_7428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7428_cast_fp16 = einsum(equation = var_7428_equation_0, values = (var_7310_cast_fp16, var_7021_cast_fp16))[name = tensor("op_7428_cast_fp16")]; + tensor var_7429_to_fp16 = const()[name = tensor("op_7429_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_687_cast_fp16 = mul(x = var_7428_cast_fp16, y = var_7429_to_fp16)[name = tensor("aw_chunk_687_cast_fp16")]; + tensor var_7432_equation_0 = const()[name = tensor("op_7432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7432_cast_fp16 = einsum(equation = var_7432_equation_0, values = (var_7314_cast_fp16, var_7028_cast_fp16))[name = tensor("op_7432_cast_fp16")]; + tensor var_7433_to_fp16 = const()[name = tensor("op_7433_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_689_cast_fp16 = mul(x = var_7432_cast_fp16, y = var_7433_to_fp16)[name = tensor("aw_chunk_689_cast_fp16")]; + tensor var_7436_equation_0 = const()[name = tensor("op_7436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7436_cast_fp16 = einsum(equation = var_7436_equation_0, values = (var_7314_cast_fp16, var_7035_cast_fp16))[name = tensor("op_7436_cast_fp16")]; + tensor var_7437_to_fp16 = const()[name = tensor("op_7437_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_691_cast_fp16 = mul(x = var_7436_cast_fp16, y = var_7437_to_fp16)[name = tensor("aw_chunk_691_cast_fp16")]; + tensor var_7440_equation_0 = const()[name = tensor("op_7440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7440_cast_fp16 = einsum(equation = var_7440_equation_0, values = (var_7314_cast_fp16, var_7042_cast_fp16))[name = tensor("op_7440_cast_fp16")]; + tensor var_7441_to_fp16 = const()[name = tensor("op_7441_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_693_cast_fp16 = mul(x = var_7440_cast_fp16, y = var_7441_to_fp16)[name = 
tensor("aw_chunk_693_cast_fp16")]; + tensor var_7444_equation_0 = const()[name = tensor("op_7444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7444_cast_fp16 = einsum(equation = var_7444_equation_0, values = (var_7314_cast_fp16, var_7049_cast_fp16))[name = tensor("op_7444_cast_fp16")]; + tensor var_7445_to_fp16 = const()[name = tensor("op_7445_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_695_cast_fp16 = mul(x = var_7444_cast_fp16, y = var_7445_to_fp16)[name = tensor("aw_chunk_695_cast_fp16")]; + tensor var_7448_equation_0 = const()[name = tensor("op_7448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7448_cast_fp16 = einsum(equation = var_7448_equation_0, values = (var_7318_cast_fp16, var_7056_cast_fp16))[name = tensor("op_7448_cast_fp16")]; + tensor var_7449_to_fp16 = const()[name = tensor("op_7449_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_697_cast_fp16 = mul(x = var_7448_cast_fp16, y = var_7449_to_fp16)[name = tensor("aw_chunk_697_cast_fp16")]; + tensor var_7452_equation_0 = const()[name = tensor("op_7452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7452_cast_fp16 = einsum(equation = var_7452_equation_0, values = (var_7318_cast_fp16, var_7063_cast_fp16))[name = tensor("op_7452_cast_fp16")]; + tensor var_7453_to_fp16 = const()[name = tensor("op_7453_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_699_cast_fp16 = mul(x = var_7452_cast_fp16, y = var_7453_to_fp16)[name = tensor("aw_chunk_699_cast_fp16")]; + tensor var_7456_equation_0 = const()[name = tensor("op_7456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7456_cast_fp16 = einsum(equation = var_7456_equation_0, values = (var_7318_cast_fp16, var_7070_cast_fp16))[name = tensor("op_7456_cast_fp16")]; + tensor var_7457_to_fp16 = const()[name = tensor("op_7457_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_701_cast_fp16 = mul(x = var_7456_cast_fp16, y = var_7457_to_fp16)[name = tensor("aw_chunk_701_cast_fp16")]; + tensor var_7460_equation_0 = const()[name = tensor("op_7460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7460_cast_fp16 = einsum(equation = var_7460_equation_0, values = (var_7318_cast_fp16, var_7077_cast_fp16))[name = tensor("op_7460_cast_fp16")]; + tensor var_7461_to_fp16 = const()[name = tensor("op_7461_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_703_cast_fp16 = mul(x = var_7460_cast_fp16, y = var_7461_to_fp16)[name = tensor("aw_chunk_703_cast_fp16")]; + tensor var_7464_equation_0 = const()[name = tensor("op_7464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7464_cast_fp16 = einsum(equation = var_7464_equation_0, values = (var_7322_cast_fp16, var_7084_cast_fp16))[name = tensor("op_7464_cast_fp16")]; + tensor var_7465_to_fp16 = const()[name = tensor("op_7465_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_705_cast_fp16 = mul(x = var_7464_cast_fp16, y = var_7465_to_fp16)[name = tensor("aw_chunk_705_cast_fp16")]; + tensor var_7468_equation_0 = const()[name = tensor("op_7468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7468_cast_fp16 = einsum(equation = var_7468_equation_0, values = (var_7322_cast_fp16, var_7091_cast_fp16))[name = tensor("op_7468_cast_fp16")]; + tensor var_7469_to_fp16 = const()[name = tensor("op_7469_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_707_cast_fp16 = mul(x = var_7468_cast_fp16, y = var_7469_to_fp16)[name = tensor("aw_chunk_707_cast_fp16")]; + tensor var_7472_equation_0 = const()[name = tensor("op_7472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + 
tensor var_7472_cast_fp16 = einsum(equation = var_7472_equation_0, values = (var_7322_cast_fp16, var_7098_cast_fp16))[name = tensor("op_7472_cast_fp16")]; + tensor var_7473_to_fp16 = const()[name = tensor("op_7473_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_709_cast_fp16 = mul(x = var_7472_cast_fp16, y = var_7473_to_fp16)[name = tensor("aw_chunk_709_cast_fp16")]; + tensor var_7476_equation_0 = const()[name = tensor("op_7476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7476_cast_fp16 = einsum(equation = var_7476_equation_0, values = (var_7322_cast_fp16, var_7105_cast_fp16))[name = tensor("op_7476_cast_fp16")]; + tensor var_7477_to_fp16 = const()[name = tensor("op_7477_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_711_cast_fp16 = mul(x = var_7476_cast_fp16, y = var_7477_to_fp16)[name = tensor("aw_chunk_711_cast_fp16")]; + tensor var_7480_equation_0 = const()[name = tensor("op_7480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7480_cast_fp16 = einsum(equation = var_7480_equation_0, values = (var_7326_cast_fp16, var_7112_cast_fp16))[name = tensor("op_7480_cast_fp16")]; + tensor var_7481_to_fp16 = const()[name = tensor("op_7481_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_713_cast_fp16 = mul(x = var_7480_cast_fp16, y = var_7481_to_fp16)[name = tensor("aw_chunk_713_cast_fp16")]; + tensor var_7484_equation_0 = const()[name = tensor("op_7484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7484_cast_fp16 = einsum(equation = var_7484_equation_0, values = (var_7326_cast_fp16, var_7119_cast_fp16))[name = tensor("op_7484_cast_fp16")]; + tensor var_7485_to_fp16 = const()[name = tensor("op_7485_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_715_cast_fp16 = mul(x = var_7484_cast_fp16, y = var_7485_to_fp16)[name = tensor("aw_chunk_715_cast_fp16")]; + tensor var_7488_equation_0 = const()[name = tensor("op_7488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7488_cast_fp16 = einsum(equation = var_7488_equation_0, values = (var_7326_cast_fp16, var_7126_cast_fp16))[name = tensor("op_7488_cast_fp16")]; + tensor var_7489_to_fp16 = const()[name = tensor("op_7489_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_717_cast_fp16 = mul(x = var_7488_cast_fp16, y = var_7489_to_fp16)[name = tensor("aw_chunk_717_cast_fp16")]; + tensor var_7492_equation_0 = const()[name = tensor("op_7492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7492_cast_fp16 = einsum(equation = var_7492_equation_0, values = (var_7326_cast_fp16, var_7133_cast_fp16))[name = tensor("op_7492_cast_fp16")]; + tensor var_7493_to_fp16 = const()[name = tensor("op_7493_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_719_cast_fp16 = mul(x = var_7492_cast_fp16, y = var_7493_to_fp16)[name = tensor("aw_chunk_719_cast_fp16")]; + tensor var_7496_equation_0 = const()[name = tensor("op_7496_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7496_cast_fp16 = einsum(equation = var_7496_equation_0, values = (var_7330_cast_fp16, var_7140_cast_fp16))[name = tensor("op_7496_cast_fp16")]; + tensor var_7497_to_fp16 = const()[name = tensor("op_7497_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_721_cast_fp16 = mul(x = var_7496_cast_fp16, y = var_7497_to_fp16)[name = tensor("aw_chunk_721_cast_fp16")]; + tensor var_7500_equation_0 = const()[name = tensor("op_7500_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7500_cast_fp16 = einsum(equation = var_7500_equation_0, values = (var_7330_cast_fp16, var_7147_cast_fp16))[name = 
tensor("op_7500_cast_fp16")]; + tensor var_7501_to_fp16 = const()[name = tensor("op_7501_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_723_cast_fp16 = mul(x = var_7500_cast_fp16, y = var_7501_to_fp16)[name = tensor("aw_chunk_723_cast_fp16")]; + tensor var_7504_equation_0 = const()[name = tensor("op_7504_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7504_cast_fp16 = einsum(equation = var_7504_equation_0, values = (var_7330_cast_fp16, var_7154_cast_fp16))[name = tensor("op_7504_cast_fp16")]; + tensor var_7505_to_fp16 = const()[name = tensor("op_7505_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_725_cast_fp16 = mul(x = var_7504_cast_fp16, y = var_7505_to_fp16)[name = tensor("aw_chunk_725_cast_fp16")]; + tensor var_7508_equation_0 = const()[name = tensor("op_7508_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7508_cast_fp16 = einsum(equation = var_7508_equation_0, values = (var_7330_cast_fp16, var_7161_cast_fp16))[name = tensor("op_7508_cast_fp16")]; + tensor var_7509_to_fp16 = const()[name = tensor("op_7509_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_727_cast_fp16 = mul(x = var_7508_cast_fp16, y = var_7509_to_fp16)[name = tensor("aw_chunk_727_cast_fp16")]; + tensor var_7512_equation_0 = const()[name = tensor("op_7512_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7512_cast_fp16 = einsum(equation = var_7512_equation_0, values = (var_7334_cast_fp16, var_7168_cast_fp16))[name = tensor("op_7512_cast_fp16")]; + tensor var_7513_to_fp16 = const()[name = tensor("op_7513_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_729_cast_fp16 = mul(x = var_7512_cast_fp16, y = var_7513_to_fp16)[name = tensor("aw_chunk_729_cast_fp16")]; + tensor var_7516_equation_0 = const()[name = tensor("op_7516_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7516_cast_fp16 = einsum(equation = var_7516_equation_0, values = (var_7334_cast_fp16, var_7175_cast_fp16))[name = tensor("op_7516_cast_fp16")]; + tensor var_7517_to_fp16 = const()[name = tensor("op_7517_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_731_cast_fp16 = mul(x = var_7516_cast_fp16, y = var_7517_to_fp16)[name = tensor("aw_chunk_731_cast_fp16")]; + tensor var_7520_equation_0 = const()[name = tensor("op_7520_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7520_cast_fp16 = einsum(equation = var_7520_equation_0, values = (var_7334_cast_fp16, var_7182_cast_fp16))[name = tensor("op_7520_cast_fp16")]; + tensor var_7521_to_fp16 = const()[name = tensor("op_7521_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_733_cast_fp16 = mul(x = var_7520_cast_fp16, y = var_7521_to_fp16)[name = tensor("aw_chunk_733_cast_fp16")]; + tensor var_7524_equation_0 = const()[name = tensor("op_7524_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7524_cast_fp16 = einsum(equation = var_7524_equation_0, values = (var_7334_cast_fp16, var_7189_cast_fp16))[name = tensor("op_7524_cast_fp16")]; + tensor var_7525_to_fp16 = const()[name = tensor("op_7525_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_735_cast_fp16 = mul(x = var_7524_cast_fp16, y = var_7525_to_fp16)[name = tensor("aw_chunk_735_cast_fp16")]; + tensor var_7528_equation_0 = const()[name = tensor("op_7528_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7528_cast_fp16 = einsum(equation = var_7528_equation_0, values = (var_7338_cast_fp16, var_7196_cast_fp16))[name = tensor("op_7528_cast_fp16")]; + tensor var_7529_to_fp16 = const()[name = tensor("op_7529_to_fp16"), val = tensor(0x1p-3)]; + tensor 
aw_chunk_737_cast_fp16 = mul(x = var_7528_cast_fp16, y = var_7529_to_fp16)[name = tensor("aw_chunk_737_cast_fp16")]; + tensor var_7532_equation_0 = const()[name = tensor("op_7532_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7532_cast_fp16 = einsum(equation = var_7532_equation_0, values = (var_7338_cast_fp16, var_7203_cast_fp16))[name = tensor("op_7532_cast_fp16")]; + tensor var_7533_to_fp16 = const()[name = tensor("op_7533_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_739_cast_fp16 = mul(x = var_7532_cast_fp16, y = var_7533_to_fp16)[name = tensor("aw_chunk_739_cast_fp16")]; + tensor var_7536_equation_0 = const()[name = tensor("op_7536_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7536_cast_fp16 = einsum(equation = var_7536_equation_0, values = (var_7338_cast_fp16, var_7210_cast_fp16))[name = tensor("op_7536_cast_fp16")]; + tensor var_7537_to_fp16 = const()[name = tensor("op_7537_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_741_cast_fp16 = mul(x = var_7536_cast_fp16, y = var_7537_to_fp16)[name = tensor("aw_chunk_741_cast_fp16")]; + tensor var_7540_equation_0 = const()[name = tensor("op_7540_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7540_cast_fp16 = einsum(equation = var_7540_equation_0, values = (var_7338_cast_fp16, var_7217_cast_fp16))[name = tensor("op_7540_cast_fp16")]; + tensor var_7541_to_fp16 = const()[name = tensor("op_7541_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_743_cast_fp16 = mul(x = var_7540_cast_fp16, y = var_7541_to_fp16)[name = tensor("aw_chunk_743_cast_fp16")]; + tensor var_7544_equation_0 = const()[name = tensor("op_7544_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7544_cast_fp16 = einsum(equation = var_7544_equation_0, values = (var_7342_cast_fp16, var_7224_cast_fp16))[name = tensor("op_7544_cast_fp16")]; + tensor var_7545_to_fp16 = const()[name = tensor("op_7545_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_745_cast_fp16 = mul(x = var_7544_cast_fp16, y = var_7545_to_fp16)[name = tensor("aw_chunk_745_cast_fp16")]; + tensor var_7548_equation_0 = const()[name = tensor("op_7548_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7548_cast_fp16 = einsum(equation = var_7548_equation_0, values = (var_7342_cast_fp16, var_7231_cast_fp16))[name = tensor("op_7548_cast_fp16")]; + tensor var_7549_to_fp16 = const()[name = tensor("op_7549_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_747_cast_fp16 = mul(x = var_7548_cast_fp16, y = var_7549_to_fp16)[name = tensor("aw_chunk_747_cast_fp16")]; + tensor var_7552_equation_0 = const()[name = tensor("op_7552_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7552_cast_fp16 = einsum(equation = var_7552_equation_0, values = (var_7342_cast_fp16, var_7238_cast_fp16))[name = tensor("op_7552_cast_fp16")]; + tensor var_7553_to_fp16 = const()[name = tensor("op_7553_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_749_cast_fp16 = mul(x = var_7552_cast_fp16, y = var_7553_to_fp16)[name = tensor("aw_chunk_749_cast_fp16")]; + tensor var_7556_equation_0 = const()[name = tensor("op_7556_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7556_cast_fp16 = einsum(equation = var_7556_equation_0, values = (var_7342_cast_fp16, var_7245_cast_fp16))[name = tensor("op_7556_cast_fp16")]; + tensor var_7557_to_fp16 = const()[name = tensor("op_7557_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_751_cast_fp16 = mul(x = var_7556_cast_fp16, y = var_7557_to_fp16)[name = tensor("aw_chunk_751_cast_fp16")]; + tensor var_7560_equation_0 = 
const()[name = tensor("op_7560_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7560_cast_fp16 = einsum(equation = var_7560_equation_0, values = (var_7346_cast_fp16, var_7252_cast_fp16))[name = tensor("op_7560_cast_fp16")]; + tensor var_7561_to_fp16 = const()[name = tensor("op_7561_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_753_cast_fp16 = mul(x = var_7560_cast_fp16, y = var_7561_to_fp16)[name = tensor("aw_chunk_753_cast_fp16")]; + tensor var_7564_equation_0 = const()[name = tensor("op_7564_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7564_cast_fp16 = einsum(equation = var_7564_equation_0, values = (var_7346_cast_fp16, var_7259_cast_fp16))[name = tensor("op_7564_cast_fp16")]; + tensor var_7565_to_fp16 = const()[name = tensor("op_7565_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_755_cast_fp16 = mul(x = var_7564_cast_fp16, y = var_7565_to_fp16)[name = tensor("aw_chunk_755_cast_fp16")]; + tensor var_7568_equation_0 = const()[name = tensor("op_7568_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7568_cast_fp16 = einsum(equation = var_7568_equation_0, values = (var_7346_cast_fp16, var_7266_cast_fp16))[name = tensor("op_7568_cast_fp16")]; + tensor var_7569_to_fp16 = const()[name = tensor("op_7569_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_757_cast_fp16 = mul(x = var_7568_cast_fp16, y = var_7569_to_fp16)[name = tensor("aw_chunk_757_cast_fp16")]; + tensor var_7572_equation_0 = const()[name = tensor("op_7572_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7572_cast_fp16 = einsum(equation = var_7572_equation_0, values = (var_7346_cast_fp16, var_7273_cast_fp16))[name = tensor("op_7572_cast_fp16")]; + tensor var_7573_to_fp16 = const()[name = tensor("op_7573_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_759_cast_fp16 = mul(x = var_7572_cast_fp16, y = var_7573_to_fp16)[name = tensor("aw_chunk_759_cast_fp16")]; + tensor var_7576_equation_0 = const()[name = tensor("op_7576_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7576_cast_fp16 = einsum(equation = var_7576_equation_0, values = (var_7350_cast_fp16, var_7280_cast_fp16))[name = tensor("op_7576_cast_fp16")]; + tensor var_7577_to_fp16 = const()[name = tensor("op_7577_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_761_cast_fp16 = mul(x = var_7576_cast_fp16, y = var_7577_to_fp16)[name = tensor("aw_chunk_761_cast_fp16")]; + tensor var_7580_equation_0 = const()[name = tensor("op_7580_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7580_cast_fp16 = einsum(equation = var_7580_equation_0, values = (var_7350_cast_fp16, var_7287_cast_fp16))[name = tensor("op_7580_cast_fp16")]; + tensor var_7581_to_fp16 = const()[name = tensor("op_7581_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_763_cast_fp16 = mul(x = var_7580_cast_fp16, y = var_7581_to_fp16)[name = tensor("aw_chunk_763_cast_fp16")]; + tensor var_7584_equation_0 = const()[name = tensor("op_7584_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7584_cast_fp16 = einsum(equation = var_7584_equation_0, values = (var_7350_cast_fp16, var_7294_cast_fp16))[name = tensor("op_7584_cast_fp16")]; + tensor var_7585_to_fp16 = const()[name = tensor("op_7585_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_765_cast_fp16 = mul(x = var_7584_cast_fp16, y = var_7585_to_fp16)[name = tensor("aw_chunk_765_cast_fp16")]; + tensor var_7588_equation_0 = const()[name = tensor("op_7588_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7588_cast_fp16 = einsum(equation = var_7588_equation_0, 
values = (var_7350_cast_fp16, var_7301_cast_fp16))[name = tensor("op_7588_cast_fp16")]; + tensor var_7589_to_fp16 = const()[name = tensor("op_7589_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_767_cast_fp16 = mul(x = var_7588_cast_fp16, y = var_7589_to_fp16)[name = tensor("aw_chunk_767_cast_fp16")]; + tensor var_7591_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_673_cast_fp16)[name = tensor("op_7591_cast_fp16")]; + tensor var_7592_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_675_cast_fp16)[name = tensor("op_7592_cast_fp16")]; + tensor var_7593_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_677_cast_fp16)[name = tensor("op_7593_cast_fp16")]; + tensor var_7594_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_679_cast_fp16)[name = tensor("op_7594_cast_fp16")]; + tensor var_7595_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_681_cast_fp16)[name = tensor("op_7595_cast_fp16")]; + tensor var_7596_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_683_cast_fp16)[name = tensor("op_7596_cast_fp16")]; + tensor var_7597_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_685_cast_fp16)[name = tensor("op_7597_cast_fp16")]; + tensor var_7598_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_687_cast_fp16)[name = tensor("op_7598_cast_fp16")]; + tensor var_7599_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_689_cast_fp16)[name = tensor("op_7599_cast_fp16")]; + tensor var_7600_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_691_cast_fp16)[name = tensor("op_7600_cast_fp16")]; + tensor var_7601_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_693_cast_fp16)[name = tensor("op_7601_cast_fp16")]; + tensor var_7602_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_695_cast_fp16)[name = tensor("op_7602_cast_fp16")]; + tensor var_7603_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_697_cast_fp16)[name = tensor("op_7603_cast_fp16")]; + tensor var_7604_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_699_cast_fp16)[name = tensor("op_7604_cast_fp16")]; + tensor var_7605_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_701_cast_fp16)[name = tensor("op_7605_cast_fp16")]; + tensor var_7606_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_703_cast_fp16)[name = tensor("op_7606_cast_fp16")]; + tensor var_7607_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_705_cast_fp16)[name = tensor("op_7607_cast_fp16")]; + tensor var_7608_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_707_cast_fp16)[name = tensor("op_7608_cast_fp16")]; + tensor var_7609_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_709_cast_fp16)[name = tensor("op_7609_cast_fp16")]; + tensor var_7610_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_711_cast_fp16)[name = tensor("op_7610_cast_fp16")]; + tensor var_7611_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_713_cast_fp16)[name = tensor("op_7611_cast_fp16")]; + tensor var_7612_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_715_cast_fp16)[name = tensor("op_7612_cast_fp16")]; + tensor var_7613_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_717_cast_fp16)[name = tensor("op_7613_cast_fp16")]; + tensor var_7614_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_719_cast_fp16)[name = tensor("op_7614_cast_fp16")]; + tensor var_7615_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_721_cast_fp16)[name = tensor("op_7615_cast_fp16")]; + tensor var_7616_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_723_cast_fp16)[name = tensor("op_7616_cast_fp16")]; + tensor var_7617_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_725_cast_fp16)[name = tensor("op_7617_cast_fp16")]; + 
tensor var_7618_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_727_cast_fp16)[name = tensor("op_7618_cast_fp16")]; + tensor var_7619_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_729_cast_fp16)[name = tensor("op_7619_cast_fp16")]; + tensor var_7620_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_731_cast_fp16)[name = tensor("op_7620_cast_fp16")]; + tensor var_7621_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_733_cast_fp16)[name = tensor("op_7621_cast_fp16")]; + tensor var_7622_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_735_cast_fp16)[name = tensor("op_7622_cast_fp16")]; + tensor var_7623_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_737_cast_fp16)[name = tensor("op_7623_cast_fp16")]; + tensor var_7624_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_739_cast_fp16)[name = tensor("op_7624_cast_fp16")]; + tensor var_7625_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_741_cast_fp16)[name = tensor("op_7625_cast_fp16")]; + tensor var_7626_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_743_cast_fp16)[name = tensor("op_7626_cast_fp16")]; + tensor var_7627_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_745_cast_fp16)[name = tensor("op_7627_cast_fp16")]; + tensor var_7628_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_747_cast_fp16)[name = tensor("op_7628_cast_fp16")]; + tensor var_7629_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_749_cast_fp16)[name = tensor("op_7629_cast_fp16")]; + tensor var_7630_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_751_cast_fp16)[name = tensor("op_7630_cast_fp16")]; + tensor var_7631_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_753_cast_fp16)[name = tensor("op_7631_cast_fp16")]; + tensor var_7632_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_755_cast_fp16)[name = tensor("op_7632_cast_fp16")]; + tensor var_7633_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_757_cast_fp16)[name = tensor("op_7633_cast_fp16")]; + tensor var_7634_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_759_cast_fp16)[name = tensor("op_7634_cast_fp16")]; + tensor var_7635_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_761_cast_fp16)[name = tensor("op_7635_cast_fp16")]; + tensor var_7636_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_763_cast_fp16)[name = tensor("op_7636_cast_fp16")]; + tensor var_7637_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_765_cast_fp16)[name = tensor("op_7637_cast_fp16")]; + tensor var_7638_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_767_cast_fp16)[name = tensor("op_7638_cast_fp16")]; + tensor var_7640_equation_0 = const()[name = tensor("op_7640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7640_cast_fp16 = einsum(equation = var_7640_equation_0, values = (var_7352_cast_fp16, var_7591_cast_fp16))[name = tensor("op_7640_cast_fp16")]; + tensor var_7642_equation_0 = const()[name = tensor("op_7642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7642_cast_fp16 = einsum(equation = var_7642_equation_0, values = (var_7352_cast_fp16, var_7592_cast_fp16))[name = tensor("op_7642_cast_fp16")]; + tensor var_7644_equation_0 = const()[name = tensor("op_7644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7644_cast_fp16 = einsum(equation = var_7644_equation_0, values = (var_7352_cast_fp16, var_7593_cast_fp16))[name = tensor("op_7644_cast_fp16")]; + tensor var_7646_equation_0 = const()[name = tensor("op_7646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7646_cast_fp16 = einsum(equation = var_7646_equation_0, values = (var_7352_cast_fp16, var_7594_cast_fp16))[name 
= tensor("op_7646_cast_fp16")]; + tensor var_7648_equation_0 = const()[name = tensor("op_7648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7648_cast_fp16 = einsum(equation = var_7648_equation_0, values = (var_7356_cast_fp16, var_7595_cast_fp16))[name = tensor("op_7648_cast_fp16")]; + tensor var_7650_equation_0 = const()[name = tensor("op_7650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7650_cast_fp16 = einsum(equation = var_7650_equation_0, values = (var_7356_cast_fp16, var_7596_cast_fp16))[name = tensor("op_7650_cast_fp16")]; + tensor var_7652_equation_0 = const()[name = tensor("op_7652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7652_cast_fp16 = einsum(equation = var_7652_equation_0, values = (var_7356_cast_fp16, var_7597_cast_fp16))[name = tensor("op_7652_cast_fp16")]; + tensor var_7654_equation_0 = const()[name = tensor("op_7654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7654_cast_fp16 = einsum(equation = var_7654_equation_0, values = (var_7356_cast_fp16, var_7598_cast_fp16))[name = tensor("op_7654_cast_fp16")]; + tensor var_7656_equation_0 = const()[name = tensor("op_7656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7656_cast_fp16 = einsum(equation = var_7656_equation_0, values = (var_7360_cast_fp16, var_7599_cast_fp16))[name = tensor("op_7656_cast_fp16")]; + tensor var_7658_equation_0 = const()[name = tensor("op_7658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7658_cast_fp16 = einsum(equation = var_7658_equation_0, values = (var_7360_cast_fp16, var_7600_cast_fp16))[name = tensor("op_7658_cast_fp16")]; + tensor var_7660_equation_0 = const()[name = tensor("op_7660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7660_cast_fp16 = einsum(equation = var_7660_equation_0, values = (var_7360_cast_fp16, var_7601_cast_fp16))[name = tensor("op_7660_cast_fp16")]; + tensor var_7662_equation_0 = const()[name = tensor("op_7662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7662_cast_fp16 = einsum(equation = var_7662_equation_0, values = (var_7360_cast_fp16, var_7602_cast_fp16))[name = tensor("op_7662_cast_fp16")]; + tensor var_7664_equation_0 = const()[name = tensor("op_7664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7664_cast_fp16 = einsum(equation = var_7664_equation_0, values = (var_7364_cast_fp16, var_7603_cast_fp16))[name = tensor("op_7664_cast_fp16")]; + tensor var_7666_equation_0 = const()[name = tensor("op_7666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7666_cast_fp16 = einsum(equation = var_7666_equation_0, values = (var_7364_cast_fp16, var_7604_cast_fp16))[name = tensor("op_7666_cast_fp16")]; + tensor var_7668_equation_0 = const()[name = tensor("op_7668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7668_cast_fp16 = einsum(equation = var_7668_equation_0, values = (var_7364_cast_fp16, var_7605_cast_fp16))[name = tensor("op_7668_cast_fp16")]; + tensor var_7670_equation_0 = const()[name = tensor("op_7670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7670_cast_fp16 = einsum(equation = var_7670_equation_0, values = (var_7364_cast_fp16, var_7606_cast_fp16))[name = tensor("op_7670_cast_fp16")]; + tensor var_7672_equation_0 = const()[name = tensor("op_7672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7672_cast_fp16 = einsum(equation = var_7672_equation_0, values = (var_7368_cast_fp16, var_7607_cast_fp16))[name = tensor("op_7672_cast_fp16")]; + tensor var_7674_equation_0 = const()[name = 
tensor("op_7674_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7674_cast_fp16 = einsum(equation = var_7674_equation_0, values = (var_7368_cast_fp16, var_7608_cast_fp16))[name = tensor("op_7674_cast_fp16")]; + tensor var_7676_equation_0 = const()[name = tensor("op_7676_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7676_cast_fp16 = einsum(equation = var_7676_equation_0, values = (var_7368_cast_fp16, var_7609_cast_fp16))[name = tensor("op_7676_cast_fp16")]; + tensor var_7678_equation_0 = const()[name = tensor("op_7678_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7678_cast_fp16 = einsum(equation = var_7678_equation_0, values = (var_7368_cast_fp16, var_7610_cast_fp16))[name = tensor("op_7678_cast_fp16")]; + tensor var_7680_equation_0 = const()[name = tensor("op_7680_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7680_cast_fp16 = einsum(equation = var_7680_equation_0, values = (var_7372_cast_fp16, var_7611_cast_fp16))[name = tensor("op_7680_cast_fp16")]; + tensor var_7682_equation_0 = const()[name = tensor("op_7682_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7682_cast_fp16 = einsum(equation = var_7682_equation_0, values = (var_7372_cast_fp16, var_7612_cast_fp16))[name = tensor("op_7682_cast_fp16")]; + tensor var_7684_equation_0 = const()[name = tensor("op_7684_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7684_cast_fp16 = einsum(equation = var_7684_equation_0, values = (var_7372_cast_fp16, var_7613_cast_fp16))[name = tensor("op_7684_cast_fp16")]; + tensor var_7686_equation_0 = const()[name = tensor("op_7686_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7686_cast_fp16 = einsum(equation = var_7686_equation_0, values = (var_7372_cast_fp16, var_7614_cast_fp16))[name = tensor("op_7686_cast_fp16")]; + tensor var_7688_equation_0 = const()[name = tensor("op_7688_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7688_cast_fp16 = einsum(equation = var_7688_equation_0, values = (var_7376_cast_fp16, var_7615_cast_fp16))[name = tensor("op_7688_cast_fp16")]; + tensor var_7690_equation_0 = const()[name = tensor("op_7690_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7690_cast_fp16 = einsum(equation = var_7690_equation_0, values = (var_7376_cast_fp16, var_7616_cast_fp16))[name = tensor("op_7690_cast_fp16")]; + tensor var_7692_equation_0 = const()[name = tensor("op_7692_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7692_cast_fp16 = einsum(equation = var_7692_equation_0, values = (var_7376_cast_fp16, var_7617_cast_fp16))[name = tensor("op_7692_cast_fp16")]; + tensor var_7694_equation_0 = const()[name = tensor("op_7694_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7694_cast_fp16 = einsum(equation = var_7694_equation_0, values = (var_7376_cast_fp16, var_7618_cast_fp16))[name = tensor("op_7694_cast_fp16")]; + tensor var_7696_equation_0 = const()[name = tensor("op_7696_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7696_cast_fp16 = einsum(equation = var_7696_equation_0, values = (var_7380_cast_fp16, var_7619_cast_fp16))[name = tensor("op_7696_cast_fp16")]; + tensor var_7698_equation_0 = const()[name = tensor("op_7698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7698_cast_fp16 = einsum(equation = var_7698_equation_0, values = (var_7380_cast_fp16, var_7620_cast_fp16))[name = tensor("op_7698_cast_fp16")]; + tensor var_7700_equation_0 = const()[name = tensor("op_7700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_7700_cast_fp16 = einsum(equation = var_7700_equation_0, values = (var_7380_cast_fp16, var_7621_cast_fp16))[name = tensor("op_7700_cast_fp16")]; + tensor var_7702_equation_0 = const()[name = tensor("op_7702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7702_cast_fp16 = einsum(equation = var_7702_equation_0, values = (var_7380_cast_fp16, var_7622_cast_fp16))[name = tensor("op_7702_cast_fp16")]; + tensor var_7704_equation_0 = const()[name = tensor("op_7704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7704_cast_fp16 = einsum(equation = var_7704_equation_0, values = (var_7384_cast_fp16, var_7623_cast_fp16))[name = tensor("op_7704_cast_fp16")]; + tensor var_7706_equation_0 = const()[name = tensor("op_7706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7706_cast_fp16 = einsum(equation = var_7706_equation_0, values = (var_7384_cast_fp16, var_7624_cast_fp16))[name = tensor("op_7706_cast_fp16")]; + tensor var_7708_equation_0 = const()[name = tensor("op_7708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7708_cast_fp16 = einsum(equation = var_7708_equation_0, values = (var_7384_cast_fp16, var_7625_cast_fp16))[name = tensor("op_7708_cast_fp16")]; + tensor var_7710_equation_0 = const()[name = tensor("op_7710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7710_cast_fp16 = einsum(equation = var_7710_equation_0, values = (var_7384_cast_fp16, var_7626_cast_fp16))[name = tensor("op_7710_cast_fp16")]; + tensor var_7712_equation_0 = const()[name = tensor("op_7712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7712_cast_fp16 = einsum(equation = var_7712_equation_0, values = (var_7388_cast_fp16, var_7627_cast_fp16))[name = tensor("op_7712_cast_fp16")]; + tensor var_7714_equation_0 = const()[name = tensor("op_7714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7714_cast_fp16 = einsum(equation = var_7714_equation_0, values = (var_7388_cast_fp16, var_7628_cast_fp16))[name = tensor("op_7714_cast_fp16")]; + tensor var_7716_equation_0 = const()[name = tensor("op_7716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7716_cast_fp16 = einsum(equation = var_7716_equation_0, values = (var_7388_cast_fp16, var_7629_cast_fp16))[name = tensor("op_7716_cast_fp16")]; + tensor var_7718_equation_0 = const()[name = tensor("op_7718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7718_cast_fp16 = einsum(equation = var_7718_equation_0, values = (var_7388_cast_fp16, var_7630_cast_fp16))[name = tensor("op_7718_cast_fp16")]; + tensor var_7720_equation_0 = const()[name = tensor("op_7720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7720_cast_fp16 = einsum(equation = var_7720_equation_0, values = (var_7392_cast_fp16, var_7631_cast_fp16))[name = tensor("op_7720_cast_fp16")]; + tensor var_7722_equation_0 = const()[name = tensor("op_7722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7722_cast_fp16 = einsum(equation = var_7722_equation_0, values = (var_7392_cast_fp16, var_7632_cast_fp16))[name = tensor("op_7722_cast_fp16")]; + tensor var_7724_equation_0 = const()[name = tensor("op_7724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7724_cast_fp16 = einsum(equation = var_7724_equation_0, values = (var_7392_cast_fp16, var_7633_cast_fp16))[name = tensor("op_7724_cast_fp16")]; + tensor var_7726_equation_0 = const()[name = tensor("op_7726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7726_cast_fp16 = einsum(equation = var_7726_equation_0, values = 
(var_7392_cast_fp16, var_7634_cast_fp16))[name = tensor("op_7726_cast_fp16")]; + tensor var_7728_equation_0 = const()[name = tensor("op_7728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7728_cast_fp16 = einsum(equation = var_7728_equation_0, values = (var_7396_cast_fp16, var_7635_cast_fp16))[name = tensor("op_7728_cast_fp16")]; + tensor var_7730_equation_0 = const()[name = tensor("op_7730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7730_cast_fp16 = einsum(equation = var_7730_equation_0, values = (var_7396_cast_fp16, var_7636_cast_fp16))[name = tensor("op_7730_cast_fp16")]; + tensor var_7732_equation_0 = const()[name = tensor("op_7732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7732_cast_fp16 = einsum(equation = var_7732_equation_0, values = (var_7396_cast_fp16, var_7637_cast_fp16))[name = tensor("op_7732_cast_fp16")]; + tensor var_7734_equation_0 = const()[name = tensor("op_7734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7734_cast_fp16 = einsum(equation = var_7734_equation_0, values = (var_7396_cast_fp16, var_7638_cast_fp16))[name = tensor("op_7734_cast_fp16")]; + tensor var_7736_interleave_0 = const()[name = tensor("op_7736_interleave_0"), val = tensor(false)]; + tensor var_7736_cast_fp16 = concat(axis = var_6847, interleave = var_7736_interleave_0, values = (var_7640_cast_fp16, var_7642_cast_fp16, var_7644_cast_fp16, var_7646_cast_fp16))[name = tensor("op_7736_cast_fp16")]; + tensor var_7738_interleave_0 = const()[name = tensor("op_7738_interleave_0"), val = tensor(false)]; + tensor var_7738_cast_fp16 = concat(axis = var_6847, interleave = var_7738_interleave_0, values = (var_7648_cast_fp16, var_7650_cast_fp16, var_7652_cast_fp16, var_7654_cast_fp16))[name = tensor("op_7738_cast_fp16")]; + tensor var_7740_interleave_0 = const()[name = tensor("op_7740_interleave_0"), val = tensor(false)]; + tensor var_7740_cast_fp16 = concat(axis = var_6847, interleave = var_7740_interleave_0, values = (var_7656_cast_fp16, var_7658_cast_fp16, var_7660_cast_fp16, var_7662_cast_fp16))[name = tensor("op_7740_cast_fp16")]; + tensor var_7742_interleave_0 = const()[name = tensor("op_7742_interleave_0"), val = tensor(false)]; + tensor var_7742_cast_fp16 = concat(axis = var_6847, interleave = var_7742_interleave_0, values = (var_7664_cast_fp16, var_7666_cast_fp16, var_7668_cast_fp16, var_7670_cast_fp16))[name = tensor("op_7742_cast_fp16")]; + tensor var_7744_interleave_0 = const()[name = tensor("op_7744_interleave_0"), val = tensor(false)]; + tensor var_7744_cast_fp16 = concat(axis = var_6847, interleave = var_7744_interleave_0, values = (var_7672_cast_fp16, var_7674_cast_fp16, var_7676_cast_fp16, var_7678_cast_fp16))[name = tensor("op_7744_cast_fp16")]; + tensor var_7746_interleave_0 = const()[name = tensor("op_7746_interleave_0"), val = tensor(false)]; + tensor var_7746_cast_fp16 = concat(axis = var_6847, interleave = var_7746_interleave_0, values = (var_7680_cast_fp16, var_7682_cast_fp16, var_7684_cast_fp16, var_7686_cast_fp16))[name = tensor("op_7746_cast_fp16")]; + tensor var_7748_interleave_0 = const()[name = tensor("op_7748_interleave_0"), val = tensor(false)]; + tensor var_7748_cast_fp16 = concat(axis = var_6847, interleave = var_7748_interleave_0, values = (var_7688_cast_fp16, var_7690_cast_fp16, var_7692_cast_fp16, var_7694_cast_fp16))[name = tensor("op_7748_cast_fp16")]; + tensor var_7750_interleave_0 = const()[name = tensor("op_7750_interleave_0"), val = tensor(false)]; + tensor var_7750_cast_fp16 = concat(axis = var_6847, interleave 
= var_7750_interleave_0, values = (var_7696_cast_fp16, var_7698_cast_fp16, var_7700_cast_fp16, var_7702_cast_fp16))[name = tensor("op_7750_cast_fp16")]; + tensor var_7752_interleave_0 = const()[name = tensor("op_7752_interleave_0"), val = tensor(false)]; + tensor var_7752_cast_fp16 = concat(axis = var_6847, interleave = var_7752_interleave_0, values = (var_7704_cast_fp16, var_7706_cast_fp16, var_7708_cast_fp16, var_7710_cast_fp16))[name = tensor("op_7752_cast_fp16")]; + tensor var_7754_interleave_0 = const()[name = tensor("op_7754_interleave_0"), val = tensor(false)]; + tensor var_7754_cast_fp16 = concat(axis = var_6847, interleave = var_7754_interleave_0, values = (var_7712_cast_fp16, var_7714_cast_fp16, var_7716_cast_fp16, var_7718_cast_fp16))[name = tensor("op_7754_cast_fp16")]; + tensor var_7756_interleave_0 = const()[name = tensor("op_7756_interleave_0"), val = tensor(false)]; + tensor var_7756_cast_fp16 = concat(axis = var_6847, interleave = var_7756_interleave_0, values = (var_7720_cast_fp16, var_7722_cast_fp16, var_7724_cast_fp16, var_7726_cast_fp16))[name = tensor("op_7756_cast_fp16")]; + tensor var_7758_interleave_0 = const()[name = tensor("op_7758_interleave_0"), val = tensor(false)]; + tensor var_7758_cast_fp16 = concat(axis = var_6847, interleave = var_7758_interleave_0, values = (var_7728_cast_fp16, var_7730_cast_fp16, var_7732_cast_fp16, var_7734_cast_fp16))[name = tensor("op_7758_cast_fp16")]; + tensor input_57_interleave_0 = const()[name = tensor("input_57_interleave_0"), val = tensor(false)]; + tensor input_57_cast_fp16 = concat(axis = var_6864, interleave = input_57_interleave_0, values = (var_7736_cast_fp16, var_7738_cast_fp16, var_7740_cast_fp16, var_7742_cast_fp16, var_7744_cast_fp16, var_7746_cast_fp16, var_7748_cast_fp16, var_7750_cast_fp16, var_7752_cast_fp16, var_7754_cast_fp16, var_7756_cast_fp16, var_7758_cast_fp16))[name = tensor("input_57_cast_fp16")]; + tensor var_7763 = const()[name = tensor("op_7763"), val = tensor([1, 1])]; + tensor var_7765 = const()[name = tensor("op_7765"), val = tensor([1, 1])]; + tensor obj_31_pad_type_0 = const()[name = tensor("obj_31_pad_type_0"), val = tensor("custom")]; + tensor obj_31_pad_0 = const()[name = tensor("obj_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108989952)))]; + tensor layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110169664)))]; + tensor obj_31_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = var_7765, groups = var_6864, pad = obj_31_pad_0, pad_type = obj_31_pad_type_0, strides = var_7763, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("obj_31_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor var_7771 = const()[name = tensor("op_7771"), val = tensor([1])]; + tensor channels_mean_31_cast_fp16 = reduce_mean(axes = var_7771, keep_dims = var_6865, x = inputs_31_cast_fp16)[name = tensor("channels_mean_31_cast_fp16")]; + tensor zero_mean_31_cast_fp16 = sub(x = inputs_31_cast_fp16, y = channels_mean_31_cast_fp16)[name = tensor("zero_mean_31_cast_fp16")]; + tensor zero_mean_sq_31_cast_fp16 = mul(x = 
zero_mean_31_cast_fp16, y = zero_mean_31_cast_fp16)[name = tensor("zero_mean_sq_31_cast_fp16")]; + tensor var_7775 = const()[name = tensor("op_7775"), val = tensor([1])]; + tensor var_7776_cast_fp16 = reduce_mean(axes = var_7775, keep_dims = var_6865, x = zero_mean_sq_31_cast_fp16)[name = tensor("op_7776_cast_fp16")]; + tensor var_7777_to_fp16 = const()[name = tensor("op_7777_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_7778_cast_fp16 = add(x = var_7776_cast_fp16, y = var_7777_to_fp16)[name = tensor("op_7778_cast_fp16")]; + tensor denom_31_epsilon_0_to_fp16 = const()[name = tensor("denom_31_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0_to_fp16, x = var_7778_cast_fp16)[name = tensor("denom_31_cast_fp16")]; + tensor out_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = denom_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110171264)))]; + tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110172864)))]; + tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_7789 = const()[name = tensor("op_7789"), val = tensor([1, 1])]; + tensor var_7791 = const()[name = tensor("op_7791"), val = tensor([1, 1])]; + tensor input_61_pad_type_0 = const()[name = tensor("input_61_pad_type_0"), val = tensor("custom")]; + tensor input_61_pad_0 = const()[name = tensor("input_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_fc1_weight_to_fp16 = const()[name = tensor("layers_7_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110174464)))]; + tensor layers_7_fc1_bias_to_fp16 = const()[name = tensor("layers_7_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114893120)))]; + tensor input_61_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = var_7791, groups = var_6864, pad = input_61_pad_0, pad_type = input_61_pad_type_0, strides = var_7789, weight = layers_7_fc1_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; + tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_7797 = const()[name = tensor("op_7797"), val = tensor([1, 1])]; + tensor var_7799 = const()[name = tensor("op_7799"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_type_0 = const()[name = tensor("hidden_states_19_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_19_pad_0 = const()[name = tensor("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_fc2_weight_to_fp16 = const()[name = tensor("layers_7_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114899328)))]; + tensor layers_7_fc2_bias_to_fp16 = const()[name = 
tensor("layers_7_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119617984)))]; + tensor hidden_states_19_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = var_7799, groups = var_6864, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = var_7797, weight = layers_7_fc2_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_7806 = const()[name = tensor("op_7806"), val = tensor(3)]; + tensor var_7823 = const()[name = tensor("op_7823"), val = tensor(1)]; + tensor var_7824 = const()[name = tensor("op_7824"), val = tensor(true)]; + tensor var_7834 = const()[name = tensor("op_7834"), val = tensor([1])]; + tensor channels_mean_33_cast_fp16 = reduce_mean(axes = var_7834, keep_dims = var_7824, x = inputs_33_cast_fp16)[name = tensor("channels_mean_33_cast_fp16")]; + tensor zero_mean_33_cast_fp16 = sub(x = inputs_33_cast_fp16, y = channels_mean_33_cast_fp16)[name = tensor("zero_mean_33_cast_fp16")]; + tensor zero_mean_sq_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = zero_mean_33_cast_fp16)[name = tensor("zero_mean_sq_33_cast_fp16")]; + tensor var_7838 = const()[name = tensor("op_7838"), val = tensor([1])]; + tensor var_7839_cast_fp16 = reduce_mean(axes = var_7838, keep_dims = var_7824, x = zero_mean_sq_33_cast_fp16)[name = tensor("op_7839_cast_fp16")]; + tensor var_7840_to_fp16 = const()[name = tensor("op_7840_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_7841_cast_fp16 = add(x = var_7839_cast_fp16, y = var_7840_to_fp16)[name = tensor("op_7841_cast_fp16")]; + tensor denom_33_epsilon_0_to_fp16 = const()[name = tensor("denom_33_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0_to_fp16, x = var_7841_cast_fp16)[name = tensor("denom_33_cast_fp16")]; + tensor out_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = denom_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119619584)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119621184)))]; + tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; + tensor var_7856 = const()[name = tensor("op_7856"), val = tensor([1, 1])]; + tensor var_7858 = const()[name = tensor("op_7858"), val = tensor([1, 1])]; + tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("custom")]; + tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119622784)))]; + tensor layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_bias_to_fp16"), 
val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120802496)))]; + tensor query_17_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = var_7858, groups = var_7823, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = var_7856, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_7862 = const()[name = tensor("op_7862"), val = tensor([1, 1])]; + tensor var_7864 = const()[name = tensor("op_7864"), val = tensor([1, 1])]; + tensor key_17_pad_type_0 = const()[name = tensor("key_17_pad_type_0"), val = tensor("custom")]; + tensor key_17_pad_0 = const()[name = tensor("key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120804096)))]; + tensor key_17_cast_fp16 = conv(dilations = var_7864, groups = var_7823, pad = key_17_pad_0, pad_type = key_17_pad_type_0, strides = var_7862, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_7869 = const()[name = tensor("op_7869"), val = tensor([1, 1])]; + tensor var_7871 = const()[name = tensor("op_7871"), val = tensor([1, 1])]; + tensor value_17_pad_type_0 = const()[name = tensor("value_17_pad_type_0"), val = tensor("custom")]; + tensor value_17_pad_0 = const()[name = tensor("value_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121983808)))]; + tensor layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123163520)))]; + tensor value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = var_7871, groups = var_7823, pad = value_17_pad_0, pad_type = value_17_pad_type_0, strides = var_7869, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_7878_begin_0 = const()[name = tensor("op_7878_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7878_end_0 = const()[name = tensor("op_7878_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7878_end_mask_0 = const()[name = tensor("op_7878_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7878_cast_fp16 = slice_by_index(begin = var_7878_begin_0, end = var_7878_end_0, end_mask = var_7878_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7878_cast_fp16")]; + tensor var_7882_begin_0 = const()[name = tensor("op_7882_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_7882_end_0 = const()[name = tensor("op_7882_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_7882_end_mask_0 = const()[name = tensor("op_7882_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7882_cast_fp16 = slice_by_index(begin = var_7882_begin_0, end = var_7882_end_0, end_mask = var_7882_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7882_cast_fp16")]; + tensor var_7886_begin_0 = const()[name = tensor("op_7886_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_7886_end_0 = const()[name = tensor("op_7886_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor 
var_7886_end_mask_0 = const()[name = tensor("op_7886_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7886_cast_fp16 = slice_by_index(begin = var_7886_begin_0, end = var_7886_end_0, end_mask = var_7886_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7886_cast_fp16")]; + tensor var_7890_begin_0 = const()[name = tensor("op_7890_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_7890_end_0 = const()[name = tensor("op_7890_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_7890_end_mask_0 = const()[name = tensor("op_7890_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7890_cast_fp16 = slice_by_index(begin = var_7890_begin_0, end = var_7890_end_0, end_mask = var_7890_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7890_cast_fp16")]; + tensor var_7894_begin_0 = const()[name = tensor("op_7894_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_7894_end_0 = const()[name = tensor("op_7894_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_7894_end_mask_0 = const()[name = tensor("op_7894_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7894_cast_fp16 = slice_by_index(begin = var_7894_begin_0, end = var_7894_end_0, end_mask = var_7894_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7894_cast_fp16")]; + tensor var_7898_begin_0 = const()[name = tensor("op_7898_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_7898_end_0 = const()[name = tensor("op_7898_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_7898_end_mask_0 = const()[name = tensor("op_7898_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7898_cast_fp16 = slice_by_index(begin = var_7898_begin_0, end = var_7898_end_0, end_mask = var_7898_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7898_cast_fp16")]; + tensor var_7902_begin_0 = const()[name = tensor("op_7902_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_7902_end_0 = const()[name = tensor("op_7902_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_7902_end_mask_0 = const()[name = tensor("op_7902_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7902_cast_fp16 = slice_by_index(begin = var_7902_begin_0, end = var_7902_end_0, end_mask = var_7902_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7902_cast_fp16")]; + tensor var_7906_begin_0 = const()[name = tensor("op_7906_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_7906_end_0 = const()[name = tensor("op_7906_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_7906_end_mask_0 = const()[name = tensor("op_7906_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7906_cast_fp16 = slice_by_index(begin = var_7906_begin_0, end = var_7906_end_0, end_mask = var_7906_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7906_cast_fp16")]; + tensor var_7910_begin_0 = const()[name = tensor("op_7910_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_7910_end_0 = const()[name = tensor("op_7910_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_7910_end_mask_0 = const()[name = tensor("op_7910_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7910_cast_fp16 = slice_by_index(begin = var_7910_begin_0, end = var_7910_end_0, end_mask = var_7910_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7910_cast_fp16")]; + tensor var_7914_begin_0 = const()[name = tensor("op_7914_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_7914_end_0 = const()[name = tensor("op_7914_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_7914_end_mask_0 
= const()[name = tensor("op_7914_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7914_cast_fp16 = slice_by_index(begin = var_7914_begin_0, end = var_7914_end_0, end_mask = var_7914_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7914_cast_fp16")]; + tensor var_7918_begin_0 = const()[name = tensor("op_7918_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_7918_end_0 = const()[name = tensor("op_7918_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_7918_end_mask_0 = const()[name = tensor("op_7918_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7918_cast_fp16 = slice_by_index(begin = var_7918_begin_0, end = var_7918_end_0, end_mask = var_7918_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7918_cast_fp16")]; + tensor var_7922_begin_0 = const()[name = tensor("op_7922_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_7922_end_0 = const()[name = tensor("op_7922_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_7922_end_mask_0 = const()[name = tensor("op_7922_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7922_cast_fp16 = slice_by_index(begin = var_7922_begin_0, end = var_7922_end_0, end_mask = var_7922_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7922_cast_fp16")]; + tensor var_7931_begin_0 = const()[name = tensor("op_7931_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7931_end_0 = const()[name = tensor("op_7931_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7931_end_mask_0 = const()[name = tensor("op_7931_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7931_cast_fp16 = slice_by_index(begin = var_7931_begin_0, end = var_7931_end_0, end_mask = var_7931_end_mask_0, x = var_7878_cast_fp16)[name = tensor("op_7931_cast_fp16")]; + tensor var_7938_begin_0 = const()[name = tensor("op_7938_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7938_end_0 = const()[name = tensor("op_7938_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7938_end_mask_0 = const()[name = tensor("op_7938_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7938_cast_fp16 = slice_by_index(begin = var_7938_begin_0, end = var_7938_end_0, end_mask = var_7938_end_mask_0, x = var_7878_cast_fp16)[name = tensor("op_7938_cast_fp16")]; + tensor var_7945_begin_0 = const()[name = tensor("op_7945_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7945_end_0 = const()[name = tensor("op_7945_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7945_end_mask_0 = const()[name = tensor("op_7945_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7945_cast_fp16 = slice_by_index(begin = var_7945_begin_0, end = var_7945_end_0, end_mask = var_7945_end_mask_0, x = var_7878_cast_fp16)[name = tensor("op_7945_cast_fp16")]; + tensor var_7952_begin_0 = const()[name = tensor("op_7952_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7952_end_0 = const()[name = tensor("op_7952_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7952_end_mask_0 = const()[name = tensor("op_7952_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7952_cast_fp16 = slice_by_index(begin = var_7952_begin_0, end = var_7952_end_0, end_mask = var_7952_end_mask_0, x = var_7878_cast_fp16)[name = tensor("op_7952_cast_fp16")]; + tensor var_7959_begin_0 = const()[name = tensor("op_7959_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7959_end_0 = const()[name = tensor("op_7959_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7959_end_mask_0 = const()[name = 
tensor("op_7959_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7959_cast_fp16 = slice_by_index(begin = var_7959_begin_0, end = var_7959_end_0, end_mask = var_7959_end_mask_0, x = var_7882_cast_fp16)[name = tensor("op_7959_cast_fp16")]; + tensor var_7966_begin_0 = const()[name = tensor("op_7966_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7966_end_0 = const()[name = tensor("op_7966_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7966_end_mask_0 = const()[name = tensor("op_7966_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7966_cast_fp16 = slice_by_index(begin = var_7966_begin_0, end = var_7966_end_0, end_mask = var_7966_end_mask_0, x = var_7882_cast_fp16)[name = tensor("op_7966_cast_fp16")]; + tensor var_7973_begin_0 = const()[name = tensor("op_7973_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7973_end_0 = const()[name = tensor("op_7973_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7973_end_mask_0 = const()[name = tensor("op_7973_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7973_cast_fp16 = slice_by_index(begin = var_7973_begin_0, end = var_7973_end_0, end_mask = var_7973_end_mask_0, x = var_7882_cast_fp16)[name = tensor("op_7973_cast_fp16")]; + tensor var_7980_begin_0 = const()[name = tensor("op_7980_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7980_end_0 = const()[name = tensor("op_7980_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7980_end_mask_0 = const()[name = tensor("op_7980_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7980_cast_fp16 = slice_by_index(begin = var_7980_begin_0, end = var_7980_end_0, end_mask = var_7980_end_mask_0, x = var_7882_cast_fp16)[name = tensor("op_7980_cast_fp16")]; + tensor var_7987_begin_0 = const()[name = tensor("op_7987_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7987_end_0 = const()[name = tensor("op_7987_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7987_end_mask_0 = const()[name = tensor("op_7987_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7987_cast_fp16 = slice_by_index(begin = var_7987_begin_0, end = var_7987_end_0, end_mask = var_7987_end_mask_0, x = var_7886_cast_fp16)[name = tensor("op_7987_cast_fp16")]; + tensor var_7994_begin_0 = const()[name = tensor("op_7994_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7994_end_0 = const()[name = tensor("op_7994_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7994_end_mask_0 = const()[name = tensor("op_7994_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7994_cast_fp16 = slice_by_index(begin = var_7994_begin_0, end = var_7994_end_0, end_mask = var_7994_end_mask_0, x = var_7886_cast_fp16)[name = tensor("op_7994_cast_fp16")]; + tensor var_8001_begin_0 = const()[name = tensor("op_8001_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8001_end_0 = const()[name = tensor("op_8001_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8001_end_mask_0 = const()[name = tensor("op_8001_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8001_cast_fp16 = slice_by_index(begin = var_8001_begin_0, end = var_8001_end_0, end_mask = var_8001_end_mask_0, x = var_7886_cast_fp16)[name = tensor("op_8001_cast_fp16")]; + tensor var_8008_begin_0 = const()[name = tensor("op_8008_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8008_end_0 = const()[name = tensor("op_8008_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8008_end_mask_0 = const()[name = tensor("op_8008_end_mask_0"), 
val = tensor([true, true, true, false])]; + tensor var_8008_cast_fp16 = slice_by_index(begin = var_8008_begin_0, end = var_8008_end_0, end_mask = var_8008_end_mask_0, x = var_7886_cast_fp16)[name = tensor("op_8008_cast_fp16")]; + tensor var_8015_begin_0 = const()[name = tensor("op_8015_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8015_end_0 = const()[name = tensor("op_8015_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8015_end_mask_0 = const()[name = tensor("op_8015_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8015_cast_fp16 = slice_by_index(begin = var_8015_begin_0, end = var_8015_end_0, end_mask = var_8015_end_mask_0, x = var_7890_cast_fp16)[name = tensor("op_8015_cast_fp16")]; + tensor var_8022_begin_0 = const()[name = tensor("op_8022_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8022_end_0 = const()[name = tensor("op_8022_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8022_end_mask_0 = const()[name = tensor("op_8022_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8022_cast_fp16 = slice_by_index(begin = var_8022_begin_0, end = var_8022_end_0, end_mask = var_8022_end_mask_0, x = var_7890_cast_fp16)[name = tensor("op_8022_cast_fp16")]; + tensor var_8029_begin_0 = const()[name = tensor("op_8029_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8029_end_0 = const()[name = tensor("op_8029_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8029_end_mask_0 = const()[name = tensor("op_8029_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8029_cast_fp16 = slice_by_index(begin = var_8029_begin_0, end = var_8029_end_0, end_mask = var_8029_end_mask_0, x = var_7890_cast_fp16)[name = tensor("op_8029_cast_fp16")]; + tensor var_8036_begin_0 = const()[name = tensor("op_8036_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8036_end_0 = const()[name = tensor("op_8036_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8036_end_mask_0 = const()[name = tensor("op_8036_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8036_cast_fp16 = slice_by_index(begin = var_8036_begin_0, end = var_8036_end_0, end_mask = var_8036_end_mask_0, x = var_7890_cast_fp16)[name = tensor("op_8036_cast_fp16")]; + tensor var_8043_begin_0 = const()[name = tensor("op_8043_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8043_end_0 = const()[name = tensor("op_8043_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8043_end_mask_0 = const()[name = tensor("op_8043_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8043_cast_fp16 = slice_by_index(begin = var_8043_begin_0, end = var_8043_end_0, end_mask = var_8043_end_mask_0, x = var_7894_cast_fp16)[name = tensor("op_8043_cast_fp16")]; + tensor var_8050_begin_0 = const()[name = tensor("op_8050_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8050_end_0 = const()[name = tensor("op_8050_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8050_end_mask_0 = const()[name = tensor("op_8050_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8050_cast_fp16 = slice_by_index(begin = var_8050_begin_0, end = var_8050_end_0, end_mask = var_8050_end_mask_0, x = var_7894_cast_fp16)[name = tensor("op_8050_cast_fp16")]; + tensor var_8057_begin_0 = const()[name = tensor("op_8057_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8057_end_0 = const()[name = tensor("op_8057_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8057_end_mask_0 = const()[name = tensor("op_8057_end_mask_0"), val = tensor([true, true, true, 
false])]; + tensor var_8057_cast_fp16 = slice_by_index(begin = var_8057_begin_0, end = var_8057_end_0, end_mask = var_8057_end_mask_0, x = var_7894_cast_fp16)[name = tensor("op_8057_cast_fp16")]; + tensor var_8064_begin_0 = const()[name = tensor("op_8064_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8064_end_0 = const()[name = tensor("op_8064_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8064_end_mask_0 = const()[name = tensor("op_8064_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8064_cast_fp16 = slice_by_index(begin = var_8064_begin_0, end = var_8064_end_0, end_mask = var_8064_end_mask_0, x = var_7894_cast_fp16)[name = tensor("op_8064_cast_fp16")]; + tensor var_8071_begin_0 = const()[name = tensor("op_8071_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8071_end_0 = const()[name = tensor("op_8071_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8071_end_mask_0 = const()[name = tensor("op_8071_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8071_cast_fp16 = slice_by_index(begin = var_8071_begin_0, end = var_8071_end_0, end_mask = var_8071_end_mask_0, x = var_7898_cast_fp16)[name = tensor("op_8071_cast_fp16")]; + tensor var_8078_begin_0 = const()[name = tensor("op_8078_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8078_end_0 = const()[name = tensor("op_8078_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8078_end_mask_0 = const()[name = tensor("op_8078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8078_cast_fp16 = slice_by_index(begin = var_8078_begin_0, end = var_8078_end_0, end_mask = var_8078_end_mask_0, x = var_7898_cast_fp16)[name = tensor("op_8078_cast_fp16")]; + tensor var_8085_begin_0 = const()[name = tensor("op_8085_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8085_end_0 = const()[name = tensor("op_8085_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8085_end_mask_0 = const()[name = tensor("op_8085_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8085_cast_fp16 = slice_by_index(begin = var_8085_begin_0, end = var_8085_end_0, end_mask = var_8085_end_mask_0, x = var_7898_cast_fp16)[name = tensor("op_8085_cast_fp16")]; + tensor var_8092_begin_0 = const()[name = tensor("op_8092_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8092_end_0 = const()[name = tensor("op_8092_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8092_end_mask_0 = const()[name = tensor("op_8092_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8092_cast_fp16 = slice_by_index(begin = var_8092_begin_0, end = var_8092_end_0, end_mask = var_8092_end_mask_0, x = var_7898_cast_fp16)[name = tensor("op_8092_cast_fp16")]; + tensor var_8099_begin_0 = const()[name = tensor("op_8099_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8099_end_0 = const()[name = tensor("op_8099_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8099_end_mask_0 = const()[name = tensor("op_8099_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8099_cast_fp16 = slice_by_index(begin = var_8099_begin_0, end = var_8099_end_0, end_mask = var_8099_end_mask_0, x = var_7902_cast_fp16)[name = tensor("op_8099_cast_fp16")]; + tensor var_8106_begin_0 = const()[name = tensor("op_8106_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8106_end_0 = const()[name = tensor("op_8106_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8106_end_mask_0 = const()[name = tensor("op_8106_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_8106_cast_fp16 = slice_by_index(begin = var_8106_begin_0, end = var_8106_end_0, end_mask = var_8106_end_mask_0, x = var_7902_cast_fp16)[name = tensor("op_8106_cast_fp16")]; + tensor var_8113_begin_0 = const()[name = tensor("op_8113_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8113_end_0 = const()[name = tensor("op_8113_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8113_end_mask_0 = const()[name = tensor("op_8113_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8113_cast_fp16 = slice_by_index(begin = var_8113_begin_0, end = var_8113_end_0, end_mask = var_8113_end_mask_0, x = var_7902_cast_fp16)[name = tensor("op_8113_cast_fp16")]; + tensor var_8120_begin_0 = const()[name = tensor("op_8120_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8120_end_0 = const()[name = tensor("op_8120_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8120_end_mask_0 = const()[name = tensor("op_8120_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8120_cast_fp16 = slice_by_index(begin = var_8120_begin_0, end = var_8120_end_0, end_mask = var_8120_end_mask_0, x = var_7902_cast_fp16)[name = tensor("op_8120_cast_fp16")]; + tensor var_8127_begin_0 = const()[name = tensor("op_8127_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8127_end_0 = const()[name = tensor("op_8127_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8127_end_mask_0 = const()[name = tensor("op_8127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8127_cast_fp16 = slice_by_index(begin = var_8127_begin_0, end = var_8127_end_0, end_mask = var_8127_end_mask_0, x = var_7906_cast_fp16)[name = tensor("op_8127_cast_fp16")]; + tensor var_8134_begin_0 = const()[name = tensor("op_8134_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8134_end_0 = const()[name = tensor("op_8134_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8134_end_mask_0 = const()[name = tensor("op_8134_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8134_cast_fp16 = slice_by_index(begin = var_8134_begin_0, end = var_8134_end_0, end_mask = var_8134_end_mask_0, x = var_7906_cast_fp16)[name = tensor("op_8134_cast_fp16")]; + tensor var_8141_begin_0 = const()[name = tensor("op_8141_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8141_end_0 = const()[name = tensor("op_8141_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8141_end_mask_0 = const()[name = tensor("op_8141_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8141_cast_fp16 = slice_by_index(begin = var_8141_begin_0, end = var_8141_end_0, end_mask = var_8141_end_mask_0, x = var_7906_cast_fp16)[name = tensor("op_8141_cast_fp16")]; + tensor var_8148_begin_0 = const()[name = tensor("op_8148_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8148_end_0 = const()[name = tensor("op_8148_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8148_end_mask_0 = const()[name = tensor("op_8148_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8148_cast_fp16 = slice_by_index(begin = var_8148_begin_0, end = var_8148_end_0, end_mask = var_8148_end_mask_0, x = var_7906_cast_fp16)[name = tensor("op_8148_cast_fp16")]; + tensor var_8155_begin_0 = const()[name = tensor("op_8155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8155_end_0 = const()[name = tensor("op_8155_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8155_end_mask_0 = const()[name = tensor("op_8155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8155_cast_fp16 = 
slice_by_index(begin = var_8155_begin_0, end = var_8155_end_0, end_mask = var_8155_end_mask_0, x = var_7910_cast_fp16)[name = tensor("op_8155_cast_fp16")]; + tensor var_8162_begin_0 = const()[name = tensor("op_8162_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8162_end_0 = const()[name = tensor("op_8162_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8162_end_mask_0 = const()[name = tensor("op_8162_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8162_cast_fp16 = slice_by_index(begin = var_8162_begin_0, end = var_8162_end_0, end_mask = var_8162_end_mask_0, x = var_7910_cast_fp16)[name = tensor("op_8162_cast_fp16")]; + tensor var_8169_begin_0 = const()[name = tensor("op_8169_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8169_end_0 = const()[name = tensor("op_8169_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8169_end_mask_0 = const()[name = tensor("op_8169_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8169_cast_fp16 = slice_by_index(begin = var_8169_begin_0, end = var_8169_end_0, end_mask = var_8169_end_mask_0, x = var_7910_cast_fp16)[name = tensor("op_8169_cast_fp16")]; + tensor var_8176_begin_0 = const()[name = tensor("op_8176_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8176_end_0 = const()[name = tensor("op_8176_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8176_end_mask_0 = const()[name = tensor("op_8176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8176_cast_fp16 = slice_by_index(begin = var_8176_begin_0, end = var_8176_end_0, end_mask = var_8176_end_mask_0, x = var_7910_cast_fp16)[name = tensor("op_8176_cast_fp16")]; + tensor var_8183_begin_0 = const()[name = tensor("op_8183_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8183_end_0 = const()[name = tensor("op_8183_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8183_end_mask_0 = const()[name = tensor("op_8183_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8183_cast_fp16 = slice_by_index(begin = var_8183_begin_0, end = var_8183_end_0, end_mask = var_8183_end_mask_0, x = var_7914_cast_fp16)[name = tensor("op_8183_cast_fp16")]; + tensor var_8190_begin_0 = const()[name = tensor("op_8190_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8190_end_0 = const()[name = tensor("op_8190_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8190_end_mask_0 = const()[name = tensor("op_8190_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8190_cast_fp16 = slice_by_index(begin = var_8190_begin_0, end = var_8190_end_0, end_mask = var_8190_end_mask_0, x = var_7914_cast_fp16)[name = tensor("op_8190_cast_fp16")]; + tensor var_8197_begin_0 = const()[name = tensor("op_8197_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8197_end_0 = const()[name = tensor("op_8197_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8197_end_mask_0 = const()[name = tensor("op_8197_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8197_cast_fp16 = slice_by_index(begin = var_8197_begin_0, end = var_8197_end_0, end_mask = var_8197_end_mask_0, x = var_7914_cast_fp16)[name = tensor("op_8197_cast_fp16")]; + tensor var_8204_begin_0 = const()[name = tensor("op_8204_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8204_end_0 = const()[name = tensor("op_8204_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8204_end_mask_0 = const()[name = tensor("op_8204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8204_cast_fp16 = slice_by_index(begin = 
var_8204_begin_0, end = var_8204_end_0, end_mask = var_8204_end_mask_0, x = var_7914_cast_fp16)[name = tensor("op_8204_cast_fp16")]; + tensor var_8211_begin_0 = const()[name = tensor("op_8211_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8211_end_0 = const()[name = tensor("op_8211_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8211_end_mask_0 = const()[name = tensor("op_8211_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8211_cast_fp16 = slice_by_index(begin = var_8211_begin_0, end = var_8211_end_0, end_mask = var_8211_end_mask_0, x = var_7918_cast_fp16)[name = tensor("op_8211_cast_fp16")]; + tensor var_8218_begin_0 = const()[name = tensor("op_8218_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8218_end_0 = const()[name = tensor("op_8218_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8218_end_mask_0 = const()[name = tensor("op_8218_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8218_cast_fp16 = slice_by_index(begin = var_8218_begin_0, end = var_8218_end_0, end_mask = var_8218_end_mask_0, x = var_7918_cast_fp16)[name = tensor("op_8218_cast_fp16")]; + tensor var_8225_begin_0 = const()[name = tensor("op_8225_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8225_end_0 = const()[name = tensor("op_8225_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8225_end_mask_0 = const()[name = tensor("op_8225_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8225_cast_fp16 = slice_by_index(begin = var_8225_begin_0, end = var_8225_end_0, end_mask = var_8225_end_mask_0, x = var_7918_cast_fp16)[name = tensor("op_8225_cast_fp16")]; + tensor var_8232_begin_0 = const()[name = tensor("op_8232_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8232_end_0 = const()[name = tensor("op_8232_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8232_end_mask_0 = const()[name = tensor("op_8232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8232_cast_fp16 = slice_by_index(begin = var_8232_begin_0, end = var_8232_end_0, end_mask = var_8232_end_mask_0, x = var_7918_cast_fp16)[name = tensor("op_8232_cast_fp16")]; + tensor var_8239_begin_0 = const()[name = tensor("op_8239_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8239_end_0 = const()[name = tensor("op_8239_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8239_end_mask_0 = const()[name = tensor("op_8239_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8239_cast_fp16 = slice_by_index(begin = var_8239_begin_0, end = var_8239_end_0, end_mask = var_8239_end_mask_0, x = var_7922_cast_fp16)[name = tensor("op_8239_cast_fp16")]; + tensor var_8246_begin_0 = const()[name = tensor("op_8246_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8246_end_0 = const()[name = tensor("op_8246_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8246_end_mask_0 = const()[name = tensor("op_8246_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8246_cast_fp16 = slice_by_index(begin = var_8246_begin_0, end = var_8246_end_0, end_mask = var_8246_end_mask_0, x = var_7922_cast_fp16)[name = tensor("op_8246_cast_fp16")]; + tensor var_8253_begin_0 = const()[name = tensor("op_8253_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8253_end_0 = const()[name = tensor("op_8253_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8253_end_mask_0 = const()[name = tensor("op_8253_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8253_cast_fp16 = slice_by_index(begin = var_8253_begin_0, end = 
var_8253_end_0, end_mask = var_8253_end_mask_0, x = var_7922_cast_fp16)[name = tensor("op_8253_cast_fp16")]; + tensor var_8260_begin_0 = const()[name = tensor("op_8260_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8260_end_0 = const()[name = tensor("op_8260_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8260_end_mask_0 = const()[name = tensor("op_8260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8260_cast_fp16 = slice_by_index(begin = var_8260_begin_0, end = var_8260_end_0, end_mask = var_8260_end_mask_0, x = var_7922_cast_fp16)[name = tensor("op_8260_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_8265_begin_0 = const()[name = tensor("op_8265_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8265_end_0 = const()[name = tensor("op_8265_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_8265_end_mask_0 = const()[name = tensor("op_8265_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_3 = transpose(perm = k_17_perm_0, x = key_17_cast_fp16)[name = tensor("transpose_3")]; + tensor var_8265_cast_fp16 = slice_by_index(begin = var_8265_begin_0, end = var_8265_end_0, end_mask = var_8265_end_mask_0, x = transpose_3)[name = tensor("op_8265_cast_fp16")]; + tensor var_8269_begin_0 = const()[name = tensor("op_8269_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_8269_end_0 = const()[name = tensor("op_8269_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_8269_end_mask_0 = const()[name = tensor("op_8269_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8269_cast_fp16 = slice_by_index(begin = var_8269_begin_0, end = var_8269_end_0, end_mask = var_8269_end_mask_0, x = transpose_3)[name = tensor("op_8269_cast_fp16")]; + tensor var_8273_begin_0 = const()[name = tensor("op_8273_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_8273_end_0 = const()[name = tensor("op_8273_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_8273_end_mask_0 = const()[name = tensor("op_8273_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8273_cast_fp16 = slice_by_index(begin = var_8273_begin_0, end = var_8273_end_0, end_mask = var_8273_end_mask_0, x = transpose_3)[name = tensor("op_8273_cast_fp16")]; + tensor var_8277_begin_0 = const()[name = tensor("op_8277_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_8277_end_0 = const()[name = tensor("op_8277_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_8277_end_mask_0 = const()[name = tensor("op_8277_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8277_cast_fp16 = slice_by_index(begin = var_8277_begin_0, end = var_8277_end_0, end_mask = var_8277_end_mask_0, x = transpose_3)[name = tensor("op_8277_cast_fp16")]; + tensor var_8281_begin_0 = const()[name = tensor("op_8281_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_8281_end_0 = const()[name = tensor("op_8281_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_8281_end_mask_0 = const()[name = tensor("op_8281_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8281_cast_fp16 = slice_by_index(begin = var_8281_begin_0, end = var_8281_end_0, end_mask = var_8281_end_mask_0, x = transpose_3)[name = tensor("op_8281_cast_fp16")]; + tensor var_8285_begin_0 = const()[name = tensor("op_8285_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_8285_end_0 = const()[name = tensor("op_8285_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_8285_end_mask_0 = const()[name = 
tensor("op_8285_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8285_cast_fp16 = slice_by_index(begin = var_8285_begin_0, end = var_8285_end_0, end_mask = var_8285_end_mask_0, x = transpose_3)[name = tensor("op_8285_cast_fp16")]; + tensor var_8289_begin_0 = const()[name = tensor("op_8289_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_8289_end_0 = const()[name = tensor("op_8289_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_8289_end_mask_0 = const()[name = tensor("op_8289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8289_cast_fp16 = slice_by_index(begin = var_8289_begin_0, end = var_8289_end_0, end_mask = var_8289_end_mask_0, x = transpose_3)[name = tensor("op_8289_cast_fp16")]; + tensor var_8293_begin_0 = const()[name = tensor("op_8293_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_8293_end_0 = const()[name = tensor("op_8293_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_8293_end_mask_0 = const()[name = tensor("op_8293_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8293_cast_fp16 = slice_by_index(begin = var_8293_begin_0, end = var_8293_end_0, end_mask = var_8293_end_mask_0, x = transpose_3)[name = tensor("op_8293_cast_fp16")]; + tensor var_8297_begin_0 = const()[name = tensor("op_8297_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_8297_end_0 = const()[name = tensor("op_8297_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_8297_end_mask_0 = const()[name = tensor("op_8297_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8297_cast_fp16 = slice_by_index(begin = var_8297_begin_0, end = var_8297_end_0, end_mask = var_8297_end_mask_0, x = transpose_3)[name = tensor("op_8297_cast_fp16")]; + tensor var_8301_begin_0 = const()[name = tensor("op_8301_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_8301_end_0 = const()[name = tensor("op_8301_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_8301_end_mask_0 = const()[name = tensor("op_8301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8301_cast_fp16 = slice_by_index(begin = var_8301_begin_0, end = var_8301_end_0, end_mask = var_8301_end_mask_0, x = transpose_3)[name = tensor("op_8301_cast_fp16")]; + tensor var_8305_begin_0 = const()[name = tensor("op_8305_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_8305_end_0 = const()[name = tensor("op_8305_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_8305_end_mask_0 = const()[name = tensor("op_8305_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8305_cast_fp16 = slice_by_index(begin = var_8305_begin_0, end = var_8305_end_0, end_mask = var_8305_end_mask_0, x = transpose_3)[name = tensor("op_8305_cast_fp16")]; + tensor var_8309_begin_0 = const()[name = tensor("op_8309_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_8309_end_0 = const()[name = tensor("op_8309_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_8309_end_mask_0 = const()[name = tensor("op_8309_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8309_cast_fp16 = slice_by_index(begin = var_8309_begin_0, end = var_8309_end_0, end_mask = var_8309_end_mask_0, x = transpose_3)[name = tensor("op_8309_cast_fp16")]; + tensor var_8311_begin_0 = const()[name = tensor("op_8311_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8311_end_0 = const()[name = tensor("op_8311_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8311_end_mask_0 = const()[name = tensor("op_8311_end_mask_0"), val = tensor([true, false, true, true])]; 
+ tensor var_8311_cast_fp16 = slice_by_index(begin = var_8311_begin_0, end = var_8311_end_0, end_mask = var_8311_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8311_cast_fp16")]; + tensor var_8315_begin_0 = const()[name = tensor("op_8315_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_8315_end_0 = const()[name = tensor("op_8315_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_8315_end_mask_0 = const()[name = tensor("op_8315_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8315_cast_fp16 = slice_by_index(begin = var_8315_begin_0, end = var_8315_end_0, end_mask = var_8315_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8315_cast_fp16")]; + tensor var_8319_begin_0 = const()[name = tensor("op_8319_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_8319_end_0 = const()[name = tensor("op_8319_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_8319_end_mask_0 = const()[name = tensor("op_8319_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8319_cast_fp16 = slice_by_index(begin = var_8319_begin_0, end = var_8319_end_0, end_mask = var_8319_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8319_cast_fp16")]; + tensor var_8323_begin_0 = const()[name = tensor("op_8323_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_8323_end_0 = const()[name = tensor("op_8323_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_8323_end_mask_0 = const()[name = tensor("op_8323_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8323_cast_fp16 = slice_by_index(begin = var_8323_begin_0, end = var_8323_end_0, end_mask = var_8323_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8323_cast_fp16")]; + tensor var_8327_begin_0 = const()[name = tensor("op_8327_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_8327_end_0 = const()[name = tensor("op_8327_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_8327_end_mask_0 = const()[name = tensor("op_8327_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8327_cast_fp16 = slice_by_index(begin = var_8327_begin_0, end = var_8327_end_0, end_mask = var_8327_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8327_cast_fp16")]; + tensor var_8331_begin_0 = const()[name = tensor("op_8331_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_8331_end_0 = const()[name = tensor("op_8331_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_8331_end_mask_0 = const()[name = tensor("op_8331_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8331_cast_fp16 = slice_by_index(begin = var_8331_begin_0, end = var_8331_end_0, end_mask = var_8331_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8331_cast_fp16")]; + tensor var_8335_begin_0 = const()[name = tensor("op_8335_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_8335_end_0 = const()[name = tensor("op_8335_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_8335_end_mask_0 = const()[name = tensor("op_8335_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8335_cast_fp16 = slice_by_index(begin = var_8335_begin_0, end = var_8335_end_0, end_mask = var_8335_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8335_cast_fp16")]; + tensor var_8339_begin_0 = const()[name = tensor("op_8339_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_8339_end_0 = const()[name = tensor("op_8339_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_8339_end_mask_0 = const()[name = tensor("op_8339_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_8339_cast_fp16 = slice_by_index(begin = var_8339_begin_0, end = var_8339_end_0, end_mask = var_8339_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8339_cast_fp16")]; + tensor var_8343_begin_0 = const()[name = tensor("op_8343_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_8343_end_0 = const()[name = tensor("op_8343_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_8343_end_mask_0 = const()[name = tensor("op_8343_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8343_cast_fp16 = slice_by_index(begin = var_8343_begin_0, end = var_8343_end_0, end_mask = var_8343_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8343_cast_fp16")]; + tensor var_8347_begin_0 = const()[name = tensor("op_8347_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_8347_end_0 = const()[name = tensor("op_8347_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_8347_end_mask_0 = const()[name = tensor("op_8347_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8347_cast_fp16 = slice_by_index(begin = var_8347_begin_0, end = var_8347_end_0, end_mask = var_8347_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8347_cast_fp16")]; + tensor var_8351_begin_0 = const()[name = tensor("op_8351_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_8351_end_0 = const()[name = tensor("op_8351_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_8351_end_mask_0 = const()[name = tensor("op_8351_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8351_cast_fp16 = slice_by_index(begin = var_8351_begin_0, end = var_8351_end_0, end_mask = var_8351_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8351_cast_fp16")]; + tensor var_8355_begin_0 = const()[name = tensor("op_8355_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_8355_end_0 = const()[name = tensor("op_8355_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_8355_end_mask_0 = const()[name = tensor("op_8355_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8355_cast_fp16 = slice_by_index(begin = var_8355_begin_0, end = var_8355_end_0, end_mask = var_8355_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_8355_cast_fp16")]; + tensor var_8359_equation_0 = const()[name = tensor("op_8359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8359_cast_fp16 = einsum(equation = var_8359_equation_0, values = (var_8265_cast_fp16, var_7931_cast_fp16))[name = tensor("op_8359_cast_fp16")]; + tensor var_8360_to_fp16 = const()[name = tensor("op_8360_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_769_cast_fp16 = mul(x = var_8359_cast_fp16, y = var_8360_to_fp16)[name = tensor("aw_chunk_769_cast_fp16")]; + tensor var_8363_equation_0 = const()[name = tensor("op_8363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8363_cast_fp16 = einsum(equation = var_8363_equation_0, values = (var_8265_cast_fp16, var_7938_cast_fp16))[name = tensor("op_8363_cast_fp16")]; + tensor var_8364_to_fp16 = const()[name = tensor("op_8364_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_771_cast_fp16 = mul(x = var_8363_cast_fp16, y = var_8364_to_fp16)[name = tensor("aw_chunk_771_cast_fp16")]; + tensor var_8367_equation_0 = const()[name = tensor("op_8367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8367_cast_fp16 = einsum(equation = var_8367_equation_0, values = (var_8265_cast_fp16, var_7945_cast_fp16))[name = tensor("op_8367_cast_fp16")]; + tensor var_8368_to_fp16 = const()[name = tensor("op_8368_to_fp16"), val = tensor(0x1p-3)]; + tensor 
aw_chunk_773_cast_fp16 = mul(x = var_8367_cast_fp16, y = var_8368_to_fp16)[name = tensor("aw_chunk_773_cast_fp16")]; + tensor var_8371_equation_0 = const()[name = tensor("op_8371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8371_cast_fp16 = einsum(equation = var_8371_equation_0, values = (var_8265_cast_fp16, var_7952_cast_fp16))[name = tensor("op_8371_cast_fp16")]; + tensor var_8372_to_fp16 = const()[name = tensor("op_8372_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_775_cast_fp16 = mul(x = var_8371_cast_fp16, y = var_8372_to_fp16)[name = tensor("aw_chunk_775_cast_fp16")]; + tensor var_8375_equation_0 = const()[name = tensor("op_8375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8375_cast_fp16 = einsum(equation = var_8375_equation_0, values = (var_8269_cast_fp16, var_7959_cast_fp16))[name = tensor("op_8375_cast_fp16")]; + tensor var_8376_to_fp16 = const()[name = tensor("op_8376_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_777_cast_fp16 = mul(x = var_8375_cast_fp16, y = var_8376_to_fp16)[name = tensor("aw_chunk_777_cast_fp16")]; + tensor var_8379_equation_0 = const()[name = tensor("op_8379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8379_cast_fp16 = einsum(equation = var_8379_equation_0, values = (var_8269_cast_fp16, var_7966_cast_fp16))[name = tensor("op_8379_cast_fp16")]; + tensor var_8380_to_fp16 = const()[name = tensor("op_8380_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_779_cast_fp16 = mul(x = var_8379_cast_fp16, y = var_8380_to_fp16)[name = tensor("aw_chunk_779_cast_fp16")]; + tensor var_8383_equation_0 = const()[name = tensor("op_8383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8383_cast_fp16 = einsum(equation = var_8383_equation_0, values = (var_8269_cast_fp16, var_7973_cast_fp16))[name = tensor("op_8383_cast_fp16")]; + tensor var_8384_to_fp16 = const()[name = tensor("op_8384_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_781_cast_fp16 = mul(x = var_8383_cast_fp16, y = var_8384_to_fp16)[name = tensor("aw_chunk_781_cast_fp16")]; + tensor var_8387_equation_0 = const()[name = tensor("op_8387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8387_cast_fp16 = einsum(equation = var_8387_equation_0, values = (var_8269_cast_fp16, var_7980_cast_fp16))[name = tensor("op_8387_cast_fp16")]; + tensor var_8388_to_fp16 = const()[name = tensor("op_8388_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_783_cast_fp16 = mul(x = var_8387_cast_fp16, y = var_8388_to_fp16)[name = tensor("aw_chunk_783_cast_fp16")]; + tensor var_8391_equation_0 = const()[name = tensor("op_8391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8391_cast_fp16 = einsum(equation = var_8391_equation_0, values = (var_8273_cast_fp16, var_7987_cast_fp16))[name = tensor("op_8391_cast_fp16")]; + tensor var_8392_to_fp16 = const()[name = tensor("op_8392_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_785_cast_fp16 = mul(x = var_8391_cast_fp16, y = var_8392_to_fp16)[name = tensor("aw_chunk_785_cast_fp16")]; + tensor var_8395_equation_0 = const()[name = tensor("op_8395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8395_cast_fp16 = einsum(equation = var_8395_equation_0, values = (var_8273_cast_fp16, var_7994_cast_fp16))[name = tensor("op_8395_cast_fp16")]; + tensor var_8396_to_fp16 = const()[name = tensor("op_8396_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_787_cast_fp16 = mul(x = var_8395_cast_fp16, y = var_8396_to_fp16)[name = tensor("aw_chunk_787_cast_fp16")]; + tensor var_8399_equation_0 = 
const()[name = tensor("op_8399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8399_cast_fp16 = einsum(equation = var_8399_equation_0, values = (var_8273_cast_fp16, var_8001_cast_fp16))[name = tensor("op_8399_cast_fp16")]; + tensor var_8400_to_fp16 = const()[name = tensor("op_8400_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_789_cast_fp16 = mul(x = var_8399_cast_fp16, y = var_8400_to_fp16)[name = tensor("aw_chunk_789_cast_fp16")]; + tensor var_8403_equation_0 = const()[name = tensor("op_8403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8403_cast_fp16 = einsum(equation = var_8403_equation_0, values = (var_8273_cast_fp16, var_8008_cast_fp16))[name = tensor("op_8403_cast_fp16")]; + tensor var_8404_to_fp16 = const()[name = tensor("op_8404_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_791_cast_fp16 = mul(x = var_8403_cast_fp16, y = var_8404_to_fp16)[name = tensor("aw_chunk_791_cast_fp16")]; + tensor var_8407_equation_0 = const()[name = tensor("op_8407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8407_cast_fp16 = einsum(equation = var_8407_equation_0, values = (var_8277_cast_fp16, var_8015_cast_fp16))[name = tensor("op_8407_cast_fp16")]; + tensor var_8408_to_fp16 = const()[name = tensor("op_8408_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_793_cast_fp16 = mul(x = var_8407_cast_fp16, y = var_8408_to_fp16)[name = tensor("aw_chunk_793_cast_fp16")]; + tensor var_8411_equation_0 = const()[name = tensor("op_8411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8411_cast_fp16 = einsum(equation = var_8411_equation_0, values = (var_8277_cast_fp16, var_8022_cast_fp16))[name = tensor("op_8411_cast_fp16")]; + tensor var_8412_to_fp16 = const()[name = tensor("op_8412_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_795_cast_fp16 = mul(x = var_8411_cast_fp16, y = var_8412_to_fp16)[name = tensor("aw_chunk_795_cast_fp16")]; + tensor var_8415_equation_0 = const()[name = tensor("op_8415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8415_cast_fp16 = einsum(equation = var_8415_equation_0, values = (var_8277_cast_fp16, var_8029_cast_fp16))[name = tensor("op_8415_cast_fp16")]; + tensor var_8416_to_fp16 = const()[name = tensor("op_8416_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_797_cast_fp16 = mul(x = var_8415_cast_fp16, y = var_8416_to_fp16)[name = tensor("aw_chunk_797_cast_fp16")]; + tensor var_8419_equation_0 = const()[name = tensor("op_8419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8419_cast_fp16 = einsum(equation = var_8419_equation_0, values = (var_8277_cast_fp16, var_8036_cast_fp16))[name = tensor("op_8419_cast_fp16")]; + tensor var_8420_to_fp16 = const()[name = tensor("op_8420_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_799_cast_fp16 = mul(x = var_8419_cast_fp16, y = var_8420_to_fp16)[name = tensor("aw_chunk_799_cast_fp16")]; + tensor var_8423_equation_0 = const()[name = tensor("op_8423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8423_cast_fp16 = einsum(equation = var_8423_equation_0, values = (var_8281_cast_fp16, var_8043_cast_fp16))[name = tensor("op_8423_cast_fp16")]; + tensor var_8424_to_fp16 = const()[name = tensor("op_8424_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_801_cast_fp16 = mul(x = var_8423_cast_fp16, y = var_8424_to_fp16)[name = tensor("aw_chunk_801_cast_fp16")]; + tensor var_8427_equation_0 = const()[name = tensor("op_8427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8427_cast_fp16 = einsum(equation = var_8427_equation_0, 
values = (var_8281_cast_fp16, var_8050_cast_fp16))[name = tensor("op_8427_cast_fp16")]; + tensor var_8428_to_fp16 = const()[name = tensor("op_8428_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_803_cast_fp16 = mul(x = var_8427_cast_fp16, y = var_8428_to_fp16)[name = tensor("aw_chunk_803_cast_fp16")]; + tensor var_8431_equation_0 = const()[name = tensor("op_8431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8431_cast_fp16 = einsum(equation = var_8431_equation_0, values = (var_8281_cast_fp16, var_8057_cast_fp16))[name = tensor("op_8431_cast_fp16")]; + tensor var_8432_to_fp16 = const()[name = tensor("op_8432_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_805_cast_fp16 = mul(x = var_8431_cast_fp16, y = var_8432_to_fp16)[name = tensor("aw_chunk_805_cast_fp16")]; + tensor var_8435_equation_0 = const()[name = tensor("op_8435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8435_cast_fp16 = einsum(equation = var_8435_equation_0, values = (var_8281_cast_fp16, var_8064_cast_fp16))[name = tensor("op_8435_cast_fp16")]; + tensor var_8436_to_fp16 = const()[name = tensor("op_8436_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_807_cast_fp16 = mul(x = var_8435_cast_fp16, y = var_8436_to_fp16)[name = tensor("aw_chunk_807_cast_fp16")]; + tensor var_8439_equation_0 = const()[name = tensor("op_8439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8439_cast_fp16 = einsum(equation = var_8439_equation_0, values = (var_8285_cast_fp16, var_8071_cast_fp16))[name = tensor("op_8439_cast_fp16")]; + tensor var_8440_to_fp16 = const()[name = tensor("op_8440_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_809_cast_fp16 = mul(x = var_8439_cast_fp16, y = var_8440_to_fp16)[name = tensor("aw_chunk_809_cast_fp16")]; + tensor var_8443_equation_0 = const()[name = tensor("op_8443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8443_cast_fp16 = einsum(equation = var_8443_equation_0, values = (var_8285_cast_fp16, var_8078_cast_fp16))[name = tensor("op_8443_cast_fp16")]; + tensor var_8444_to_fp16 = const()[name = tensor("op_8444_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_811_cast_fp16 = mul(x = var_8443_cast_fp16, y = var_8444_to_fp16)[name = tensor("aw_chunk_811_cast_fp16")]; + tensor var_8447_equation_0 = const()[name = tensor("op_8447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8447_cast_fp16 = einsum(equation = var_8447_equation_0, values = (var_8285_cast_fp16, var_8085_cast_fp16))[name = tensor("op_8447_cast_fp16")]; + tensor var_8448_to_fp16 = const()[name = tensor("op_8448_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_813_cast_fp16 = mul(x = var_8447_cast_fp16, y = var_8448_to_fp16)[name = tensor("aw_chunk_813_cast_fp16")]; + tensor var_8451_equation_0 = const()[name = tensor("op_8451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8451_cast_fp16 = einsum(equation = var_8451_equation_0, values = (var_8285_cast_fp16, var_8092_cast_fp16))[name = tensor("op_8451_cast_fp16")]; + tensor var_8452_to_fp16 = const()[name = tensor("op_8452_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_815_cast_fp16 = mul(x = var_8451_cast_fp16, y = var_8452_to_fp16)[name = tensor("aw_chunk_815_cast_fp16")]; + tensor var_8455_equation_0 = const()[name = tensor("op_8455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8455_cast_fp16 = einsum(equation = var_8455_equation_0, values = (var_8289_cast_fp16, var_8099_cast_fp16))[name = tensor("op_8455_cast_fp16")]; + tensor var_8456_to_fp16 = const()[name = 
tensor("op_8456_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_817_cast_fp16 = mul(x = var_8455_cast_fp16, y = var_8456_to_fp16)[name = tensor("aw_chunk_817_cast_fp16")]; + tensor var_8459_equation_0 = const()[name = tensor("op_8459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8459_cast_fp16 = einsum(equation = var_8459_equation_0, values = (var_8289_cast_fp16, var_8106_cast_fp16))[name = tensor("op_8459_cast_fp16")]; + tensor var_8460_to_fp16 = const()[name = tensor("op_8460_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_819_cast_fp16 = mul(x = var_8459_cast_fp16, y = var_8460_to_fp16)[name = tensor("aw_chunk_819_cast_fp16")]; + tensor var_8463_equation_0 = const()[name = tensor("op_8463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8463_cast_fp16 = einsum(equation = var_8463_equation_0, values = (var_8289_cast_fp16, var_8113_cast_fp16))[name = tensor("op_8463_cast_fp16")]; + tensor var_8464_to_fp16 = const()[name = tensor("op_8464_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_821_cast_fp16 = mul(x = var_8463_cast_fp16, y = var_8464_to_fp16)[name = tensor("aw_chunk_821_cast_fp16")]; + tensor var_8467_equation_0 = const()[name = tensor("op_8467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8467_cast_fp16 = einsum(equation = var_8467_equation_0, values = (var_8289_cast_fp16, var_8120_cast_fp16))[name = tensor("op_8467_cast_fp16")]; + tensor var_8468_to_fp16 = const()[name = tensor("op_8468_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_823_cast_fp16 = mul(x = var_8467_cast_fp16, y = var_8468_to_fp16)[name = tensor("aw_chunk_823_cast_fp16")]; + tensor var_8471_equation_0 = const()[name = tensor("op_8471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8471_cast_fp16 = einsum(equation = var_8471_equation_0, values = (var_8293_cast_fp16, var_8127_cast_fp16))[name = tensor("op_8471_cast_fp16")]; + tensor var_8472_to_fp16 = const()[name = tensor("op_8472_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_825_cast_fp16 = mul(x = var_8471_cast_fp16, y = var_8472_to_fp16)[name = tensor("aw_chunk_825_cast_fp16")]; + tensor var_8475_equation_0 = const()[name = tensor("op_8475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8475_cast_fp16 = einsum(equation = var_8475_equation_0, values = (var_8293_cast_fp16, var_8134_cast_fp16))[name = tensor("op_8475_cast_fp16")]; + tensor var_8476_to_fp16 = const()[name = tensor("op_8476_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_827_cast_fp16 = mul(x = var_8475_cast_fp16, y = var_8476_to_fp16)[name = tensor("aw_chunk_827_cast_fp16")]; + tensor var_8479_equation_0 = const()[name = tensor("op_8479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8479_cast_fp16 = einsum(equation = var_8479_equation_0, values = (var_8293_cast_fp16, var_8141_cast_fp16))[name = tensor("op_8479_cast_fp16")]; + tensor var_8480_to_fp16 = const()[name = tensor("op_8480_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_829_cast_fp16 = mul(x = var_8479_cast_fp16, y = var_8480_to_fp16)[name = tensor("aw_chunk_829_cast_fp16")]; + tensor var_8483_equation_0 = const()[name = tensor("op_8483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8483_cast_fp16 = einsum(equation = var_8483_equation_0, values = (var_8293_cast_fp16, var_8148_cast_fp16))[name = tensor("op_8483_cast_fp16")]; + tensor var_8484_to_fp16 = const()[name = tensor("op_8484_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_831_cast_fp16 = mul(x = var_8483_cast_fp16, y = var_8484_to_fp16)[name = 
tensor("aw_chunk_831_cast_fp16")]; + tensor var_8487_equation_0 = const()[name = tensor("op_8487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8487_cast_fp16 = einsum(equation = var_8487_equation_0, values = (var_8297_cast_fp16, var_8155_cast_fp16))[name = tensor("op_8487_cast_fp16")]; + tensor var_8488_to_fp16 = const()[name = tensor("op_8488_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_833_cast_fp16 = mul(x = var_8487_cast_fp16, y = var_8488_to_fp16)[name = tensor("aw_chunk_833_cast_fp16")]; + tensor var_8491_equation_0 = const()[name = tensor("op_8491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8491_cast_fp16 = einsum(equation = var_8491_equation_0, values = (var_8297_cast_fp16, var_8162_cast_fp16))[name = tensor("op_8491_cast_fp16")]; + tensor var_8492_to_fp16 = const()[name = tensor("op_8492_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_835_cast_fp16 = mul(x = var_8491_cast_fp16, y = var_8492_to_fp16)[name = tensor("aw_chunk_835_cast_fp16")]; + tensor var_8495_equation_0 = const()[name = tensor("op_8495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8495_cast_fp16 = einsum(equation = var_8495_equation_0, values = (var_8297_cast_fp16, var_8169_cast_fp16))[name = tensor("op_8495_cast_fp16")]; + tensor var_8496_to_fp16 = const()[name = tensor("op_8496_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_837_cast_fp16 = mul(x = var_8495_cast_fp16, y = var_8496_to_fp16)[name = tensor("aw_chunk_837_cast_fp16")]; + tensor var_8499_equation_0 = const()[name = tensor("op_8499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8499_cast_fp16 = einsum(equation = var_8499_equation_0, values = (var_8297_cast_fp16, var_8176_cast_fp16))[name = tensor("op_8499_cast_fp16")]; + tensor var_8500_to_fp16 = const()[name = tensor("op_8500_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_839_cast_fp16 = mul(x = var_8499_cast_fp16, y = var_8500_to_fp16)[name = tensor("aw_chunk_839_cast_fp16")]; + tensor var_8503_equation_0 = const()[name = tensor("op_8503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8503_cast_fp16 = einsum(equation = var_8503_equation_0, values = (var_8301_cast_fp16, var_8183_cast_fp16))[name = tensor("op_8503_cast_fp16")]; + tensor var_8504_to_fp16 = const()[name = tensor("op_8504_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_841_cast_fp16 = mul(x = var_8503_cast_fp16, y = var_8504_to_fp16)[name = tensor("aw_chunk_841_cast_fp16")]; + tensor var_8507_equation_0 = const()[name = tensor("op_8507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8507_cast_fp16 = einsum(equation = var_8507_equation_0, values = (var_8301_cast_fp16, var_8190_cast_fp16))[name = tensor("op_8507_cast_fp16")]; + tensor var_8508_to_fp16 = const()[name = tensor("op_8508_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_843_cast_fp16 = mul(x = var_8507_cast_fp16, y = var_8508_to_fp16)[name = tensor("aw_chunk_843_cast_fp16")]; + tensor var_8511_equation_0 = const()[name = tensor("op_8511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8511_cast_fp16 = einsum(equation = var_8511_equation_0, values = (var_8301_cast_fp16, var_8197_cast_fp16))[name = tensor("op_8511_cast_fp16")]; + tensor var_8512_to_fp16 = const()[name = tensor("op_8512_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_845_cast_fp16 = mul(x = var_8511_cast_fp16, y = var_8512_to_fp16)[name = tensor("aw_chunk_845_cast_fp16")]; + tensor var_8515_equation_0 = const()[name = tensor("op_8515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + 
tensor var_8515_cast_fp16 = einsum(equation = var_8515_equation_0, values = (var_8301_cast_fp16, var_8204_cast_fp16))[name = tensor("op_8515_cast_fp16")]; + tensor var_8516_to_fp16 = const()[name = tensor("op_8516_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_847_cast_fp16 = mul(x = var_8515_cast_fp16, y = var_8516_to_fp16)[name = tensor("aw_chunk_847_cast_fp16")]; + tensor var_8519_equation_0 = const()[name = tensor("op_8519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8519_cast_fp16 = einsum(equation = var_8519_equation_0, values = (var_8305_cast_fp16, var_8211_cast_fp16))[name = tensor("op_8519_cast_fp16")]; + tensor var_8520_to_fp16 = const()[name = tensor("op_8520_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_849_cast_fp16 = mul(x = var_8519_cast_fp16, y = var_8520_to_fp16)[name = tensor("aw_chunk_849_cast_fp16")]; + tensor var_8523_equation_0 = const()[name = tensor("op_8523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8523_cast_fp16 = einsum(equation = var_8523_equation_0, values = (var_8305_cast_fp16, var_8218_cast_fp16))[name = tensor("op_8523_cast_fp16")]; + tensor var_8524_to_fp16 = const()[name = tensor("op_8524_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_851_cast_fp16 = mul(x = var_8523_cast_fp16, y = var_8524_to_fp16)[name = tensor("aw_chunk_851_cast_fp16")]; + tensor var_8527_equation_0 = const()[name = tensor("op_8527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8527_cast_fp16 = einsum(equation = var_8527_equation_0, values = (var_8305_cast_fp16, var_8225_cast_fp16))[name = tensor("op_8527_cast_fp16")]; + tensor var_8528_to_fp16 = const()[name = tensor("op_8528_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_853_cast_fp16 = mul(x = var_8527_cast_fp16, y = var_8528_to_fp16)[name = tensor("aw_chunk_853_cast_fp16")]; + tensor var_8531_equation_0 = const()[name = tensor("op_8531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8531_cast_fp16 = einsum(equation = var_8531_equation_0, values = (var_8305_cast_fp16, var_8232_cast_fp16))[name = tensor("op_8531_cast_fp16")]; + tensor var_8532_to_fp16 = const()[name = tensor("op_8532_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_855_cast_fp16 = mul(x = var_8531_cast_fp16, y = var_8532_to_fp16)[name = tensor("aw_chunk_855_cast_fp16")]; + tensor var_8535_equation_0 = const()[name = tensor("op_8535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8535_cast_fp16 = einsum(equation = var_8535_equation_0, values = (var_8309_cast_fp16, var_8239_cast_fp16))[name = tensor("op_8535_cast_fp16")]; + tensor var_8536_to_fp16 = const()[name = tensor("op_8536_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_857_cast_fp16 = mul(x = var_8535_cast_fp16, y = var_8536_to_fp16)[name = tensor("aw_chunk_857_cast_fp16")]; + tensor var_8539_equation_0 = const()[name = tensor("op_8539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8539_cast_fp16 = einsum(equation = var_8539_equation_0, values = (var_8309_cast_fp16, var_8246_cast_fp16))[name = tensor("op_8539_cast_fp16")]; + tensor var_8540_to_fp16 = const()[name = tensor("op_8540_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_859_cast_fp16 = mul(x = var_8539_cast_fp16, y = var_8540_to_fp16)[name = tensor("aw_chunk_859_cast_fp16")]; + tensor var_8543_equation_0 = const()[name = tensor("op_8543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8543_cast_fp16 = einsum(equation = var_8543_equation_0, values = (var_8309_cast_fp16, var_8253_cast_fp16))[name = 
tensor("op_8543_cast_fp16")]; + tensor var_8544_to_fp16 = const()[name = tensor("op_8544_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_861_cast_fp16 = mul(x = var_8543_cast_fp16, y = var_8544_to_fp16)[name = tensor("aw_chunk_861_cast_fp16")]; + tensor var_8547_equation_0 = const()[name = tensor("op_8547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8547_cast_fp16 = einsum(equation = var_8547_equation_0, values = (var_8309_cast_fp16, var_8260_cast_fp16))[name = tensor("op_8547_cast_fp16")]; + tensor var_8548_to_fp16 = const()[name = tensor("op_8548_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_863_cast_fp16 = mul(x = var_8547_cast_fp16, y = var_8548_to_fp16)[name = tensor("aw_chunk_863_cast_fp16")]; + tensor var_8550_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_769_cast_fp16)[name = tensor("op_8550_cast_fp16")]; + tensor var_8551_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_771_cast_fp16)[name = tensor("op_8551_cast_fp16")]; + tensor var_8552_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_773_cast_fp16)[name = tensor("op_8552_cast_fp16")]; + tensor var_8553_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_775_cast_fp16)[name = tensor("op_8553_cast_fp16")]; + tensor var_8554_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_777_cast_fp16)[name = tensor("op_8554_cast_fp16")]; + tensor var_8555_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_779_cast_fp16)[name = tensor("op_8555_cast_fp16")]; + tensor var_8556_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_781_cast_fp16)[name = tensor("op_8556_cast_fp16")]; + tensor var_8557_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_783_cast_fp16)[name = tensor("op_8557_cast_fp16")]; + tensor var_8558_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_785_cast_fp16)[name = tensor("op_8558_cast_fp16")]; + tensor var_8559_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_787_cast_fp16)[name = tensor("op_8559_cast_fp16")]; + tensor var_8560_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_789_cast_fp16)[name = tensor("op_8560_cast_fp16")]; + tensor var_8561_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_791_cast_fp16)[name = tensor("op_8561_cast_fp16")]; + tensor var_8562_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_793_cast_fp16)[name = tensor("op_8562_cast_fp16")]; + tensor var_8563_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_795_cast_fp16)[name = tensor("op_8563_cast_fp16")]; + tensor var_8564_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_797_cast_fp16)[name = tensor("op_8564_cast_fp16")]; + tensor var_8565_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_799_cast_fp16)[name = tensor("op_8565_cast_fp16")]; + tensor var_8566_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_801_cast_fp16)[name = tensor("op_8566_cast_fp16")]; + tensor var_8567_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_803_cast_fp16)[name = tensor("op_8567_cast_fp16")]; + tensor var_8568_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_805_cast_fp16)[name = tensor("op_8568_cast_fp16")]; + tensor var_8569_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_807_cast_fp16)[name = tensor("op_8569_cast_fp16")]; + tensor var_8570_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_809_cast_fp16)[name = tensor("op_8570_cast_fp16")]; + tensor var_8571_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_811_cast_fp16)[name = tensor("op_8571_cast_fp16")]; + tensor var_8572_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_813_cast_fp16)[name = tensor("op_8572_cast_fp16")]; + tensor var_8573_cast_fp16 = softmax(axis = var_7823, x = 
aw_chunk_815_cast_fp16)[name = tensor("op_8573_cast_fp16")]; + tensor var_8574_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_817_cast_fp16)[name = tensor("op_8574_cast_fp16")]; + tensor var_8575_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_819_cast_fp16)[name = tensor("op_8575_cast_fp16")]; + tensor var_8576_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_821_cast_fp16)[name = tensor("op_8576_cast_fp16")]; + tensor var_8577_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_823_cast_fp16)[name = tensor("op_8577_cast_fp16")]; + tensor var_8578_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_825_cast_fp16)[name = tensor("op_8578_cast_fp16")]; + tensor var_8579_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_827_cast_fp16)[name = tensor("op_8579_cast_fp16")]; + tensor var_8580_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_829_cast_fp16)[name = tensor("op_8580_cast_fp16")]; + tensor var_8581_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_831_cast_fp16)[name = tensor("op_8581_cast_fp16")]; + tensor var_8582_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_833_cast_fp16)[name = tensor("op_8582_cast_fp16")]; + tensor var_8583_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_835_cast_fp16)[name = tensor("op_8583_cast_fp16")]; + tensor var_8584_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_837_cast_fp16)[name = tensor("op_8584_cast_fp16")]; + tensor var_8585_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_839_cast_fp16)[name = tensor("op_8585_cast_fp16")]; + tensor var_8586_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_841_cast_fp16)[name = tensor("op_8586_cast_fp16")]; + tensor var_8587_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_843_cast_fp16)[name = tensor("op_8587_cast_fp16")]; + tensor var_8588_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_845_cast_fp16)[name = tensor("op_8588_cast_fp16")]; + tensor var_8589_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_847_cast_fp16)[name = tensor("op_8589_cast_fp16")]; + tensor var_8590_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_849_cast_fp16)[name = tensor("op_8590_cast_fp16")]; + tensor var_8591_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_851_cast_fp16)[name = tensor("op_8591_cast_fp16")]; + tensor var_8592_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_853_cast_fp16)[name = tensor("op_8592_cast_fp16")]; + tensor var_8593_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_855_cast_fp16)[name = tensor("op_8593_cast_fp16")]; + tensor var_8594_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_857_cast_fp16)[name = tensor("op_8594_cast_fp16")]; + tensor var_8595_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_859_cast_fp16)[name = tensor("op_8595_cast_fp16")]; + tensor var_8596_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_861_cast_fp16)[name = tensor("op_8596_cast_fp16")]; + tensor var_8597_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_863_cast_fp16)[name = tensor("op_8597_cast_fp16")]; + tensor var_8599_equation_0 = const()[name = tensor("op_8599_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8599_cast_fp16 = einsum(equation = var_8599_equation_0, values = (var_8311_cast_fp16, var_8550_cast_fp16))[name = tensor("op_8599_cast_fp16")]; + tensor var_8601_equation_0 = const()[name = tensor("op_8601_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8601_cast_fp16 = einsum(equation = var_8601_equation_0, values = (var_8311_cast_fp16, var_8551_cast_fp16))[name = tensor("op_8601_cast_fp16")]; + tensor var_8603_equation_0 = const()[name = tensor("op_8603_equation_0"), val 
= tensor("bchk,bkhq->bchq")]; + tensor var_8603_cast_fp16 = einsum(equation = var_8603_equation_0, values = (var_8311_cast_fp16, var_8552_cast_fp16))[name = tensor("op_8603_cast_fp16")]; + tensor var_8605_equation_0 = const()[name = tensor("op_8605_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8605_cast_fp16 = einsum(equation = var_8605_equation_0, values = (var_8311_cast_fp16, var_8553_cast_fp16))[name = tensor("op_8605_cast_fp16")]; + tensor var_8607_equation_0 = const()[name = tensor("op_8607_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8607_cast_fp16 = einsum(equation = var_8607_equation_0, values = (var_8315_cast_fp16, var_8554_cast_fp16))[name = tensor("op_8607_cast_fp16")]; + tensor var_8609_equation_0 = const()[name = tensor("op_8609_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8609_cast_fp16 = einsum(equation = var_8609_equation_0, values = (var_8315_cast_fp16, var_8555_cast_fp16))[name = tensor("op_8609_cast_fp16")]; + tensor var_8611_equation_0 = const()[name = tensor("op_8611_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8611_cast_fp16 = einsum(equation = var_8611_equation_0, values = (var_8315_cast_fp16, var_8556_cast_fp16))[name = tensor("op_8611_cast_fp16")]; + tensor var_8613_equation_0 = const()[name = tensor("op_8613_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8613_cast_fp16 = einsum(equation = var_8613_equation_0, values = (var_8315_cast_fp16, var_8557_cast_fp16))[name = tensor("op_8613_cast_fp16")]; + tensor var_8615_equation_0 = const()[name = tensor("op_8615_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8615_cast_fp16 = einsum(equation = var_8615_equation_0, values = (var_8319_cast_fp16, var_8558_cast_fp16))[name = tensor("op_8615_cast_fp16")]; + tensor var_8617_equation_0 = const()[name = tensor("op_8617_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8617_cast_fp16 = einsum(equation = var_8617_equation_0, values = (var_8319_cast_fp16, var_8559_cast_fp16))[name = tensor("op_8617_cast_fp16")]; + tensor var_8619_equation_0 = const()[name = tensor("op_8619_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8619_cast_fp16 = einsum(equation = var_8619_equation_0, values = (var_8319_cast_fp16, var_8560_cast_fp16))[name = tensor("op_8619_cast_fp16")]; + tensor var_8621_equation_0 = const()[name = tensor("op_8621_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8621_cast_fp16 = einsum(equation = var_8621_equation_0, values = (var_8319_cast_fp16, var_8561_cast_fp16))[name = tensor("op_8621_cast_fp16")]; + tensor var_8623_equation_0 = const()[name = tensor("op_8623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8623_cast_fp16 = einsum(equation = var_8623_equation_0, values = (var_8323_cast_fp16, var_8562_cast_fp16))[name = tensor("op_8623_cast_fp16")]; + tensor var_8625_equation_0 = const()[name = tensor("op_8625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8625_cast_fp16 = einsum(equation = var_8625_equation_0, values = (var_8323_cast_fp16, var_8563_cast_fp16))[name = tensor("op_8625_cast_fp16")]; + tensor var_8627_equation_0 = const()[name = tensor("op_8627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8627_cast_fp16 = einsum(equation = var_8627_equation_0, values = (var_8323_cast_fp16, var_8564_cast_fp16))[name = tensor("op_8627_cast_fp16")]; + tensor var_8629_equation_0 = const()[name = tensor("op_8629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8629_cast_fp16 = einsum(equation = 
var_8629_equation_0, values = (var_8323_cast_fp16, var_8565_cast_fp16))[name = tensor("op_8629_cast_fp16")]; + tensor var_8631_equation_0 = const()[name = tensor("op_8631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8631_cast_fp16 = einsum(equation = var_8631_equation_0, values = (var_8327_cast_fp16, var_8566_cast_fp16))[name = tensor("op_8631_cast_fp16")]; + tensor var_8633_equation_0 = const()[name = tensor("op_8633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8633_cast_fp16 = einsum(equation = var_8633_equation_0, values = (var_8327_cast_fp16, var_8567_cast_fp16))[name = tensor("op_8633_cast_fp16")]; + tensor var_8635_equation_0 = const()[name = tensor("op_8635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8635_cast_fp16 = einsum(equation = var_8635_equation_0, values = (var_8327_cast_fp16, var_8568_cast_fp16))[name = tensor("op_8635_cast_fp16")]; + tensor var_8637_equation_0 = const()[name = tensor("op_8637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8637_cast_fp16 = einsum(equation = var_8637_equation_0, values = (var_8327_cast_fp16, var_8569_cast_fp16))[name = tensor("op_8637_cast_fp16")]; + tensor var_8639_equation_0 = const()[name = tensor("op_8639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8639_cast_fp16 = einsum(equation = var_8639_equation_0, values = (var_8331_cast_fp16, var_8570_cast_fp16))[name = tensor("op_8639_cast_fp16")]; + tensor var_8641_equation_0 = const()[name = tensor("op_8641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8641_cast_fp16 = einsum(equation = var_8641_equation_0, values = (var_8331_cast_fp16, var_8571_cast_fp16))[name = tensor("op_8641_cast_fp16")]; + tensor var_8643_equation_0 = const()[name = tensor("op_8643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8643_cast_fp16 = einsum(equation = var_8643_equation_0, values = (var_8331_cast_fp16, var_8572_cast_fp16))[name = tensor("op_8643_cast_fp16")]; + tensor var_8645_equation_0 = const()[name = tensor("op_8645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8645_cast_fp16 = einsum(equation = var_8645_equation_0, values = (var_8331_cast_fp16, var_8573_cast_fp16))[name = tensor("op_8645_cast_fp16")]; + tensor var_8647_equation_0 = const()[name = tensor("op_8647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8647_cast_fp16 = einsum(equation = var_8647_equation_0, values = (var_8335_cast_fp16, var_8574_cast_fp16))[name = tensor("op_8647_cast_fp16")]; + tensor var_8649_equation_0 = const()[name = tensor("op_8649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8649_cast_fp16 = einsum(equation = var_8649_equation_0, values = (var_8335_cast_fp16, var_8575_cast_fp16))[name = tensor("op_8649_cast_fp16")]; + tensor var_8651_equation_0 = const()[name = tensor("op_8651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8651_cast_fp16 = einsum(equation = var_8651_equation_0, values = (var_8335_cast_fp16, var_8576_cast_fp16))[name = tensor("op_8651_cast_fp16")]; + tensor var_8653_equation_0 = const()[name = tensor("op_8653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8653_cast_fp16 = einsum(equation = var_8653_equation_0, values = (var_8335_cast_fp16, var_8577_cast_fp16))[name = tensor("op_8653_cast_fp16")]; + tensor var_8655_equation_0 = const()[name = tensor("op_8655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8655_cast_fp16 = einsum(equation = var_8655_equation_0, values = (var_8339_cast_fp16, var_8578_cast_fp16))[name = 
tensor("op_8655_cast_fp16")]; + tensor var_8657_equation_0 = const()[name = tensor("op_8657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8657_cast_fp16 = einsum(equation = var_8657_equation_0, values = (var_8339_cast_fp16, var_8579_cast_fp16))[name = tensor("op_8657_cast_fp16")]; + tensor var_8659_equation_0 = const()[name = tensor("op_8659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8659_cast_fp16 = einsum(equation = var_8659_equation_0, values = (var_8339_cast_fp16, var_8580_cast_fp16))[name = tensor("op_8659_cast_fp16")]; + tensor var_8661_equation_0 = const()[name = tensor("op_8661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8661_cast_fp16 = einsum(equation = var_8661_equation_0, values = (var_8339_cast_fp16, var_8581_cast_fp16))[name = tensor("op_8661_cast_fp16")]; + tensor var_8663_equation_0 = const()[name = tensor("op_8663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8663_cast_fp16 = einsum(equation = var_8663_equation_0, values = (var_8343_cast_fp16, var_8582_cast_fp16))[name = tensor("op_8663_cast_fp16")]; + tensor var_8665_equation_0 = const()[name = tensor("op_8665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8665_cast_fp16 = einsum(equation = var_8665_equation_0, values = (var_8343_cast_fp16, var_8583_cast_fp16))[name = tensor("op_8665_cast_fp16")]; + tensor var_8667_equation_0 = const()[name = tensor("op_8667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8667_cast_fp16 = einsum(equation = var_8667_equation_0, values = (var_8343_cast_fp16, var_8584_cast_fp16))[name = tensor("op_8667_cast_fp16")]; + tensor var_8669_equation_0 = const()[name = tensor("op_8669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8669_cast_fp16 = einsum(equation = var_8669_equation_0, values = (var_8343_cast_fp16, var_8585_cast_fp16))[name = tensor("op_8669_cast_fp16")]; + tensor var_8671_equation_0 = const()[name = tensor("op_8671_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8671_cast_fp16 = einsum(equation = var_8671_equation_0, values = (var_8347_cast_fp16, var_8586_cast_fp16))[name = tensor("op_8671_cast_fp16")]; + tensor var_8673_equation_0 = const()[name = tensor("op_8673_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8673_cast_fp16 = einsum(equation = var_8673_equation_0, values = (var_8347_cast_fp16, var_8587_cast_fp16))[name = tensor("op_8673_cast_fp16")]; + tensor var_8675_equation_0 = const()[name = tensor("op_8675_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8675_cast_fp16 = einsum(equation = var_8675_equation_0, values = (var_8347_cast_fp16, var_8588_cast_fp16))[name = tensor("op_8675_cast_fp16")]; + tensor var_8677_equation_0 = const()[name = tensor("op_8677_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8677_cast_fp16 = einsum(equation = var_8677_equation_0, values = (var_8347_cast_fp16, var_8589_cast_fp16))[name = tensor("op_8677_cast_fp16")]; + tensor var_8679_equation_0 = const()[name = tensor("op_8679_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8679_cast_fp16 = einsum(equation = var_8679_equation_0, values = (var_8351_cast_fp16, var_8590_cast_fp16))[name = tensor("op_8679_cast_fp16")]; + tensor var_8681_equation_0 = const()[name = tensor("op_8681_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8681_cast_fp16 = einsum(equation = var_8681_equation_0, values = (var_8351_cast_fp16, var_8591_cast_fp16))[name = tensor("op_8681_cast_fp16")]; + tensor var_8683_equation_0 = const()[name = 
tensor("op_8683_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8683_cast_fp16 = einsum(equation = var_8683_equation_0, values = (var_8351_cast_fp16, var_8592_cast_fp16))[name = tensor("op_8683_cast_fp16")]; + tensor var_8685_equation_0 = const()[name = tensor("op_8685_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8685_cast_fp16 = einsum(equation = var_8685_equation_0, values = (var_8351_cast_fp16, var_8593_cast_fp16))[name = tensor("op_8685_cast_fp16")]; + tensor var_8687_equation_0 = const()[name = tensor("op_8687_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8687_cast_fp16 = einsum(equation = var_8687_equation_0, values = (var_8355_cast_fp16, var_8594_cast_fp16))[name = tensor("op_8687_cast_fp16")]; + tensor var_8689_equation_0 = const()[name = tensor("op_8689_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8689_cast_fp16 = einsum(equation = var_8689_equation_0, values = (var_8355_cast_fp16, var_8595_cast_fp16))[name = tensor("op_8689_cast_fp16")]; + tensor var_8691_equation_0 = const()[name = tensor("op_8691_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8691_cast_fp16 = einsum(equation = var_8691_equation_0, values = (var_8355_cast_fp16, var_8596_cast_fp16))[name = tensor("op_8691_cast_fp16")]; + tensor var_8693_equation_0 = const()[name = tensor("op_8693_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_8693_cast_fp16 = einsum(equation = var_8693_equation_0, values = (var_8355_cast_fp16, var_8597_cast_fp16))[name = tensor("op_8693_cast_fp16")]; + tensor var_8695_interleave_0 = const()[name = tensor("op_8695_interleave_0"), val = tensor(false)]; + tensor var_8695_cast_fp16 = concat(axis = var_7806, interleave = var_8695_interleave_0, values = (var_8599_cast_fp16, var_8601_cast_fp16, var_8603_cast_fp16, var_8605_cast_fp16))[name = tensor("op_8695_cast_fp16")]; + tensor var_8697_interleave_0 = const()[name = tensor("op_8697_interleave_0"), val = tensor(false)]; + tensor var_8697_cast_fp16 = concat(axis = var_7806, interleave = var_8697_interleave_0, values = (var_8607_cast_fp16, var_8609_cast_fp16, var_8611_cast_fp16, var_8613_cast_fp16))[name = tensor("op_8697_cast_fp16")]; + tensor var_8699_interleave_0 = const()[name = tensor("op_8699_interleave_0"), val = tensor(false)]; + tensor var_8699_cast_fp16 = concat(axis = var_7806, interleave = var_8699_interleave_0, values = (var_8615_cast_fp16, var_8617_cast_fp16, var_8619_cast_fp16, var_8621_cast_fp16))[name = tensor("op_8699_cast_fp16")]; + tensor var_8701_interleave_0 = const()[name = tensor("op_8701_interleave_0"), val = tensor(false)]; + tensor var_8701_cast_fp16 = concat(axis = var_7806, interleave = var_8701_interleave_0, values = (var_8623_cast_fp16, var_8625_cast_fp16, var_8627_cast_fp16, var_8629_cast_fp16))[name = tensor("op_8701_cast_fp16")]; + tensor var_8703_interleave_0 = const()[name = tensor("op_8703_interleave_0"), val = tensor(false)]; + tensor var_8703_cast_fp16 = concat(axis = var_7806, interleave = var_8703_interleave_0, values = (var_8631_cast_fp16, var_8633_cast_fp16, var_8635_cast_fp16, var_8637_cast_fp16))[name = tensor("op_8703_cast_fp16")]; + tensor var_8705_interleave_0 = const()[name = tensor("op_8705_interleave_0"), val = tensor(false)]; + tensor var_8705_cast_fp16 = concat(axis = var_7806, interleave = var_8705_interleave_0, values = (var_8639_cast_fp16, var_8641_cast_fp16, var_8643_cast_fp16, var_8645_cast_fp16))[name = tensor("op_8705_cast_fp16")]; + tensor var_8707_interleave_0 = const()[name = 
tensor("op_8707_interleave_0"), val = tensor(false)]; + tensor var_8707_cast_fp16 = concat(axis = var_7806, interleave = var_8707_interleave_0, values = (var_8647_cast_fp16, var_8649_cast_fp16, var_8651_cast_fp16, var_8653_cast_fp16))[name = tensor("op_8707_cast_fp16")]; + tensor var_8709_interleave_0 = const()[name = tensor("op_8709_interleave_0"), val = tensor(false)]; + tensor var_8709_cast_fp16 = concat(axis = var_7806, interleave = var_8709_interleave_0, values = (var_8655_cast_fp16, var_8657_cast_fp16, var_8659_cast_fp16, var_8661_cast_fp16))[name = tensor("op_8709_cast_fp16")]; + tensor var_8711_interleave_0 = const()[name = tensor("op_8711_interleave_0"), val = tensor(false)]; + tensor var_8711_cast_fp16 = concat(axis = var_7806, interleave = var_8711_interleave_0, values = (var_8663_cast_fp16, var_8665_cast_fp16, var_8667_cast_fp16, var_8669_cast_fp16))[name = tensor("op_8711_cast_fp16")]; + tensor var_8713_interleave_0 = const()[name = tensor("op_8713_interleave_0"), val = tensor(false)]; + tensor var_8713_cast_fp16 = concat(axis = var_7806, interleave = var_8713_interleave_0, values = (var_8671_cast_fp16, var_8673_cast_fp16, var_8675_cast_fp16, var_8677_cast_fp16))[name = tensor("op_8713_cast_fp16")]; + tensor var_8715_interleave_0 = const()[name = tensor("op_8715_interleave_0"), val = tensor(false)]; + tensor var_8715_cast_fp16 = concat(axis = var_7806, interleave = var_8715_interleave_0, values = (var_8679_cast_fp16, var_8681_cast_fp16, var_8683_cast_fp16, var_8685_cast_fp16))[name = tensor("op_8715_cast_fp16")]; + tensor var_8717_interleave_0 = const()[name = tensor("op_8717_interleave_0"), val = tensor(false)]; + tensor var_8717_cast_fp16 = concat(axis = var_7806, interleave = var_8717_interleave_0, values = (var_8687_cast_fp16, var_8689_cast_fp16, var_8691_cast_fp16, var_8693_cast_fp16))[name = tensor("op_8717_cast_fp16")]; + tensor input_65_interleave_0 = const()[name = tensor("input_65_interleave_0"), val = tensor(false)]; + tensor input_65_cast_fp16 = concat(axis = var_7823, interleave = input_65_interleave_0, values = (var_8695_cast_fp16, var_8697_cast_fp16, var_8699_cast_fp16, var_8701_cast_fp16, var_8703_cast_fp16, var_8705_cast_fp16, var_8707_cast_fp16, var_8709_cast_fp16, var_8711_cast_fp16, var_8713_cast_fp16, var_8715_cast_fp16, var_8717_cast_fp16))[name = tensor("input_65_cast_fp16")]; + tensor var_8722 = const()[name = tensor("op_8722"), val = tensor([1, 1])]; + tensor var_8724 = const()[name = tensor("op_8724"), val = tensor([1, 1])]; + tensor obj_35_pad_type_0 = const()[name = tensor("obj_35_pad_type_0"), val = tensor("custom")]; + tensor obj_35_pad_0 = const()[name = tensor("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123165120)))]; + tensor layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124344832)))]; + tensor obj_35_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = var_8724, groups = var_7823, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = var_8722, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = 
tensor("inputs_35_cast_fp16")]; + tensor var_8730 = const()[name = tensor("op_8730"), val = tensor([1])]; + tensor channels_mean_35_cast_fp16 = reduce_mean(axes = var_8730, keep_dims = var_7824, x = inputs_35_cast_fp16)[name = tensor("channels_mean_35_cast_fp16")]; + tensor zero_mean_35_cast_fp16 = sub(x = inputs_35_cast_fp16, y = channels_mean_35_cast_fp16)[name = tensor("zero_mean_35_cast_fp16")]; + tensor zero_mean_sq_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = zero_mean_35_cast_fp16)[name = tensor("zero_mean_sq_35_cast_fp16")]; + tensor var_8734 = const()[name = tensor("op_8734"), val = tensor([1])]; + tensor var_8735_cast_fp16 = reduce_mean(axes = var_8734, keep_dims = var_7824, x = zero_mean_sq_35_cast_fp16)[name = tensor("op_8735_cast_fp16")]; + tensor var_8736_to_fp16 = const()[name = tensor("op_8736_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_8737_cast_fp16 = add(x = var_8735_cast_fp16, y = var_8736_to_fp16)[name = tensor("op_8737_cast_fp16")]; + tensor denom_35_epsilon_0_to_fp16 = const()[name = tensor("denom_35_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0_to_fp16, x = var_8737_cast_fp16)[name = tensor("denom_35_cast_fp16")]; + tensor out_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = denom_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124346432)))]; + tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124348032)))]; + tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor var_8748 = const()[name = tensor("op_8748"), val = tensor([1, 1])]; + tensor var_8750 = const()[name = tensor("op_8750"), val = tensor([1, 1])]; + tensor input_69_pad_type_0 = const()[name = tensor("input_69_pad_type_0"), val = tensor("custom")]; + tensor input_69_pad_0 = const()[name = tensor("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_fc1_weight_to_fp16 = const()[name = tensor("layers_8_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124349632)))]; + tensor layers_8_fc1_bias_to_fp16 = const()[name = tensor("layers_8_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129068288)))]; + tensor input_69_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = var_8750, groups = var_7823, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = var_8748, weight = layers_8_fc1_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; + tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_8756 = const()[name = tensor("op_8756"), val = tensor([1, 1])]; + tensor var_8758 = const()[name = tensor("op_8758"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_type_0 = 
const()[name = tensor("hidden_states_21_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_21_pad_0 = const()[name = tensor("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_fc2_weight_to_fp16 = const()[name = tensor("layers_8_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129074496)))]; + tensor layers_8_fc2_bias_to_fp16 = const()[name = tensor("layers_8_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133793152)))]; + tensor hidden_states_21_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = var_8758, groups = var_7823, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = var_8756, weight = layers_8_fc2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_8765 = const()[name = tensor("op_8765"), val = tensor(3)]; + tensor var_8782 = const()[name = tensor("op_8782"), val = tensor(1)]; + tensor var_8783 = const()[name = tensor("op_8783"), val = tensor(true)]; + tensor var_8793 = const()[name = tensor("op_8793"), val = tensor([1])]; + tensor channels_mean_37_cast_fp16 = reduce_mean(axes = var_8793, keep_dims = var_8783, x = inputs_37_cast_fp16)[name = tensor("channels_mean_37_cast_fp16")]; + tensor zero_mean_37_cast_fp16 = sub(x = inputs_37_cast_fp16, y = channels_mean_37_cast_fp16)[name = tensor("zero_mean_37_cast_fp16")]; + tensor zero_mean_sq_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = zero_mean_37_cast_fp16)[name = tensor("zero_mean_sq_37_cast_fp16")]; + tensor var_8797 = const()[name = tensor("op_8797"), val = tensor([1])]; + tensor var_8798_cast_fp16 = reduce_mean(axes = var_8797, keep_dims = var_8783, x = zero_mean_sq_37_cast_fp16)[name = tensor("op_8798_cast_fp16")]; + tensor var_8799_to_fp16 = const()[name = tensor("op_8799_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_8800_cast_fp16 = add(x = var_8798_cast_fp16, y = var_8799_to_fp16)[name = tensor("op_8800_cast_fp16")]; + tensor denom_37_epsilon_0_to_fp16 = const()[name = tensor("denom_37_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0_to_fp16, x = var_8800_cast_fp16)[name = tensor("denom_37_cast_fp16")]; + tensor out_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = denom_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133794752)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133796352)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_8815 = const()[name = tensor("op_8815"), val = tensor([1, 1])]; + tensor var_8817 = const()[name = tensor("op_8817"), val = tensor([1, 1])]; + tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = 
tensor("custom")]; + tensor query_19_pad_0 = const()[name = tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133797952)))]; + tensor layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134977664)))]; + tensor query_19_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = var_8817, groups = var_8782, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = var_8815, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_8821 = const()[name = tensor("op_8821"), val = tensor([1, 1])]; + tensor var_8823 = const()[name = tensor("op_8823"), val = tensor([1, 1])]; + tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("custom")]; + tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134979264)))]; + tensor key_19_cast_fp16 = conv(dilations = var_8823, groups = var_8782, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = var_8821, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_8828 = const()[name = tensor("op_8828"), val = tensor([1, 1])]; + tensor var_8830 = const()[name = tensor("op_8830"), val = tensor([1, 1])]; + tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("custom")]; + tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136158976)))]; + tensor layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137338688)))]; + tensor value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = var_8830, groups = var_8782, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = var_8828, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_8837_begin_0 = const()[name = tensor("op_8837_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8837_end_0 = const()[name = tensor("op_8837_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8837_end_mask_0 = const()[name = tensor("op_8837_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8837_cast_fp16 = slice_by_index(begin = var_8837_begin_0, end = var_8837_end_0, end_mask = var_8837_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8837_cast_fp16")]; + tensor var_8841_begin_0 = const()[name = tensor("op_8841_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_8841_end_0 = const()[name = tensor("op_8841_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_8841_end_mask_0 = const()[name = tensor("op_8841_end_mask_0"), 
val = tensor([true, false, true, true])]; + tensor var_8841_cast_fp16 = slice_by_index(begin = var_8841_begin_0, end = var_8841_end_0, end_mask = var_8841_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8841_cast_fp16")]; + tensor var_8845_begin_0 = const()[name = tensor("op_8845_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_8845_end_0 = const()[name = tensor("op_8845_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_8845_end_mask_0 = const()[name = tensor("op_8845_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8845_cast_fp16 = slice_by_index(begin = var_8845_begin_0, end = var_8845_end_0, end_mask = var_8845_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8845_cast_fp16")]; + tensor var_8849_begin_0 = const()[name = tensor("op_8849_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_8849_end_0 = const()[name = tensor("op_8849_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_8849_end_mask_0 = const()[name = tensor("op_8849_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8849_cast_fp16 = slice_by_index(begin = var_8849_begin_0, end = var_8849_end_0, end_mask = var_8849_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8849_cast_fp16")]; + tensor var_8853_begin_0 = const()[name = tensor("op_8853_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_8853_end_0 = const()[name = tensor("op_8853_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_8853_end_mask_0 = const()[name = tensor("op_8853_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8853_cast_fp16 = slice_by_index(begin = var_8853_begin_0, end = var_8853_end_0, end_mask = var_8853_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8853_cast_fp16")]; + tensor var_8857_begin_0 = const()[name = tensor("op_8857_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_8857_end_0 = const()[name = tensor("op_8857_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_8857_end_mask_0 = const()[name = tensor("op_8857_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8857_cast_fp16 = slice_by_index(begin = var_8857_begin_0, end = var_8857_end_0, end_mask = var_8857_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8857_cast_fp16")]; + tensor var_8861_begin_0 = const()[name = tensor("op_8861_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_8861_end_0 = const()[name = tensor("op_8861_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_8861_end_mask_0 = const()[name = tensor("op_8861_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8861_cast_fp16 = slice_by_index(begin = var_8861_begin_0, end = var_8861_end_0, end_mask = var_8861_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8861_cast_fp16")]; + tensor var_8865_begin_0 = const()[name = tensor("op_8865_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_8865_end_0 = const()[name = tensor("op_8865_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_8865_end_mask_0 = const()[name = tensor("op_8865_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8865_cast_fp16 = slice_by_index(begin = var_8865_begin_0, end = var_8865_end_0, end_mask = var_8865_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8865_cast_fp16")]; + tensor var_8869_begin_0 = const()[name = tensor("op_8869_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_8869_end_0 = const()[name = tensor("op_8869_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_8869_end_mask_0 = const()[name = tensor("op_8869_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_8869_cast_fp16 = slice_by_index(begin = var_8869_begin_0, end = var_8869_end_0, end_mask = var_8869_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8869_cast_fp16")]; + tensor var_8873_begin_0 = const()[name = tensor("op_8873_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_8873_end_0 = const()[name = tensor("op_8873_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_8873_end_mask_0 = const()[name = tensor("op_8873_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8873_cast_fp16 = slice_by_index(begin = var_8873_begin_0, end = var_8873_end_0, end_mask = var_8873_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8873_cast_fp16")]; + tensor var_8877_begin_0 = const()[name = tensor("op_8877_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_8877_end_0 = const()[name = tensor("op_8877_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_8877_end_mask_0 = const()[name = tensor("op_8877_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8877_cast_fp16 = slice_by_index(begin = var_8877_begin_0, end = var_8877_end_0, end_mask = var_8877_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8877_cast_fp16")]; + tensor var_8881_begin_0 = const()[name = tensor("op_8881_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_8881_end_0 = const()[name = tensor("op_8881_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_8881_end_mask_0 = const()[name = tensor("op_8881_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8881_cast_fp16 = slice_by_index(begin = var_8881_begin_0, end = var_8881_end_0, end_mask = var_8881_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8881_cast_fp16")]; + tensor var_8890_begin_0 = const()[name = tensor("op_8890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8890_end_0 = const()[name = tensor("op_8890_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8890_end_mask_0 = const()[name = tensor("op_8890_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8890_cast_fp16 = slice_by_index(begin = var_8890_begin_0, end = var_8890_end_0, end_mask = var_8890_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8890_cast_fp16")]; + tensor var_8897_begin_0 = const()[name = tensor("op_8897_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8897_end_0 = const()[name = tensor("op_8897_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8897_end_mask_0 = const()[name = tensor("op_8897_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8897_cast_fp16 = slice_by_index(begin = var_8897_begin_0, end = var_8897_end_0, end_mask = var_8897_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8897_cast_fp16")]; + tensor var_8904_begin_0 = const()[name = tensor("op_8904_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8904_end_0 = const()[name = tensor("op_8904_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8904_end_mask_0 = const()[name = tensor("op_8904_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8904_cast_fp16 = slice_by_index(begin = var_8904_begin_0, end = var_8904_end_0, end_mask = var_8904_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8904_cast_fp16")]; + tensor var_8911_begin_0 = const()[name = tensor("op_8911_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8911_end_0 = const()[name = tensor("op_8911_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8911_end_mask_0 = const()[name = tensor("op_8911_end_mask_0"), val = tensor([true, true, true, false])]; + 
tensor var_8911_cast_fp16 = slice_by_index(begin = var_8911_begin_0, end = var_8911_end_0, end_mask = var_8911_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8911_cast_fp16")]; + tensor var_8918_begin_0 = const()[name = tensor("op_8918_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8918_end_0 = const()[name = tensor("op_8918_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8918_end_mask_0 = const()[name = tensor("op_8918_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8918_cast_fp16 = slice_by_index(begin = var_8918_begin_0, end = var_8918_end_0, end_mask = var_8918_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8918_cast_fp16")]; + tensor var_8925_begin_0 = const()[name = tensor("op_8925_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8925_end_0 = const()[name = tensor("op_8925_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8925_end_mask_0 = const()[name = tensor("op_8925_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8925_cast_fp16 = slice_by_index(begin = var_8925_begin_0, end = var_8925_end_0, end_mask = var_8925_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8925_cast_fp16")]; + tensor var_8932_begin_0 = const()[name = tensor("op_8932_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8932_end_0 = const()[name = tensor("op_8932_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8932_end_mask_0 = const()[name = tensor("op_8932_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8932_cast_fp16 = slice_by_index(begin = var_8932_begin_0, end = var_8932_end_0, end_mask = var_8932_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8932_cast_fp16")]; + tensor var_8939_begin_0 = const()[name = tensor("op_8939_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8939_end_0 = const()[name = tensor("op_8939_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8939_end_mask_0 = const()[name = tensor("op_8939_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8939_cast_fp16 = slice_by_index(begin = var_8939_begin_0, end = var_8939_end_0, end_mask = var_8939_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8939_cast_fp16")]; + tensor var_8946_begin_0 = const()[name = tensor("op_8946_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8946_end_0 = const()[name = tensor("op_8946_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8946_end_mask_0 = const()[name = tensor("op_8946_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8946_cast_fp16 = slice_by_index(begin = var_8946_begin_0, end = var_8946_end_0, end_mask = var_8946_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8946_cast_fp16")]; + tensor var_8953_begin_0 = const()[name = tensor("op_8953_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8953_end_0 = const()[name = tensor("op_8953_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8953_end_mask_0 = const()[name = tensor("op_8953_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8953_cast_fp16 = slice_by_index(begin = var_8953_begin_0, end = var_8953_end_0, end_mask = var_8953_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8953_cast_fp16")]; + tensor var_8960_begin_0 = const()[name = tensor("op_8960_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8960_end_0 = const()[name = tensor("op_8960_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8960_end_mask_0 = const()[name = tensor("op_8960_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8960_cast_fp16 = 
slice_by_index(begin = var_8960_begin_0, end = var_8960_end_0, end_mask = var_8960_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8960_cast_fp16")]; + tensor var_8967_begin_0 = const()[name = tensor("op_8967_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8967_end_0 = const()[name = tensor("op_8967_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8967_end_mask_0 = const()[name = tensor("op_8967_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8967_cast_fp16 = slice_by_index(begin = var_8967_begin_0, end = var_8967_end_0, end_mask = var_8967_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8967_cast_fp16")]; + tensor var_8974_begin_0 = const()[name = tensor("op_8974_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8974_end_0 = const()[name = tensor("op_8974_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8974_end_mask_0 = const()[name = tensor("op_8974_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8974_cast_fp16 = slice_by_index(begin = var_8974_begin_0, end = var_8974_end_0, end_mask = var_8974_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8974_cast_fp16")]; + tensor var_8981_begin_0 = const()[name = tensor("op_8981_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8981_end_0 = const()[name = tensor("op_8981_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8981_end_mask_0 = const()[name = tensor("op_8981_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8981_cast_fp16 = slice_by_index(begin = var_8981_begin_0, end = var_8981_end_0, end_mask = var_8981_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8981_cast_fp16")]; + tensor var_8988_begin_0 = const()[name = tensor("op_8988_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8988_end_0 = const()[name = tensor("op_8988_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8988_end_mask_0 = const()[name = tensor("op_8988_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8988_cast_fp16 = slice_by_index(begin = var_8988_begin_0, end = var_8988_end_0, end_mask = var_8988_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8988_cast_fp16")]; + tensor var_8995_begin_0 = const()[name = tensor("op_8995_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8995_end_0 = const()[name = tensor("op_8995_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8995_end_mask_0 = const()[name = tensor("op_8995_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8995_cast_fp16 = slice_by_index(begin = var_8995_begin_0, end = var_8995_end_0, end_mask = var_8995_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8995_cast_fp16")]; + tensor var_9002_begin_0 = const()[name = tensor("op_9002_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9002_end_0 = const()[name = tensor("op_9002_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9002_end_mask_0 = const()[name = tensor("op_9002_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9002_cast_fp16 = slice_by_index(begin = var_9002_begin_0, end = var_9002_end_0, end_mask = var_9002_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_9002_cast_fp16")]; + tensor var_9009_begin_0 = const()[name = tensor("op_9009_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9009_end_0 = const()[name = tensor("op_9009_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9009_end_mask_0 = const()[name = tensor("op_9009_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9009_cast_fp16 = slice_by_index(begin = 
var_9009_begin_0, end = var_9009_end_0, end_mask = var_9009_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_9009_cast_fp16")]; + tensor var_9016_begin_0 = const()[name = tensor("op_9016_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9016_end_0 = const()[name = tensor("op_9016_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9016_end_mask_0 = const()[name = tensor("op_9016_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9016_cast_fp16 = slice_by_index(begin = var_9016_begin_0, end = var_9016_end_0, end_mask = var_9016_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_9016_cast_fp16")]; + tensor var_9023_begin_0 = const()[name = tensor("op_9023_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9023_end_0 = const()[name = tensor("op_9023_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9023_end_mask_0 = const()[name = tensor("op_9023_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9023_cast_fp16 = slice_by_index(begin = var_9023_begin_0, end = var_9023_end_0, end_mask = var_9023_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_9023_cast_fp16")]; + tensor var_9030_begin_0 = const()[name = tensor("op_9030_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9030_end_0 = const()[name = tensor("op_9030_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9030_end_mask_0 = const()[name = tensor("op_9030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9030_cast_fp16 = slice_by_index(begin = var_9030_begin_0, end = var_9030_end_0, end_mask = var_9030_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_9030_cast_fp16")]; + tensor var_9037_begin_0 = const()[name = tensor("op_9037_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9037_end_0 = const()[name = tensor("op_9037_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9037_end_mask_0 = const()[name = tensor("op_9037_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9037_cast_fp16 = slice_by_index(begin = var_9037_begin_0, end = var_9037_end_0, end_mask = var_9037_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_9037_cast_fp16")]; + tensor var_9044_begin_0 = const()[name = tensor("op_9044_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9044_end_0 = const()[name = tensor("op_9044_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9044_end_mask_0 = const()[name = tensor("op_9044_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9044_cast_fp16 = slice_by_index(begin = var_9044_begin_0, end = var_9044_end_0, end_mask = var_9044_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_9044_cast_fp16")]; + tensor var_9051_begin_0 = const()[name = tensor("op_9051_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9051_end_0 = const()[name = tensor("op_9051_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9051_end_mask_0 = const()[name = tensor("op_9051_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9051_cast_fp16 = slice_by_index(begin = var_9051_begin_0, end = var_9051_end_0, end_mask = var_9051_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_9051_cast_fp16")]; + tensor var_9058_begin_0 = const()[name = tensor("op_9058_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9058_end_0 = const()[name = tensor("op_9058_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9058_end_mask_0 = const()[name = tensor("op_9058_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9058_cast_fp16 = slice_by_index(begin = var_9058_begin_0, end = 
var_9058_end_0, end_mask = var_9058_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_9058_cast_fp16")]; + tensor var_9065_begin_0 = const()[name = tensor("op_9065_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9065_end_0 = const()[name = tensor("op_9065_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9065_end_mask_0 = const()[name = tensor("op_9065_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9065_cast_fp16 = slice_by_index(begin = var_9065_begin_0, end = var_9065_end_0, end_mask = var_9065_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_9065_cast_fp16")]; + tensor var_9072_begin_0 = const()[name = tensor("op_9072_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9072_end_0 = const()[name = tensor("op_9072_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9072_end_mask_0 = const()[name = tensor("op_9072_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9072_cast_fp16 = slice_by_index(begin = var_9072_begin_0, end = var_9072_end_0, end_mask = var_9072_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_9072_cast_fp16")]; + tensor var_9079_begin_0 = const()[name = tensor("op_9079_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9079_end_0 = const()[name = tensor("op_9079_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9079_end_mask_0 = const()[name = tensor("op_9079_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9079_cast_fp16 = slice_by_index(begin = var_9079_begin_0, end = var_9079_end_0, end_mask = var_9079_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_9079_cast_fp16")]; + tensor var_9086_begin_0 = const()[name = tensor("op_9086_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9086_end_0 = const()[name = tensor("op_9086_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9086_end_mask_0 = const()[name = tensor("op_9086_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9086_cast_fp16 = slice_by_index(begin = var_9086_begin_0, end = var_9086_end_0, end_mask = var_9086_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_9086_cast_fp16")]; + tensor var_9093_begin_0 = const()[name = tensor("op_9093_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9093_end_0 = const()[name = tensor("op_9093_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9093_end_mask_0 = const()[name = tensor("op_9093_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9093_cast_fp16 = slice_by_index(begin = var_9093_begin_0, end = var_9093_end_0, end_mask = var_9093_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_9093_cast_fp16")]; + tensor var_9100_begin_0 = const()[name = tensor("op_9100_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9100_end_0 = const()[name = tensor("op_9100_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9100_end_mask_0 = const()[name = tensor("op_9100_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9100_cast_fp16 = slice_by_index(begin = var_9100_begin_0, end = var_9100_end_0, end_mask = var_9100_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_9100_cast_fp16")]; + tensor var_9107_begin_0 = const()[name = tensor("op_9107_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9107_end_0 = const()[name = tensor("op_9107_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9107_end_mask_0 = const()[name = tensor("op_9107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9107_cast_fp16 = slice_by_index(begin = var_9107_begin_0, end = var_9107_end_0, end_mask = 
var_9107_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_9107_cast_fp16")]; + tensor var_9114_begin_0 = const()[name = tensor("op_9114_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9114_end_0 = const()[name = tensor("op_9114_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9114_end_mask_0 = const()[name = tensor("op_9114_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9114_cast_fp16 = slice_by_index(begin = var_9114_begin_0, end = var_9114_end_0, end_mask = var_9114_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_9114_cast_fp16")]; + tensor var_9121_begin_0 = const()[name = tensor("op_9121_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9121_end_0 = const()[name = tensor("op_9121_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9121_end_mask_0 = const()[name = tensor("op_9121_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9121_cast_fp16 = slice_by_index(begin = var_9121_begin_0, end = var_9121_end_0, end_mask = var_9121_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_9121_cast_fp16")]; + tensor var_9128_begin_0 = const()[name = tensor("op_9128_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9128_end_0 = const()[name = tensor("op_9128_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9128_end_mask_0 = const()[name = tensor("op_9128_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9128_cast_fp16 = slice_by_index(begin = var_9128_begin_0, end = var_9128_end_0, end_mask = var_9128_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_9128_cast_fp16")]; + tensor var_9135_begin_0 = const()[name = tensor("op_9135_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9135_end_0 = const()[name = tensor("op_9135_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9135_end_mask_0 = const()[name = tensor("op_9135_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9135_cast_fp16 = slice_by_index(begin = var_9135_begin_0, end = var_9135_end_0, end_mask = var_9135_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_9135_cast_fp16")]; + tensor var_9142_begin_0 = const()[name = tensor("op_9142_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9142_end_0 = const()[name = tensor("op_9142_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9142_end_mask_0 = const()[name = tensor("op_9142_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9142_cast_fp16 = slice_by_index(begin = var_9142_begin_0, end = var_9142_end_0, end_mask = var_9142_end_mask_0, x = var_8873_cast_fp16)[name = tensor("op_9142_cast_fp16")]; + tensor var_9149_begin_0 = const()[name = tensor("op_9149_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9149_end_0 = const()[name = tensor("op_9149_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9149_end_mask_0 = const()[name = tensor("op_9149_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9149_cast_fp16 = slice_by_index(begin = var_9149_begin_0, end = var_9149_end_0, end_mask = var_9149_end_mask_0, x = var_8873_cast_fp16)[name = tensor("op_9149_cast_fp16")]; + tensor var_9156_begin_0 = const()[name = tensor("op_9156_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9156_end_0 = const()[name = tensor("op_9156_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9156_end_mask_0 = const()[name = tensor("op_9156_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9156_cast_fp16 = slice_by_index(begin = var_9156_begin_0, end = var_9156_end_0, end_mask = var_9156_end_mask_0, x = 
var_8873_cast_fp16)[name = tensor("op_9156_cast_fp16")]; + tensor var_9163_begin_0 = const()[name = tensor("op_9163_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9163_end_0 = const()[name = tensor("op_9163_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9163_end_mask_0 = const()[name = tensor("op_9163_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9163_cast_fp16 = slice_by_index(begin = var_9163_begin_0, end = var_9163_end_0, end_mask = var_9163_end_mask_0, x = var_8873_cast_fp16)[name = tensor("op_9163_cast_fp16")]; + tensor var_9170_begin_0 = const()[name = tensor("op_9170_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9170_end_0 = const()[name = tensor("op_9170_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9170_end_mask_0 = const()[name = tensor("op_9170_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9170_cast_fp16 = slice_by_index(begin = var_9170_begin_0, end = var_9170_end_0, end_mask = var_9170_end_mask_0, x = var_8877_cast_fp16)[name = tensor("op_9170_cast_fp16")]; + tensor var_9177_begin_0 = const()[name = tensor("op_9177_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9177_end_0 = const()[name = tensor("op_9177_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9177_end_mask_0 = const()[name = tensor("op_9177_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9177_cast_fp16 = slice_by_index(begin = var_9177_begin_0, end = var_9177_end_0, end_mask = var_9177_end_mask_0, x = var_8877_cast_fp16)[name = tensor("op_9177_cast_fp16")]; + tensor var_9184_begin_0 = const()[name = tensor("op_9184_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9184_end_0 = const()[name = tensor("op_9184_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9184_end_mask_0 = const()[name = tensor("op_9184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9184_cast_fp16 = slice_by_index(begin = var_9184_begin_0, end = var_9184_end_0, end_mask = var_9184_end_mask_0, x = var_8877_cast_fp16)[name = tensor("op_9184_cast_fp16")]; + tensor var_9191_begin_0 = const()[name = tensor("op_9191_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9191_end_0 = const()[name = tensor("op_9191_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9191_end_mask_0 = const()[name = tensor("op_9191_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9191_cast_fp16 = slice_by_index(begin = var_9191_begin_0, end = var_9191_end_0, end_mask = var_9191_end_mask_0, x = var_8877_cast_fp16)[name = tensor("op_9191_cast_fp16")]; + tensor var_9198_begin_0 = const()[name = tensor("op_9198_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9198_end_0 = const()[name = tensor("op_9198_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9198_end_mask_0 = const()[name = tensor("op_9198_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9198_cast_fp16 = slice_by_index(begin = var_9198_begin_0, end = var_9198_end_0, end_mask = var_9198_end_mask_0, x = var_8881_cast_fp16)[name = tensor("op_9198_cast_fp16")]; + tensor var_9205_begin_0 = const()[name = tensor("op_9205_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9205_end_0 = const()[name = tensor("op_9205_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9205_end_mask_0 = const()[name = tensor("op_9205_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9205_cast_fp16 = slice_by_index(begin = var_9205_begin_0, end = var_9205_end_0, end_mask = var_9205_end_mask_0, x = var_8881_cast_fp16)[name = 
tensor("op_9205_cast_fp16")]; + tensor var_9212_begin_0 = const()[name = tensor("op_9212_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9212_end_0 = const()[name = tensor("op_9212_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9212_end_mask_0 = const()[name = tensor("op_9212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9212_cast_fp16 = slice_by_index(begin = var_9212_begin_0, end = var_9212_end_0, end_mask = var_9212_end_mask_0, x = var_8881_cast_fp16)[name = tensor("op_9212_cast_fp16")]; + tensor var_9219_begin_0 = const()[name = tensor("op_9219_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9219_end_0 = const()[name = tensor("op_9219_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9219_end_mask_0 = const()[name = tensor("op_9219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9219_cast_fp16 = slice_by_index(begin = var_9219_begin_0, end = var_9219_end_0, end_mask = var_9219_end_mask_0, x = var_8881_cast_fp16)[name = tensor("op_9219_cast_fp16")]; + tensor k_19_perm_0 = const()[name = tensor("k_19_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_9224_begin_0 = const()[name = tensor("op_9224_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9224_end_0 = const()[name = tensor("op_9224_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_9224_end_mask_0 = const()[name = tensor("op_9224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_2 = transpose(perm = k_19_perm_0, x = key_19_cast_fp16)[name = tensor("transpose_2")]; + tensor var_9224_cast_fp16 = slice_by_index(begin = var_9224_begin_0, end = var_9224_end_0, end_mask = var_9224_end_mask_0, x = transpose_2)[name = tensor("op_9224_cast_fp16")]; + tensor var_9228_begin_0 = const()[name = tensor("op_9228_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_9228_end_0 = const()[name = tensor("op_9228_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_9228_end_mask_0 = const()[name = tensor("op_9228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9228_cast_fp16 = slice_by_index(begin = var_9228_begin_0, end = var_9228_end_0, end_mask = var_9228_end_mask_0, x = transpose_2)[name = tensor("op_9228_cast_fp16")]; + tensor var_9232_begin_0 = const()[name = tensor("op_9232_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_9232_end_0 = const()[name = tensor("op_9232_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_9232_end_mask_0 = const()[name = tensor("op_9232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9232_cast_fp16 = slice_by_index(begin = var_9232_begin_0, end = var_9232_end_0, end_mask = var_9232_end_mask_0, x = transpose_2)[name = tensor("op_9232_cast_fp16")]; + tensor var_9236_begin_0 = const()[name = tensor("op_9236_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_9236_end_0 = const()[name = tensor("op_9236_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_9236_end_mask_0 = const()[name = tensor("op_9236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9236_cast_fp16 = slice_by_index(begin = var_9236_begin_0, end = var_9236_end_0, end_mask = var_9236_end_mask_0, x = transpose_2)[name = tensor("op_9236_cast_fp16")]; + tensor var_9240_begin_0 = const()[name = tensor("op_9240_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_9240_end_0 = const()[name = tensor("op_9240_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_9240_end_mask_0 = const()[name = tensor("op_9240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_9240_cast_fp16 = slice_by_index(begin = var_9240_begin_0, end = var_9240_end_0, end_mask = var_9240_end_mask_0, x = transpose_2)[name = tensor("op_9240_cast_fp16")]; + tensor var_9244_begin_0 = const()[name = tensor("op_9244_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_9244_end_0 = const()[name = tensor("op_9244_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_9244_end_mask_0 = const()[name = tensor("op_9244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9244_cast_fp16 = slice_by_index(begin = var_9244_begin_0, end = var_9244_end_0, end_mask = var_9244_end_mask_0, x = transpose_2)[name = tensor("op_9244_cast_fp16")]; + tensor var_9248_begin_0 = const()[name = tensor("op_9248_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_9248_end_0 = const()[name = tensor("op_9248_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_9248_end_mask_0 = const()[name = tensor("op_9248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9248_cast_fp16 = slice_by_index(begin = var_9248_begin_0, end = var_9248_end_0, end_mask = var_9248_end_mask_0, x = transpose_2)[name = tensor("op_9248_cast_fp16")]; + tensor var_9252_begin_0 = const()[name = tensor("op_9252_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_9252_end_0 = const()[name = tensor("op_9252_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_9252_end_mask_0 = const()[name = tensor("op_9252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9252_cast_fp16 = slice_by_index(begin = var_9252_begin_0, end = var_9252_end_0, end_mask = var_9252_end_mask_0, x = transpose_2)[name = tensor("op_9252_cast_fp16")]; + tensor var_9256_begin_0 = const()[name = tensor("op_9256_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_9256_end_0 = const()[name = tensor("op_9256_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_9256_end_mask_0 = const()[name = tensor("op_9256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9256_cast_fp16 = slice_by_index(begin = var_9256_begin_0, end = var_9256_end_0, end_mask = var_9256_end_mask_0, x = transpose_2)[name = tensor("op_9256_cast_fp16")]; + tensor var_9260_begin_0 = const()[name = tensor("op_9260_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_9260_end_0 = const()[name = tensor("op_9260_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_9260_end_mask_0 = const()[name = tensor("op_9260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9260_cast_fp16 = slice_by_index(begin = var_9260_begin_0, end = var_9260_end_0, end_mask = var_9260_end_mask_0, x = transpose_2)[name = tensor("op_9260_cast_fp16")]; + tensor var_9264_begin_0 = const()[name = tensor("op_9264_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_9264_end_0 = const()[name = tensor("op_9264_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_9264_end_mask_0 = const()[name = tensor("op_9264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9264_cast_fp16 = slice_by_index(begin = var_9264_begin_0, end = var_9264_end_0, end_mask = var_9264_end_mask_0, x = transpose_2)[name = tensor("op_9264_cast_fp16")]; + tensor var_9268_begin_0 = const()[name = tensor("op_9268_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_9268_end_0 = const()[name = tensor("op_9268_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_9268_end_mask_0 = const()[name = tensor("op_9268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9268_cast_fp16 = slice_by_index(begin = var_9268_begin_0, end = 
var_9268_end_0, end_mask = var_9268_end_mask_0, x = transpose_2)[name = tensor("op_9268_cast_fp16")]; + tensor var_9270_begin_0 = const()[name = tensor("op_9270_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9270_end_0 = const()[name = tensor("op_9270_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9270_end_mask_0 = const()[name = tensor("op_9270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9270_cast_fp16 = slice_by_index(begin = var_9270_begin_0, end = var_9270_end_0, end_mask = var_9270_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9270_cast_fp16")]; + tensor var_9274_begin_0 = const()[name = tensor("op_9274_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_9274_end_0 = const()[name = tensor("op_9274_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_9274_end_mask_0 = const()[name = tensor("op_9274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9274_cast_fp16 = slice_by_index(begin = var_9274_begin_0, end = var_9274_end_0, end_mask = var_9274_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9274_cast_fp16")]; + tensor var_9278_begin_0 = const()[name = tensor("op_9278_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_9278_end_0 = const()[name = tensor("op_9278_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_9278_end_mask_0 = const()[name = tensor("op_9278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9278_cast_fp16 = slice_by_index(begin = var_9278_begin_0, end = var_9278_end_0, end_mask = var_9278_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9278_cast_fp16")]; + tensor var_9282_begin_0 = const()[name = tensor("op_9282_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_9282_end_0 = const()[name = tensor("op_9282_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_9282_end_mask_0 = const()[name = tensor("op_9282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9282_cast_fp16 = slice_by_index(begin = var_9282_begin_0, end = var_9282_end_0, end_mask = var_9282_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9282_cast_fp16")]; + tensor var_9286_begin_0 = const()[name = tensor("op_9286_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_9286_end_0 = const()[name = tensor("op_9286_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_9286_end_mask_0 = const()[name = tensor("op_9286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9286_cast_fp16 = slice_by_index(begin = var_9286_begin_0, end = var_9286_end_0, end_mask = var_9286_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9286_cast_fp16")]; + tensor var_9290_begin_0 = const()[name = tensor("op_9290_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_9290_end_0 = const()[name = tensor("op_9290_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_9290_end_mask_0 = const()[name = tensor("op_9290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9290_cast_fp16 = slice_by_index(begin = var_9290_begin_0, end = var_9290_end_0, end_mask = var_9290_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9290_cast_fp16")]; + tensor var_9294_begin_0 = const()[name = tensor("op_9294_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_9294_end_0 = const()[name = tensor("op_9294_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_9294_end_mask_0 = const()[name = tensor("op_9294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9294_cast_fp16 = slice_by_index(begin = var_9294_begin_0, end = var_9294_end_0, end_mask = 
var_9294_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9294_cast_fp16")]; + tensor var_9298_begin_0 = const()[name = tensor("op_9298_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_9298_end_0 = const()[name = tensor("op_9298_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_9298_end_mask_0 = const()[name = tensor("op_9298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9298_cast_fp16 = slice_by_index(begin = var_9298_begin_0, end = var_9298_end_0, end_mask = var_9298_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9298_cast_fp16")]; + tensor var_9302_begin_0 = const()[name = tensor("op_9302_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_9302_end_0 = const()[name = tensor("op_9302_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_9302_end_mask_0 = const()[name = tensor("op_9302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9302_cast_fp16 = slice_by_index(begin = var_9302_begin_0, end = var_9302_end_0, end_mask = var_9302_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9302_cast_fp16")]; + tensor var_9306_begin_0 = const()[name = tensor("op_9306_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_9306_end_0 = const()[name = tensor("op_9306_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_9306_end_mask_0 = const()[name = tensor("op_9306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9306_cast_fp16 = slice_by_index(begin = var_9306_begin_0, end = var_9306_end_0, end_mask = var_9306_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9306_cast_fp16")]; + tensor var_9310_begin_0 = const()[name = tensor("op_9310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_9310_end_0 = const()[name = tensor("op_9310_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_9310_end_mask_0 = const()[name = tensor("op_9310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9310_cast_fp16 = slice_by_index(begin = var_9310_begin_0, end = var_9310_end_0, end_mask = var_9310_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9310_cast_fp16")]; + tensor var_9314_begin_0 = const()[name = tensor("op_9314_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_9314_end_0 = const()[name = tensor("op_9314_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_9314_end_mask_0 = const()[name = tensor("op_9314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9314_cast_fp16 = slice_by_index(begin = var_9314_begin_0, end = var_9314_end_0, end_mask = var_9314_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_9314_cast_fp16")]; + tensor var_9318_equation_0 = const()[name = tensor("op_9318_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9318_cast_fp16 = einsum(equation = var_9318_equation_0, values = (var_9224_cast_fp16, var_8890_cast_fp16))[name = tensor("op_9318_cast_fp16")]; + tensor var_9319_to_fp16 = const()[name = tensor("op_9319_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_865_cast_fp16 = mul(x = var_9318_cast_fp16, y = var_9319_to_fp16)[name = tensor("aw_chunk_865_cast_fp16")]; + tensor var_9322_equation_0 = const()[name = tensor("op_9322_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9322_cast_fp16 = einsum(equation = var_9322_equation_0, values = (var_9224_cast_fp16, var_8897_cast_fp16))[name = tensor("op_9322_cast_fp16")]; + tensor var_9323_to_fp16 = const()[name = tensor("op_9323_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_867_cast_fp16 = mul(x = var_9322_cast_fp16, y = var_9323_to_fp16)[name = 
tensor("aw_chunk_867_cast_fp16")]; + tensor var_9326_equation_0 = const()[name = tensor("op_9326_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9326_cast_fp16 = einsum(equation = var_9326_equation_0, values = (var_9224_cast_fp16, var_8904_cast_fp16))[name = tensor("op_9326_cast_fp16")]; + tensor var_9327_to_fp16 = const()[name = tensor("op_9327_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_869_cast_fp16 = mul(x = var_9326_cast_fp16, y = var_9327_to_fp16)[name = tensor("aw_chunk_869_cast_fp16")]; + tensor var_9330_equation_0 = const()[name = tensor("op_9330_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9330_cast_fp16 = einsum(equation = var_9330_equation_0, values = (var_9224_cast_fp16, var_8911_cast_fp16))[name = tensor("op_9330_cast_fp16")]; + tensor var_9331_to_fp16 = const()[name = tensor("op_9331_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_871_cast_fp16 = mul(x = var_9330_cast_fp16, y = var_9331_to_fp16)[name = tensor("aw_chunk_871_cast_fp16")]; + tensor var_9334_equation_0 = const()[name = tensor("op_9334_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9334_cast_fp16 = einsum(equation = var_9334_equation_0, values = (var_9228_cast_fp16, var_8918_cast_fp16))[name = tensor("op_9334_cast_fp16")]; + tensor var_9335_to_fp16 = const()[name = tensor("op_9335_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_873_cast_fp16 = mul(x = var_9334_cast_fp16, y = var_9335_to_fp16)[name = tensor("aw_chunk_873_cast_fp16")]; + tensor var_9338_equation_0 = const()[name = tensor("op_9338_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9338_cast_fp16 = einsum(equation = var_9338_equation_0, values = (var_9228_cast_fp16, var_8925_cast_fp16))[name = tensor("op_9338_cast_fp16")]; + tensor var_9339_to_fp16 = const()[name = tensor("op_9339_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_875_cast_fp16 = mul(x = var_9338_cast_fp16, y = var_9339_to_fp16)[name = tensor("aw_chunk_875_cast_fp16")]; + tensor var_9342_equation_0 = const()[name = tensor("op_9342_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9342_cast_fp16 = einsum(equation = var_9342_equation_0, values = (var_9228_cast_fp16, var_8932_cast_fp16))[name = tensor("op_9342_cast_fp16")]; + tensor var_9343_to_fp16 = const()[name = tensor("op_9343_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_877_cast_fp16 = mul(x = var_9342_cast_fp16, y = var_9343_to_fp16)[name = tensor("aw_chunk_877_cast_fp16")]; + tensor var_9346_equation_0 = const()[name = tensor("op_9346_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9346_cast_fp16 = einsum(equation = var_9346_equation_0, values = (var_9228_cast_fp16, var_8939_cast_fp16))[name = tensor("op_9346_cast_fp16")]; + tensor var_9347_to_fp16 = const()[name = tensor("op_9347_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_879_cast_fp16 = mul(x = var_9346_cast_fp16, y = var_9347_to_fp16)[name = tensor("aw_chunk_879_cast_fp16")]; + tensor var_9350_equation_0 = const()[name = tensor("op_9350_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9350_cast_fp16 = einsum(equation = var_9350_equation_0, values = (var_9232_cast_fp16, var_8946_cast_fp16))[name = tensor("op_9350_cast_fp16")]; + tensor var_9351_to_fp16 = const()[name = tensor("op_9351_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_881_cast_fp16 = mul(x = var_9350_cast_fp16, y = var_9351_to_fp16)[name = tensor("aw_chunk_881_cast_fp16")]; + tensor var_9354_equation_0 = const()[name = tensor("op_9354_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + 
tensor var_9354_cast_fp16 = einsum(equation = var_9354_equation_0, values = (var_9232_cast_fp16, var_8953_cast_fp16))[name = tensor("op_9354_cast_fp16")]; + tensor var_9355_to_fp16 = const()[name = tensor("op_9355_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_883_cast_fp16 = mul(x = var_9354_cast_fp16, y = var_9355_to_fp16)[name = tensor("aw_chunk_883_cast_fp16")]; + tensor var_9358_equation_0 = const()[name = tensor("op_9358_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9358_cast_fp16 = einsum(equation = var_9358_equation_0, values = (var_9232_cast_fp16, var_8960_cast_fp16))[name = tensor("op_9358_cast_fp16")]; + tensor var_9359_to_fp16 = const()[name = tensor("op_9359_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_885_cast_fp16 = mul(x = var_9358_cast_fp16, y = var_9359_to_fp16)[name = tensor("aw_chunk_885_cast_fp16")]; + tensor var_9362_equation_0 = const()[name = tensor("op_9362_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9362_cast_fp16 = einsum(equation = var_9362_equation_0, values = (var_9232_cast_fp16, var_8967_cast_fp16))[name = tensor("op_9362_cast_fp16")]; + tensor var_9363_to_fp16 = const()[name = tensor("op_9363_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_887_cast_fp16 = mul(x = var_9362_cast_fp16, y = var_9363_to_fp16)[name = tensor("aw_chunk_887_cast_fp16")]; + tensor var_9366_equation_0 = const()[name = tensor("op_9366_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9366_cast_fp16 = einsum(equation = var_9366_equation_0, values = (var_9236_cast_fp16, var_8974_cast_fp16))[name = tensor("op_9366_cast_fp16")]; + tensor var_9367_to_fp16 = const()[name = tensor("op_9367_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_889_cast_fp16 = mul(x = var_9366_cast_fp16, y = var_9367_to_fp16)[name = tensor("aw_chunk_889_cast_fp16")]; + tensor var_9370_equation_0 = const()[name = tensor("op_9370_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9370_cast_fp16 = einsum(equation = var_9370_equation_0, values = (var_9236_cast_fp16, var_8981_cast_fp16))[name = tensor("op_9370_cast_fp16")]; + tensor var_9371_to_fp16 = const()[name = tensor("op_9371_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_891_cast_fp16 = mul(x = var_9370_cast_fp16, y = var_9371_to_fp16)[name = tensor("aw_chunk_891_cast_fp16")]; + tensor var_9374_equation_0 = const()[name = tensor("op_9374_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9374_cast_fp16 = einsum(equation = var_9374_equation_0, values = (var_9236_cast_fp16, var_8988_cast_fp16))[name = tensor("op_9374_cast_fp16")]; + tensor var_9375_to_fp16 = const()[name = tensor("op_9375_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_893_cast_fp16 = mul(x = var_9374_cast_fp16, y = var_9375_to_fp16)[name = tensor("aw_chunk_893_cast_fp16")]; + tensor var_9378_equation_0 = const()[name = tensor("op_9378_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9378_cast_fp16 = einsum(equation = var_9378_equation_0, values = (var_9236_cast_fp16, var_8995_cast_fp16))[name = tensor("op_9378_cast_fp16")]; + tensor var_9379_to_fp16 = const()[name = tensor("op_9379_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_895_cast_fp16 = mul(x = var_9378_cast_fp16, y = var_9379_to_fp16)[name = tensor("aw_chunk_895_cast_fp16")]; + tensor var_9382_equation_0 = const()[name = tensor("op_9382_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9382_cast_fp16 = einsum(equation = var_9382_equation_0, values = (var_9240_cast_fp16, var_9002_cast_fp16))[name = 
tensor("op_9382_cast_fp16")]; + tensor var_9383_to_fp16 = const()[name = tensor("op_9383_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_897_cast_fp16 = mul(x = var_9382_cast_fp16, y = var_9383_to_fp16)[name = tensor("aw_chunk_897_cast_fp16")]; + tensor var_9386_equation_0 = const()[name = tensor("op_9386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9386_cast_fp16 = einsum(equation = var_9386_equation_0, values = (var_9240_cast_fp16, var_9009_cast_fp16))[name = tensor("op_9386_cast_fp16")]; + tensor var_9387_to_fp16 = const()[name = tensor("op_9387_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_899_cast_fp16 = mul(x = var_9386_cast_fp16, y = var_9387_to_fp16)[name = tensor("aw_chunk_899_cast_fp16")]; + tensor var_9390_equation_0 = const()[name = tensor("op_9390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9390_cast_fp16 = einsum(equation = var_9390_equation_0, values = (var_9240_cast_fp16, var_9016_cast_fp16))[name = tensor("op_9390_cast_fp16")]; + tensor var_9391_to_fp16 = const()[name = tensor("op_9391_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_901_cast_fp16 = mul(x = var_9390_cast_fp16, y = var_9391_to_fp16)[name = tensor("aw_chunk_901_cast_fp16")]; + tensor var_9394_equation_0 = const()[name = tensor("op_9394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9394_cast_fp16 = einsum(equation = var_9394_equation_0, values = (var_9240_cast_fp16, var_9023_cast_fp16))[name = tensor("op_9394_cast_fp16")]; + tensor var_9395_to_fp16 = const()[name = tensor("op_9395_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_903_cast_fp16 = mul(x = var_9394_cast_fp16, y = var_9395_to_fp16)[name = tensor("aw_chunk_903_cast_fp16")]; + tensor var_9398_equation_0 = const()[name = tensor("op_9398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9398_cast_fp16 = einsum(equation = var_9398_equation_0, values = (var_9244_cast_fp16, var_9030_cast_fp16))[name = tensor("op_9398_cast_fp16")]; + tensor var_9399_to_fp16 = const()[name = tensor("op_9399_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_905_cast_fp16 = mul(x = var_9398_cast_fp16, y = var_9399_to_fp16)[name = tensor("aw_chunk_905_cast_fp16")]; + tensor var_9402_equation_0 = const()[name = tensor("op_9402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9402_cast_fp16 = einsum(equation = var_9402_equation_0, values = (var_9244_cast_fp16, var_9037_cast_fp16))[name = tensor("op_9402_cast_fp16")]; + tensor var_9403_to_fp16 = const()[name = tensor("op_9403_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_907_cast_fp16 = mul(x = var_9402_cast_fp16, y = var_9403_to_fp16)[name = tensor("aw_chunk_907_cast_fp16")]; + tensor var_9406_equation_0 = const()[name = tensor("op_9406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9406_cast_fp16 = einsum(equation = var_9406_equation_0, values = (var_9244_cast_fp16, var_9044_cast_fp16))[name = tensor("op_9406_cast_fp16")]; + tensor var_9407_to_fp16 = const()[name = tensor("op_9407_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_909_cast_fp16 = mul(x = var_9406_cast_fp16, y = var_9407_to_fp16)[name = tensor("aw_chunk_909_cast_fp16")]; + tensor var_9410_equation_0 = const()[name = tensor("op_9410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9410_cast_fp16 = einsum(equation = var_9410_equation_0, values = (var_9244_cast_fp16, var_9051_cast_fp16))[name = tensor("op_9410_cast_fp16")]; + tensor var_9411_to_fp16 = const()[name = tensor("op_9411_to_fp16"), val = tensor(0x1p-3)]; + tensor 
aw_chunk_911_cast_fp16 = mul(x = var_9410_cast_fp16, y = var_9411_to_fp16)[name = tensor("aw_chunk_911_cast_fp16")]; + tensor var_9414_equation_0 = const()[name = tensor("op_9414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9414_cast_fp16 = einsum(equation = var_9414_equation_0, values = (var_9248_cast_fp16, var_9058_cast_fp16))[name = tensor("op_9414_cast_fp16")]; + tensor var_9415_to_fp16 = const()[name = tensor("op_9415_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_913_cast_fp16 = mul(x = var_9414_cast_fp16, y = var_9415_to_fp16)[name = tensor("aw_chunk_913_cast_fp16")]; + tensor var_9418_equation_0 = const()[name = tensor("op_9418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9418_cast_fp16 = einsum(equation = var_9418_equation_0, values = (var_9248_cast_fp16, var_9065_cast_fp16))[name = tensor("op_9418_cast_fp16")]; + tensor var_9419_to_fp16 = const()[name = tensor("op_9419_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_915_cast_fp16 = mul(x = var_9418_cast_fp16, y = var_9419_to_fp16)[name = tensor("aw_chunk_915_cast_fp16")]; + tensor var_9422_equation_0 = const()[name = tensor("op_9422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9422_cast_fp16 = einsum(equation = var_9422_equation_0, values = (var_9248_cast_fp16, var_9072_cast_fp16))[name = tensor("op_9422_cast_fp16")]; + tensor var_9423_to_fp16 = const()[name = tensor("op_9423_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_917_cast_fp16 = mul(x = var_9422_cast_fp16, y = var_9423_to_fp16)[name = tensor("aw_chunk_917_cast_fp16")]; + tensor var_9426_equation_0 = const()[name = tensor("op_9426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9426_cast_fp16 = einsum(equation = var_9426_equation_0, values = (var_9248_cast_fp16, var_9079_cast_fp16))[name = tensor("op_9426_cast_fp16")]; + tensor var_9427_to_fp16 = const()[name = tensor("op_9427_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_919_cast_fp16 = mul(x = var_9426_cast_fp16, y = var_9427_to_fp16)[name = tensor("aw_chunk_919_cast_fp16")]; + tensor var_9430_equation_0 = const()[name = tensor("op_9430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9430_cast_fp16 = einsum(equation = var_9430_equation_0, values = (var_9252_cast_fp16, var_9086_cast_fp16))[name = tensor("op_9430_cast_fp16")]; + tensor var_9431_to_fp16 = const()[name = tensor("op_9431_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_921_cast_fp16 = mul(x = var_9430_cast_fp16, y = var_9431_to_fp16)[name = tensor("aw_chunk_921_cast_fp16")]; + tensor var_9434_equation_0 = const()[name = tensor("op_9434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9434_cast_fp16 = einsum(equation = var_9434_equation_0, values = (var_9252_cast_fp16, var_9093_cast_fp16))[name = tensor("op_9434_cast_fp16")]; + tensor var_9435_to_fp16 = const()[name = tensor("op_9435_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_923_cast_fp16 = mul(x = var_9434_cast_fp16, y = var_9435_to_fp16)[name = tensor("aw_chunk_923_cast_fp16")]; + tensor var_9438_equation_0 = const()[name = tensor("op_9438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9438_cast_fp16 = einsum(equation = var_9438_equation_0, values = (var_9252_cast_fp16, var_9100_cast_fp16))[name = tensor("op_9438_cast_fp16")]; + tensor var_9439_to_fp16 = const()[name = tensor("op_9439_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_925_cast_fp16 = mul(x = var_9438_cast_fp16, y = var_9439_to_fp16)[name = tensor("aw_chunk_925_cast_fp16")]; + tensor var_9442_equation_0 = 
const()[name = tensor("op_9442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9442_cast_fp16 = einsum(equation = var_9442_equation_0, values = (var_9252_cast_fp16, var_9107_cast_fp16))[name = tensor("op_9442_cast_fp16")]; + tensor var_9443_to_fp16 = const()[name = tensor("op_9443_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_927_cast_fp16 = mul(x = var_9442_cast_fp16, y = var_9443_to_fp16)[name = tensor("aw_chunk_927_cast_fp16")]; + tensor var_9446_equation_0 = const()[name = tensor("op_9446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9446_cast_fp16 = einsum(equation = var_9446_equation_0, values = (var_9256_cast_fp16, var_9114_cast_fp16))[name = tensor("op_9446_cast_fp16")]; + tensor var_9447_to_fp16 = const()[name = tensor("op_9447_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_929_cast_fp16 = mul(x = var_9446_cast_fp16, y = var_9447_to_fp16)[name = tensor("aw_chunk_929_cast_fp16")]; + tensor var_9450_equation_0 = const()[name = tensor("op_9450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9450_cast_fp16 = einsum(equation = var_9450_equation_0, values = (var_9256_cast_fp16, var_9121_cast_fp16))[name = tensor("op_9450_cast_fp16")]; + tensor var_9451_to_fp16 = const()[name = tensor("op_9451_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_931_cast_fp16 = mul(x = var_9450_cast_fp16, y = var_9451_to_fp16)[name = tensor("aw_chunk_931_cast_fp16")]; + tensor var_9454_equation_0 = const()[name = tensor("op_9454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9454_cast_fp16 = einsum(equation = var_9454_equation_0, values = (var_9256_cast_fp16, var_9128_cast_fp16))[name = tensor("op_9454_cast_fp16")]; + tensor var_9455_to_fp16 = const()[name = tensor("op_9455_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_933_cast_fp16 = mul(x = var_9454_cast_fp16, y = var_9455_to_fp16)[name = tensor("aw_chunk_933_cast_fp16")]; + tensor var_9458_equation_0 = const()[name = tensor("op_9458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9458_cast_fp16 = einsum(equation = var_9458_equation_0, values = (var_9256_cast_fp16, var_9135_cast_fp16))[name = tensor("op_9458_cast_fp16")]; + tensor var_9459_to_fp16 = const()[name = tensor("op_9459_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_935_cast_fp16 = mul(x = var_9458_cast_fp16, y = var_9459_to_fp16)[name = tensor("aw_chunk_935_cast_fp16")]; + tensor var_9462_equation_0 = const()[name = tensor("op_9462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9462_cast_fp16 = einsum(equation = var_9462_equation_0, values = (var_9260_cast_fp16, var_9142_cast_fp16))[name = tensor("op_9462_cast_fp16")]; + tensor var_9463_to_fp16 = const()[name = tensor("op_9463_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_937_cast_fp16 = mul(x = var_9462_cast_fp16, y = var_9463_to_fp16)[name = tensor("aw_chunk_937_cast_fp16")]; + tensor var_9466_equation_0 = const()[name = tensor("op_9466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9466_cast_fp16 = einsum(equation = var_9466_equation_0, values = (var_9260_cast_fp16, var_9149_cast_fp16))[name = tensor("op_9466_cast_fp16")]; + tensor var_9467_to_fp16 = const()[name = tensor("op_9467_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_939_cast_fp16 = mul(x = var_9466_cast_fp16, y = var_9467_to_fp16)[name = tensor("aw_chunk_939_cast_fp16")]; + tensor var_9470_equation_0 = const()[name = tensor("op_9470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9470_cast_fp16 = einsum(equation = var_9470_equation_0, 
values = (var_9260_cast_fp16, var_9156_cast_fp16))[name = tensor("op_9470_cast_fp16")]; + tensor var_9471_to_fp16 = const()[name = tensor("op_9471_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_941_cast_fp16 = mul(x = var_9470_cast_fp16, y = var_9471_to_fp16)[name = tensor("aw_chunk_941_cast_fp16")]; + tensor var_9474_equation_0 = const()[name = tensor("op_9474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9474_cast_fp16 = einsum(equation = var_9474_equation_0, values = (var_9260_cast_fp16, var_9163_cast_fp16))[name = tensor("op_9474_cast_fp16")]; + tensor var_9475_to_fp16 = const()[name = tensor("op_9475_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_943_cast_fp16 = mul(x = var_9474_cast_fp16, y = var_9475_to_fp16)[name = tensor("aw_chunk_943_cast_fp16")]; + tensor var_9478_equation_0 = const()[name = tensor("op_9478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9478_cast_fp16 = einsum(equation = var_9478_equation_0, values = (var_9264_cast_fp16, var_9170_cast_fp16))[name = tensor("op_9478_cast_fp16")]; + tensor var_9479_to_fp16 = const()[name = tensor("op_9479_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_945_cast_fp16 = mul(x = var_9478_cast_fp16, y = var_9479_to_fp16)[name = tensor("aw_chunk_945_cast_fp16")]; + tensor var_9482_equation_0 = const()[name = tensor("op_9482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9482_cast_fp16 = einsum(equation = var_9482_equation_0, values = (var_9264_cast_fp16, var_9177_cast_fp16))[name = tensor("op_9482_cast_fp16")]; + tensor var_9483_to_fp16 = const()[name = tensor("op_9483_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_947_cast_fp16 = mul(x = var_9482_cast_fp16, y = var_9483_to_fp16)[name = tensor("aw_chunk_947_cast_fp16")]; + tensor var_9486_equation_0 = const()[name = tensor("op_9486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9486_cast_fp16 = einsum(equation = var_9486_equation_0, values = (var_9264_cast_fp16, var_9184_cast_fp16))[name = tensor("op_9486_cast_fp16")]; + tensor var_9487_to_fp16 = const()[name = tensor("op_9487_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_949_cast_fp16 = mul(x = var_9486_cast_fp16, y = var_9487_to_fp16)[name = tensor("aw_chunk_949_cast_fp16")]; + tensor var_9490_equation_0 = const()[name = tensor("op_9490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9490_cast_fp16 = einsum(equation = var_9490_equation_0, values = (var_9264_cast_fp16, var_9191_cast_fp16))[name = tensor("op_9490_cast_fp16")]; + tensor var_9491_to_fp16 = const()[name = tensor("op_9491_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_951_cast_fp16 = mul(x = var_9490_cast_fp16, y = var_9491_to_fp16)[name = tensor("aw_chunk_951_cast_fp16")]; + tensor var_9494_equation_0 = const()[name = tensor("op_9494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9494_cast_fp16 = einsum(equation = var_9494_equation_0, values = (var_9268_cast_fp16, var_9198_cast_fp16))[name = tensor("op_9494_cast_fp16")]; + tensor var_9495_to_fp16 = const()[name = tensor("op_9495_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_953_cast_fp16 = mul(x = var_9494_cast_fp16, y = var_9495_to_fp16)[name = tensor("aw_chunk_953_cast_fp16")]; + tensor var_9498_equation_0 = const()[name = tensor("op_9498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9498_cast_fp16 = einsum(equation = var_9498_equation_0, values = (var_9268_cast_fp16, var_9205_cast_fp16))[name = tensor("op_9498_cast_fp16")]; + tensor var_9499_to_fp16 = const()[name = 
tensor("op_9499_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_955_cast_fp16 = mul(x = var_9498_cast_fp16, y = var_9499_to_fp16)[name = tensor("aw_chunk_955_cast_fp16")]; + tensor var_9502_equation_0 = const()[name = tensor("op_9502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9502_cast_fp16 = einsum(equation = var_9502_equation_0, values = (var_9268_cast_fp16, var_9212_cast_fp16))[name = tensor("op_9502_cast_fp16")]; + tensor var_9503_to_fp16 = const()[name = tensor("op_9503_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_957_cast_fp16 = mul(x = var_9502_cast_fp16, y = var_9503_to_fp16)[name = tensor("aw_chunk_957_cast_fp16")]; + tensor var_9506_equation_0 = const()[name = tensor("op_9506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9506_cast_fp16 = einsum(equation = var_9506_equation_0, values = (var_9268_cast_fp16, var_9219_cast_fp16))[name = tensor("op_9506_cast_fp16")]; + tensor var_9507_to_fp16 = const()[name = tensor("op_9507_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_959_cast_fp16 = mul(x = var_9506_cast_fp16, y = var_9507_to_fp16)[name = tensor("aw_chunk_959_cast_fp16")]; + tensor var_9509_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_865_cast_fp16)[name = tensor("op_9509_cast_fp16")]; + tensor var_9510_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_867_cast_fp16)[name = tensor("op_9510_cast_fp16")]; + tensor var_9511_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_869_cast_fp16)[name = tensor("op_9511_cast_fp16")]; + tensor var_9512_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_871_cast_fp16)[name = tensor("op_9512_cast_fp16")]; + tensor var_9513_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_873_cast_fp16)[name = tensor("op_9513_cast_fp16")]; + tensor var_9514_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_875_cast_fp16)[name = tensor("op_9514_cast_fp16")]; + tensor var_9515_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_877_cast_fp16)[name = tensor("op_9515_cast_fp16")]; + tensor var_9516_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_879_cast_fp16)[name = tensor("op_9516_cast_fp16")]; + tensor var_9517_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_881_cast_fp16)[name = tensor("op_9517_cast_fp16")]; + tensor var_9518_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_883_cast_fp16)[name = tensor("op_9518_cast_fp16")]; + tensor var_9519_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_885_cast_fp16)[name = tensor("op_9519_cast_fp16")]; + tensor var_9520_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_887_cast_fp16)[name = tensor("op_9520_cast_fp16")]; + tensor var_9521_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_889_cast_fp16)[name = tensor("op_9521_cast_fp16")]; + tensor var_9522_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_891_cast_fp16)[name = tensor("op_9522_cast_fp16")]; + tensor var_9523_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_893_cast_fp16)[name = tensor("op_9523_cast_fp16")]; + tensor var_9524_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_895_cast_fp16)[name = tensor("op_9524_cast_fp16")]; + tensor var_9525_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_897_cast_fp16)[name = tensor("op_9525_cast_fp16")]; + tensor var_9526_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_899_cast_fp16)[name = tensor("op_9526_cast_fp16")]; + tensor var_9527_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_901_cast_fp16)[name = tensor("op_9527_cast_fp16")]; + tensor var_9528_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_903_cast_fp16)[name = tensor("op_9528_cast_fp16")]; + 
tensor var_9529_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_905_cast_fp16)[name = tensor("op_9529_cast_fp16")]; + tensor var_9530_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_907_cast_fp16)[name = tensor("op_9530_cast_fp16")]; + tensor var_9531_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_909_cast_fp16)[name = tensor("op_9531_cast_fp16")]; + tensor var_9532_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_911_cast_fp16)[name = tensor("op_9532_cast_fp16")]; + tensor var_9533_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_913_cast_fp16)[name = tensor("op_9533_cast_fp16")]; + tensor var_9534_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_915_cast_fp16)[name = tensor("op_9534_cast_fp16")]; + tensor var_9535_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_917_cast_fp16)[name = tensor("op_9535_cast_fp16")]; + tensor var_9536_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_919_cast_fp16)[name = tensor("op_9536_cast_fp16")]; + tensor var_9537_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_921_cast_fp16)[name = tensor("op_9537_cast_fp16")]; + tensor var_9538_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_923_cast_fp16)[name = tensor("op_9538_cast_fp16")]; + tensor var_9539_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_925_cast_fp16)[name = tensor("op_9539_cast_fp16")]; + tensor var_9540_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_927_cast_fp16)[name = tensor("op_9540_cast_fp16")]; + tensor var_9541_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_929_cast_fp16)[name = tensor("op_9541_cast_fp16")]; + tensor var_9542_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_931_cast_fp16)[name = tensor("op_9542_cast_fp16")]; + tensor var_9543_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_933_cast_fp16)[name = tensor("op_9543_cast_fp16")]; + tensor var_9544_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_935_cast_fp16)[name = tensor("op_9544_cast_fp16")]; + tensor var_9545_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_937_cast_fp16)[name = tensor("op_9545_cast_fp16")]; + tensor var_9546_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_939_cast_fp16)[name = tensor("op_9546_cast_fp16")]; + tensor var_9547_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_941_cast_fp16)[name = tensor("op_9547_cast_fp16")]; + tensor var_9548_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_943_cast_fp16)[name = tensor("op_9548_cast_fp16")]; + tensor var_9549_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_945_cast_fp16)[name = tensor("op_9549_cast_fp16")]; + tensor var_9550_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_947_cast_fp16)[name = tensor("op_9550_cast_fp16")]; + tensor var_9551_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_949_cast_fp16)[name = tensor("op_9551_cast_fp16")]; + tensor var_9552_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_951_cast_fp16)[name = tensor("op_9552_cast_fp16")]; + tensor var_9553_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_953_cast_fp16)[name = tensor("op_9553_cast_fp16")]; + tensor var_9554_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_955_cast_fp16)[name = tensor("op_9554_cast_fp16")]; + tensor var_9555_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_957_cast_fp16)[name = tensor("op_9555_cast_fp16")]; + tensor var_9556_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_959_cast_fp16)[name = tensor("op_9556_cast_fp16")]; + tensor var_9558_equation_0 = const()[name = tensor("op_9558_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9558_cast_fp16 = einsum(equation = var_9558_equation_0, values = 
(var_9270_cast_fp16, var_9509_cast_fp16))[name = tensor("op_9558_cast_fp16")]; + tensor var_9560_equation_0 = const()[name = tensor("op_9560_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9560_cast_fp16 = einsum(equation = var_9560_equation_0, values = (var_9270_cast_fp16, var_9510_cast_fp16))[name = tensor("op_9560_cast_fp16")]; + tensor var_9562_equation_0 = const()[name = tensor("op_9562_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9562_cast_fp16 = einsum(equation = var_9562_equation_0, values = (var_9270_cast_fp16, var_9511_cast_fp16))[name = tensor("op_9562_cast_fp16")]; + tensor var_9564_equation_0 = const()[name = tensor("op_9564_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9564_cast_fp16 = einsum(equation = var_9564_equation_0, values = (var_9270_cast_fp16, var_9512_cast_fp16))[name = tensor("op_9564_cast_fp16")]; + tensor var_9566_equation_0 = const()[name = tensor("op_9566_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9566_cast_fp16 = einsum(equation = var_9566_equation_0, values = (var_9274_cast_fp16, var_9513_cast_fp16))[name = tensor("op_9566_cast_fp16")]; + tensor var_9568_equation_0 = const()[name = tensor("op_9568_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9568_cast_fp16 = einsum(equation = var_9568_equation_0, values = (var_9274_cast_fp16, var_9514_cast_fp16))[name = tensor("op_9568_cast_fp16")]; + tensor var_9570_equation_0 = const()[name = tensor("op_9570_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9570_cast_fp16 = einsum(equation = var_9570_equation_0, values = (var_9274_cast_fp16, var_9515_cast_fp16))[name = tensor("op_9570_cast_fp16")]; + tensor var_9572_equation_0 = const()[name = tensor("op_9572_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9572_cast_fp16 = einsum(equation = var_9572_equation_0, values = (var_9274_cast_fp16, var_9516_cast_fp16))[name = tensor("op_9572_cast_fp16")]; + tensor var_9574_equation_0 = const()[name = tensor("op_9574_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9574_cast_fp16 = einsum(equation = var_9574_equation_0, values = (var_9278_cast_fp16, var_9517_cast_fp16))[name = tensor("op_9574_cast_fp16")]; + tensor var_9576_equation_0 = const()[name = tensor("op_9576_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9576_cast_fp16 = einsum(equation = var_9576_equation_0, values = (var_9278_cast_fp16, var_9518_cast_fp16))[name = tensor("op_9576_cast_fp16")]; + tensor var_9578_equation_0 = const()[name = tensor("op_9578_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9578_cast_fp16 = einsum(equation = var_9578_equation_0, values = (var_9278_cast_fp16, var_9519_cast_fp16))[name = tensor("op_9578_cast_fp16")]; + tensor var_9580_equation_0 = const()[name = tensor("op_9580_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9580_cast_fp16 = einsum(equation = var_9580_equation_0, values = (var_9278_cast_fp16, var_9520_cast_fp16))[name = tensor("op_9580_cast_fp16")]; + tensor var_9582_equation_0 = const()[name = tensor("op_9582_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9582_cast_fp16 = einsum(equation = var_9582_equation_0, values = (var_9282_cast_fp16, var_9521_cast_fp16))[name = tensor("op_9582_cast_fp16")]; + tensor var_9584_equation_0 = const()[name = tensor("op_9584_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9584_cast_fp16 = einsum(equation = var_9584_equation_0, values = (var_9282_cast_fp16, var_9522_cast_fp16))[name = tensor("op_9584_cast_fp16")]; + 
tensor var_9586_equation_0 = const()[name = tensor("op_9586_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9586_cast_fp16 = einsum(equation = var_9586_equation_0, values = (var_9282_cast_fp16, var_9523_cast_fp16))[name = tensor("op_9586_cast_fp16")]; + tensor var_9588_equation_0 = const()[name = tensor("op_9588_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9588_cast_fp16 = einsum(equation = var_9588_equation_0, values = (var_9282_cast_fp16, var_9524_cast_fp16))[name = tensor("op_9588_cast_fp16")]; + tensor var_9590_equation_0 = const()[name = tensor("op_9590_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9590_cast_fp16 = einsum(equation = var_9590_equation_0, values = (var_9286_cast_fp16, var_9525_cast_fp16))[name = tensor("op_9590_cast_fp16")]; + tensor var_9592_equation_0 = const()[name = tensor("op_9592_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9592_cast_fp16 = einsum(equation = var_9592_equation_0, values = (var_9286_cast_fp16, var_9526_cast_fp16))[name = tensor("op_9592_cast_fp16")]; + tensor var_9594_equation_0 = const()[name = tensor("op_9594_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9594_cast_fp16 = einsum(equation = var_9594_equation_0, values = (var_9286_cast_fp16, var_9527_cast_fp16))[name = tensor("op_9594_cast_fp16")]; + tensor var_9596_equation_0 = const()[name = tensor("op_9596_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9596_cast_fp16 = einsum(equation = var_9596_equation_0, values = (var_9286_cast_fp16, var_9528_cast_fp16))[name = tensor("op_9596_cast_fp16")]; + tensor var_9598_equation_0 = const()[name = tensor("op_9598_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9598_cast_fp16 = einsum(equation = var_9598_equation_0, values = (var_9290_cast_fp16, var_9529_cast_fp16))[name = tensor("op_9598_cast_fp16")]; + tensor var_9600_equation_0 = const()[name = tensor("op_9600_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9600_cast_fp16 = einsum(equation = var_9600_equation_0, values = (var_9290_cast_fp16, var_9530_cast_fp16))[name = tensor("op_9600_cast_fp16")]; + tensor var_9602_equation_0 = const()[name = tensor("op_9602_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9602_cast_fp16 = einsum(equation = var_9602_equation_0, values = (var_9290_cast_fp16, var_9531_cast_fp16))[name = tensor("op_9602_cast_fp16")]; + tensor var_9604_equation_0 = const()[name = tensor("op_9604_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9604_cast_fp16 = einsum(equation = var_9604_equation_0, values = (var_9290_cast_fp16, var_9532_cast_fp16))[name = tensor("op_9604_cast_fp16")]; + tensor var_9606_equation_0 = const()[name = tensor("op_9606_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9606_cast_fp16 = einsum(equation = var_9606_equation_0, values = (var_9294_cast_fp16, var_9533_cast_fp16))[name = tensor("op_9606_cast_fp16")]; + tensor var_9608_equation_0 = const()[name = tensor("op_9608_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9608_cast_fp16 = einsum(equation = var_9608_equation_0, values = (var_9294_cast_fp16, var_9534_cast_fp16))[name = tensor("op_9608_cast_fp16")]; + tensor var_9610_equation_0 = const()[name = tensor("op_9610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9610_cast_fp16 = einsum(equation = var_9610_equation_0, values = (var_9294_cast_fp16, var_9535_cast_fp16))[name = tensor("op_9610_cast_fp16")]; + tensor var_9612_equation_0 = const()[name = tensor("op_9612_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_9612_cast_fp16 = einsum(equation = var_9612_equation_0, values = (var_9294_cast_fp16, var_9536_cast_fp16))[name = tensor("op_9612_cast_fp16")]; + tensor var_9614_equation_0 = const()[name = tensor("op_9614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9614_cast_fp16 = einsum(equation = var_9614_equation_0, values = (var_9298_cast_fp16, var_9537_cast_fp16))[name = tensor("op_9614_cast_fp16")]; + tensor var_9616_equation_0 = const()[name = tensor("op_9616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9616_cast_fp16 = einsum(equation = var_9616_equation_0, values = (var_9298_cast_fp16, var_9538_cast_fp16))[name = tensor("op_9616_cast_fp16")]; + tensor var_9618_equation_0 = const()[name = tensor("op_9618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9618_cast_fp16 = einsum(equation = var_9618_equation_0, values = (var_9298_cast_fp16, var_9539_cast_fp16))[name = tensor("op_9618_cast_fp16")]; + tensor var_9620_equation_0 = const()[name = tensor("op_9620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9620_cast_fp16 = einsum(equation = var_9620_equation_0, values = (var_9298_cast_fp16, var_9540_cast_fp16))[name = tensor("op_9620_cast_fp16")]; + tensor var_9622_equation_0 = const()[name = tensor("op_9622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9622_cast_fp16 = einsum(equation = var_9622_equation_0, values = (var_9302_cast_fp16, var_9541_cast_fp16))[name = tensor("op_9622_cast_fp16")]; + tensor var_9624_equation_0 = const()[name = tensor("op_9624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9624_cast_fp16 = einsum(equation = var_9624_equation_0, values = (var_9302_cast_fp16, var_9542_cast_fp16))[name = tensor("op_9624_cast_fp16")]; + tensor var_9626_equation_0 = const()[name = tensor("op_9626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9626_cast_fp16 = einsum(equation = var_9626_equation_0, values = (var_9302_cast_fp16, var_9543_cast_fp16))[name = tensor("op_9626_cast_fp16")]; + tensor var_9628_equation_0 = const()[name = tensor("op_9628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9628_cast_fp16 = einsum(equation = var_9628_equation_0, values = (var_9302_cast_fp16, var_9544_cast_fp16))[name = tensor("op_9628_cast_fp16")]; + tensor var_9630_equation_0 = const()[name = tensor("op_9630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9630_cast_fp16 = einsum(equation = var_9630_equation_0, values = (var_9306_cast_fp16, var_9545_cast_fp16))[name = tensor("op_9630_cast_fp16")]; + tensor var_9632_equation_0 = const()[name = tensor("op_9632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9632_cast_fp16 = einsum(equation = var_9632_equation_0, values = (var_9306_cast_fp16, var_9546_cast_fp16))[name = tensor("op_9632_cast_fp16")]; + tensor var_9634_equation_0 = const()[name = tensor("op_9634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9634_cast_fp16 = einsum(equation = var_9634_equation_0, values = (var_9306_cast_fp16, var_9547_cast_fp16))[name = tensor("op_9634_cast_fp16")]; + tensor var_9636_equation_0 = const()[name = tensor("op_9636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9636_cast_fp16 = einsum(equation = var_9636_equation_0, values = (var_9306_cast_fp16, var_9548_cast_fp16))[name = tensor("op_9636_cast_fp16")]; + tensor var_9638_equation_0 = const()[name = tensor("op_9638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9638_cast_fp16 = einsum(equation = 
var_9638_equation_0, values = (var_9310_cast_fp16, var_9549_cast_fp16))[name = tensor("op_9638_cast_fp16")]; + tensor var_9640_equation_0 = const()[name = tensor("op_9640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9640_cast_fp16 = einsum(equation = var_9640_equation_0, values = (var_9310_cast_fp16, var_9550_cast_fp16))[name = tensor("op_9640_cast_fp16")]; + tensor var_9642_equation_0 = const()[name = tensor("op_9642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9642_cast_fp16 = einsum(equation = var_9642_equation_0, values = (var_9310_cast_fp16, var_9551_cast_fp16))[name = tensor("op_9642_cast_fp16")]; + tensor var_9644_equation_0 = const()[name = tensor("op_9644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9644_cast_fp16 = einsum(equation = var_9644_equation_0, values = (var_9310_cast_fp16, var_9552_cast_fp16))[name = tensor("op_9644_cast_fp16")]; + tensor var_9646_equation_0 = const()[name = tensor("op_9646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9646_cast_fp16 = einsum(equation = var_9646_equation_0, values = (var_9314_cast_fp16, var_9553_cast_fp16))[name = tensor("op_9646_cast_fp16")]; + tensor var_9648_equation_0 = const()[name = tensor("op_9648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9648_cast_fp16 = einsum(equation = var_9648_equation_0, values = (var_9314_cast_fp16, var_9554_cast_fp16))[name = tensor("op_9648_cast_fp16")]; + tensor var_9650_equation_0 = const()[name = tensor("op_9650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9650_cast_fp16 = einsum(equation = var_9650_equation_0, values = (var_9314_cast_fp16, var_9555_cast_fp16))[name = tensor("op_9650_cast_fp16")]; + tensor var_9652_equation_0 = const()[name = tensor("op_9652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9652_cast_fp16 = einsum(equation = var_9652_equation_0, values = (var_9314_cast_fp16, var_9556_cast_fp16))[name = tensor("op_9652_cast_fp16")]; + tensor var_9654_interleave_0 = const()[name = tensor("op_9654_interleave_0"), val = tensor(false)]; + tensor var_9654_cast_fp16 = concat(axis = var_8765, interleave = var_9654_interleave_0, values = (var_9558_cast_fp16, var_9560_cast_fp16, var_9562_cast_fp16, var_9564_cast_fp16))[name = tensor("op_9654_cast_fp16")]; + tensor var_9656_interleave_0 = const()[name = tensor("op_9656_interleave_0"), val = tensor(false)]; + tensor var_9656_cast_fp16 = concat(axis = var_8765, interleave = var_9656_interleave_0, values = (var_9566_cast_fp16, var_9568_cast_fp16, var_9570_cast_fp16, var_9572_cast_fp16))[name = tensor("op_9656_cast_fp16")]; + tensor var_9658_interleave_0 = const()[name = tensor("op_9658_interleave_0"), val = tensor(false)]; + tensor var_9658_cast_fp16 = concat(axis = var_8765, interleave = var_9658_interleave_0, values = (var_9574_cast_fp16, var_9576_cast_fp16, var_9578_cast_fp16, var_9580_cast_fp16))[name = tensor("op_9658_cast_fp16")]; + tensor var_9660_interleave_0 = const()[name = tensor("op_9660_interleave_0"), val = tensor(false)]; + tensor var_9660_cast_fp16 = concat(axis = var_8765, interleave = var_9660_interleave_0, values = (var_9582_cast_fp16, var_9584_cast_fp16, var_9586_cast_fp16, var_9588_cast_fp16))[name = tensor("op_9660_cast_fp16")]; + tensor var_9662_interleave_0 = const()[name = tensor("op_9662_interleave_0"), val = tensor(false)]; + tensor var_9662_cast_fp16 = concat(axis = var_8765, interleave = var_9662_interleave_0, values = (var_9590_cast_fp16, var_9592_cast_fp16, var_9594_cast_fp16, var_9596_cast_fp16))[name = 
tensor("op_9662_cast_fp16")]; + tensor var_9664_interleave_0 = const()[name = tensor("op_9664_interleave_0"), val = tensor(false)]; + tensor var_9664_cast_fp16 = concat(axis = var_8765, interleave = var_9664_interleave_0, values = (var_9598_cast_fp16, var_9600_cast_fp16, var_9602_cast_fp16, var_9604_cast_fp16))[name = tensor("op_9664_cast_fp16")]; + tensor var_9666_interleave_0 = const()[name = tensor("op_9666_interleave_0"), val = tensor(false)]; + tensor var_9666_cast_fp16 = concat(axis = var_8765, interleave = var_9666_interleave_0, values = (var_9606_cast_fp16, var_9608_cast_fp16, var_9610_cast_fp16, var_9612_cast_fp16))[name = tensor("op_9666_cast_fp16")]; + tensor var_9668_interleave_0 = const()[name = tensor("op_9668_interleave_0"), val = tensor(false)]; + tensor var_9668_cast_fp16 = concat(axis = var_8765, interleave = var_9668_interleave_0, values = (var_9614_cast_fp16, var_9616_cast_fp16, var_9618_cast_fp16, var_9620_cast_fp16))[name = tensor("op_9668_cast_fp16")]; + tensor var_9670_interleave_0 = const()[name = tensor("op_9670_interleave_0"), val = tensor(false)]; + tensor var_9670_cast_fp16 = concat(axis = var_8765, interleave = var_9670_interleave_0, values = (var_9622_cast_fp16, var_9624_cast_fp16, var_9626_cast_fp16, var_9628_cast_fp16))[name = tensor("op_9670_cast_fp16")]; + tensor var_9672_interleave_0 = const()[name = tensor("op_9672_interleave_0"), val = tensor(false)]; + tensor var_9672_cast_fp16 = concat(axis = var_8765, interleave = var_9672_interleave_0, values = (var_9630_cast_fp16, var_9632_cast_fp16, var_9634_cast_fp16, var_9636_cast_fp16))[name = tensor("op_9672_cast_fp16")]; + tensor var_9674_interleave_0 = const()[name = tensor("op_9674_interleave_0"), val = tensor(false)]; + tensor var_9674_cast_fp16 = concat(axis = var_8765, interleave = var_9674_interleave_0, values = (var_9638_cast_fp16, var_9640_cast_fp16, var_9642_cast_fp16, var_9644_cast_fp16))[name = tensor("op_9674_cast_fp16")]; + tensor var_9676_interleave_0 = const()[name = tensor("op_9676_interleave_0"), val = tensor(false)]; + tensor var_9676_cast_fp16 = concat(axis = var_8765, interleave = var_9676_interleave_0, values = (var_9646_cast_fp16, var_9648_cast_fp16, var_9650_cast_fp16, var_9652_cast_fp16))[name = tensor("op_9676_cast_fp16")]; + tensor input_73_interleave_0 = const()[name = tensor("input_73_interleave_0"), val = tensor(false)]; + tensor input_73_cast_fp16 = concat(axis = var_8782, interleave = input_73_interleave_0, values = (var_9654_cast_fp16, var_9656_cast_fp16, var_9658_cast_fp16, var_9660_cast_fp16, var_9662_cast_fp16, var_9664_cast_fp16, var_9666_cast_fp16, var_9668_cast_fp16, var_9670_cast_fp16, var_9672_cast_fp16, var_9674_cast_fp16, var_9676_cast_fp16))[name = tensor("input_73_cast_fp16")]; + tensor var_9681 = const()[name = tensor("op_9681"), val = tensor([1, 1])]; + tensor var_9683 = const()[name = tensor("op_9683"), val = tensor([1, 1])]; + tensor obj_39_pad_type_0 = const()[name = tensor("obj_39_pad_type_0"), val = tensor("custom")]; + tensor obj_39_pad_0 = const()[name = tensor("obj_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137340288)))]; + tensor layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138520000)))]; + tensor 
obj_39_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = var_9683, groups = var_8782, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = var_9681, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor var_9689 = const()[name = tensor("op_9689"), val = tensor([1])]; + tensor channels_mean_39_cast_fp16 = reduce_mean(axes = var_9689, keep_dims = var_8783, x = inputs_39_cast_fp16)[name = tensor("channels_mean_39_cast_fp16")]; + tensor zero_mean_39_cast_fp16 = sub(x = inputs_39_cast_fp16, y = channels_mean_39_cast_fp16)[name = tensor("zero_mean_39_cast_fp16")]; + tensor zero_mean_sq_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = zero_mean_39_cast_fp16)[name = tensor("zero_mean_sq_39_cast_fp16")]; + tensor var_9693 = const()[name = tensor("op_9693"), val = tensor([1])]; + tensor var_9694_cast_fp16 = reduce_mean(axes = var_9693, keep_dims = var_8783, x = zero_mean_sq_39_cast_fp16)[name = tensor("op_9694_cast_fp16")]; + tensor var_9695_to_fp16 = const()[name = tensor("op_9695_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_9696_cast_fp16 = add(x = var_9694_cast_fp16, y = var_9695_to_fp16)[name = tensor("op_9696_cast_fp16")]; + tensor denom_39_epsilon_0_to_fp16 = const()[name = tensor("denom_39_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0_to_fp16, x = var_9696_cast_fp16)[name = tensor("denom_39_cast_fp16")]; + tensor out_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = denom_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138521600)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138523200)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_9707 = const()[name = tensor("op_9707"), val = tensor([1, 1])]; + tensor var_9709 = const()[name = tensor("op_9709"), val = tensor([1, 1])]; + tensor input_77_pad_type_0 = const()[name = tensor("input_77_pad_type_0"), val = tensor("custom")]; + tensor input_77_pad_0 = const()[name = tensor("input_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_fc1_weight_to_fp16 = const()[name = tensor("layers_9_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138524800)))]; + tensor layers_9_fc1_bias_to_fp16 = const()[name = tensor("layers_9_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143243456)))]; + tensor input_77_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = var_9709, groups = var_8782, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = var_9707, weight = layers_9_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = 
const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_9715 = const()[name = tensor("op_9715"), val = tensor([1, 1])]; + tensor var_9717 = const()[name = tensor("op_9717"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_type_0 = const()[name = tensor("hidden_states_23_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_23_pad_0 = const()[name = tensor("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_fc2_weight_to_fp16 = const()[name = tensor("layers_9_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143249664)))]; + tensor layers_9_fc2_bias_to_fp16 = const()[name = tensor("layers_9_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147968320)))]; + tensor hidden_states_23_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = var_9717, groups = var_8782, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = var_9715, weight = layers_9_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_9724 = const()[name = tensor("op_9724"), val = tensor(3)]; + tensor var_9741 = const()[name = tensor("op_9741"), val = tensor(1)]; + tensor var_9742 = const()[name = tensor("op_9742"), val = tensor(true)]; + tensor var_9752 = const()[name = tensor("op_9752"), val = tensor([1])]; + tensor channels_mean_41_cast_fp16 = reduce_mean(axes = var_9752, keep_dims = var_9742, x = inputs_41_cast_fp16)[name = tensor("channels_mean_41_cast_fp16")]; + tensor zero_mean_41_cast_fp16 = sub(x = inputs_41_cast_fp16, y = channels_mean_41_cast_fp16)[name = tensor("zero_mean_41_cast_fp16")]; + tensor zero_mean_sq_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = zero_mean_41_cast_fp16)[name = tensor("zero_mean_sq_41_cast_fp16")]; + tensor var_9756 = const()[name = tensor("op_9756"), val = tensor([1])]; + tensor var_9757_cast_fp16 = reduce_mean(axes = var_9756, keep_dims = var_9742, x = zero_mean_sq_41_cast_fp16)[name = tensor("op_9757_cast_fp16")]; + tensor var_9758_to_fp16 = const()[name = tensor("op_9758_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_9759_cast_fp16 = add(x = var_9757_cast_fp16, y = var_9758_to_fp16)[name = tensor("op_9759_cast_fp16")]; + tensor denom_41_epsilon_0_to_fp16 = const()[name = tensor("denom_41_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0_to_fp16, x = var_9759_cast_fp16)[name = tensor("denom_41_cast_fp16")]; + tensor out_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = denom_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147969920)))]; + tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147971520)))]; + tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = 
obj_41_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_9774 = const()[name = tensor("op_9774"), val = tensor([1, 1])]; + tensor var_9776 = const()[name = tensor("op_9776"), val = tensor([1, 1])]; + tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("custom")]; + tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147973120)))]; + tensor layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149152832)))]; + tensor query_21_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = var_9776, groups = var_9741, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = var_9774, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_9780 = const()[name = tensor("op_9780"), val = tensor([1, 1])]; + tensor var_9782 = const()[name = tensor("op_9782"), val = tensor([1, 1])]; + tensor key_21_pad_type_0 = const()[name = tensor("key_21_pad_type_0"), val = tensor("custom")]; + tensor key_21_pad_0 = const()[name = tensor("key_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149154432)))]; + tensor key_21_cast_fp16 = conv(dilations = var_9782, groups = var_9741, pad = key_21_pad_0, pad_type = key_21_pad_type_0, strides = var_9780, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_9787 = const()[name = tensor("op_9787"), val = tensor([1, 1])]; + tensor var_9789 = const()[name = tensor("op_9789"), val = tensor([1, 1])]; + tensor value_21_pad_type_0 = const()[name = tensor("value_21_pad_type_0"), val = tensor("custom")]; + tensor value_21_pad_0 = const()[name = tensor("value_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150334144)))]; + tensor layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151513856)))]; + tensor value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = var_9789, groups = var_9741, pad = value_21_pad_0, pad_type = value_21_pad_type_0, strides = var_9787, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_9796_begin_0 = const()[name = tensor("op_9796_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9796_end_0 = const()[name = tensor("op_9796_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9796_end_mask_0 = const()[name = tensor("op_9796_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9796_cast_fp16 = slice_by_index(begin = 
var_9796_begin_0, end = var_9796_end_0, end_mask = var_9796_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9796_cast_fp16")]; + tensor var_9800_begin_0 = const()[name = tensor("op_9800_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_9800_end_0 = const()[name = tensor("op_9800_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_9800_end_mask_0 = const()[name = tensor("op_9800_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9800_cast_fp16 = slice_by_index(begin = var_9800_begin_0, end = var_9800_end_0, end_mask = var_9800_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9800_cast_fp16")]; + tensor var_9804_begin_0 = const()[name = tensor("op_9804_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_9804_end_0 = const()[name = tensor("op_9804_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_9804_end_mask_0 = const()[name = tensor("op_9804_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9804_cast_fp16 = slice_by_index(begin = var_9804_begin_0, end = var_9804_end_0, end_mask = var_9804_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9804_cast_fp16")]; + tensor var_9808_begin_0 = const()[name = tensor("op_9808_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_9808_end_0 = const()[name = tensor("op_9808_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_9808_end_mask_0 = const()[name = tensor("op_9808_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9808_cast_fp16 = slice_by_index(begin = var_9808_begin_0, end = var_9808_end_0, end_mask = var_9808_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9808_cast_fp16")]; + tensor var_9812_begin_0 = const()[name = tensor("op_9812_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_9812_end_0 = const()[name = tensor("op_9812_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_9812_end_mask_0 = const()[name = tensor("op_9812_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9812_cast_fp16 = slice_by_index(begin = var_9812_begin_0, end = var_9812_end_0, end_mask = var_9812_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9812_cast_fp16")]; + tensor var_9816_begin_0 = const()[name = tensor("op_9816_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_9816_end_0 = const()[name = tensor("op_9816_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_9816_end_mask_0 = const()[name = tensor("op_9816_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9816_cast_fp16 = slice_by_index(begin = var_9816_begin_0, end = var_9816_end_0, end_mask = var_9816_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9816_cast_fp16")]; + tensor var_9820_begin_0 = const()[name = tensor("op_9820_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_9820_end_0 = const()[name = tensor("op_9820_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_9820_end_mask_0 = const()[name = tensor("op_9820_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9820_cast_fp16 = slice_by_index(begin = var_9820_begin_0, end = var_9820_end_0, end_mask = var_9820_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9820_cast_fp16")]; + tensor var_9824_begin_0 = const()[name = tensor("op_9824_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_9824_end_0 = const()[name = tensor("op_9824_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_9824_end_mask_0 = const()[name = tensor("op_9824_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9824_cast_fp16 = slice_by_index(begin = var_9824_begin_0, 
end = var_9824_end_0, end_mask = var_9824_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9824_cast_fp16")]; + tensor var_9828_begin_0 = const()[name = tensor("op_9828_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_9828_end_0 = const()[name = tensor("op_9828_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_9828_end_mask_0 = const()[name = tensor("op_9828_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9828_cast_fp16 = slice_by_index(begin = var_9828_begin_0, end = var_9828_end_0, end_mask = var_9828_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9828_cast_fp16")]; + tensor var_9832_begin_0 = const()[name = tensor("op_9832_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_9832_end_0 = const()[name = tensor("op_9832_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_9832_end_mask_0 = const()[name = tensor("op_9832_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9832_cast_fp16 = slice_by_index(begin = var_9832_begin_0, end = var_9832_end_0, end_mask = var_9832_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9832_cast_fp16")]; + tensor var_9836_begin_0 = const()[name = tensor("op_9836_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_9836_end_0 = const()[name = tensor("op_9836_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_9836_end_mask_0 = const()[name = tensor("op_9836_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9836_cast_fp16 = slice_by_index(begin = var_9836_begin_0, end = var_9836_end_0, end_mask = var_9836_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9836_cast_fp16")]; + tensor var_9840_begin_0 = const()[name = tensor("op_9840_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_9840_end_0 = const()[name = tensor("op_9840_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_9840_end_mask_0 = const()[name = tensor("op_9840_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9840_cast_fp16 = slice_by_index(begin = var_9840_begin_0, end = var_9840_end_0, end_mask = var_9840_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_9840_cast_fp16")]; + tensor var_9849_begin_0 = const()[name = tensor("op_9849_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9849_end_0 = const()[name = tensor("op_9849_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9849_end_mask_0 = const()[name = tensor("op_9849_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9849_cast_fp16 = slice_by_index(begin = var_9849_begin_0, end = var_9849_end_0, end_mask = var_9849_end_mask_0, x = var_9796_cast_fp16)[name = tensor("op_9849_cast_fp16")]; + tensor var_9856_begin_0 = const()[name = tensor("op_9856_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9856_end_0 = const()[name = tensor("op_9856_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9856_end_mask_0 = const()[name = tensor("op_9856_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9856_cast_fp16 = slice_by_index(begin = var_9856_begin_0, end = var_9856_end_0, end_mask = var_9856_end_mask_0, x = var_9796_cast_fp16)[name = tensor("op_9856_cast_fp16")]; + tensor var_9863_begin_0 = const()[name = tensor("op_9863_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9863_end_0 = const()[name = tensor("op_9863_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9863_end_mask_0 = const()[name = tensor("op_9863_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9863_cast_fp16 = slice_by_index(begin = var_9863_begin_0, end = var_9863_end_0, 
end_mask = var_9863_end_mask_0, x = var_9796_cast_fp16)[name = tensor("op_9863_cast_fp16")]; + tensor var_9870_begin_0 = const()[name = tensor("op_9870_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9870_end_0 = const()[name = tensor("op_9870_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9870_end_mask_0 = const()[name = tensor("op_9870_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9870_cast_fp16 = slice_by_index(begin = var_9870_begin_0, end = var_9870_end_0, end_mask = var_9870_end_mask_0, x = var_9796_cast_fp16)[name = tensor("op_9870_cast_fp16")]; + tensor var_9877_begin_0 = const()[name = tensor("op_9877_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9877_end_0 = const()[name = tensor("op_9877_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9877_end_mask_0 = const()[name = tensor("op_9877_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9877_cast_fp16 = slice_by_index(begin = var_9877_begin_0, end = var_9877_end_0, end_mask = var_9877_end_mask_0, x = var_9800_cast_fp16)[name = tensor("op_9877_cast_fp16")]; + tensor var_9884_begin_0 = const()[name = tensor("op_9884_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9884_end_0 = const()[name = tensor("op_9884_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9884_end_mask_0 = const()[name = tensor("op_9884_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9884_cast_fp16 = slice_by_index(begin = var_9884_begin_0, end = var_9884_end_0, end_mask = var_9884_end_mask_0, x = var_9800_cast_fp16)[name = tensor("op_9884_cast_fp16")]; + tensor var_9891_begin_0 = const()[name = tensor("op_9891_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9891_end_0 = const()[name = tensor("op_9891_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9891_end_mask_0 = const()[name = tensor("op_9891_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9891_cast_fp16 = slice_by_index(begin = var_9891_begin_0, end = var_9891_end_0, end_mask = var_9891_end_mask_0, x = var_9800_cast_fp16)[name = tensor("op_9891_cast_fp16")]; + tensor var_9898_begin_0 = const()[name = tensor("op_9898_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9898_end_0 = const()[name = tensor("op_9898_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9898_end_mask_0 = const()[name = tensor("op_9898_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9898_cast_fp16 = slice_by_index(begin = var_9898_begin_0, end = var_9898_end_0, end_mask = var_9898_end_mask_0, x = var_9800_cast_fp16)[name = tensor("op_9898_cast_fp16")]; + tensor var_9905_begin_0 = const()[name = tensor("op_9905_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9905_end_0 = const()[name = tensor("op_9905_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9905_end_mask_0 = const()[name = tensor("op_9905_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9905_cast_fp16 = slice_by_index(begin = var_9905_begin_0, end = var_9905_end_0, end_mask = var_9905_end_mask_0, x = var_9804_cast_fp16)[name = tensor("op_9905_cast_fp16")]; + tensor var_9912_begin_0 = const()[name = tensor("op_9912_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9912_end_0 = const()[name = tensor("op_9912_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9912_end_mask_0 = const()[name = tensor("op_9912_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9912_cast_fp16 = slice_by_index(begin = var_9912_begin_0, end = var_9912_end_0, end_mask = var_9912_end_mask_0, 
x = var_9804_cast_fp16)[name = tensor("op_9912_cast_fp16")]; + tensor var_9919_begin_0 = const()[name = tensor("op_9919_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9919_end_0 = const()[name = tensor("op_9919_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9919_end_mask_0 = const()[name = tensor("op_9919_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9919_cast_fp16 = slice_by_index(begin = var_9919_begin_0, end = var_9919_end_0, end_mask = var_9919_end_mask_0, x = var_9804_cast_fp16)[name = tensor("op_9919_cast_fp16")]; + tensor var_9926_begin_0 = const()[name = tensor("op_9926_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9926_end_0 = const()[name = tensor("op_9926_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9926_end_mask_0 = const()[name = tensor("op_9926_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9926_cast_fp16 = slice_by_index(begin = var_9926_begin_0, end = var_9926_end_0, end_mask = var_9926_end_mask_0, x = var_9804_cast_fp16)[name = tensor("op_9926_cast_fp16")]; + tensor var_9933_begin_0 = const()[name = tensor("op_9933_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9933_end_0 = const()[name = tensor("op_9933_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9933_end_mask_0 = const()[name = tensor("op_9933_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9933_cast_fp16 = slice_by_index(begin = var_9933_begin_0, end = var_9933_end_0, end_mask = var_9933_end_mask_0, x = var_9808_cast_fp16)[name = tensor("op_9933_cast_fp16")]; + tensor var_9940_begin_0 = const()[name = tensor("op_9940_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9940_end_0 = const()[name = tensor("op_9940_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9940_end_mask_0 = const()[name = tensor("op_9940_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9940_cast_fp16 = slice_by_index(begin = var_9940_begin_0, end = var_9940_end_0, end_mask = var_9940_end_mask_0, x = var_9808_cast_fp16)[name = tensor("op_9940_cast_fp16")]; + tensor var_9947_begin_0 = const()[name = tensor("op_9947_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9947_end_0 = const()[name = tensor("op_9947_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9947_end_mask_0 = const()[name = tensor("op_9947_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9947_cast_fp16 = slice_by_index(begin = var_9947_begin_0, end = var_9947_end_0, end_mask = var_9947_end_mask_0, x = var_9808_cast_fp16)[name = tensor("op_9947_cast_fp16")]; + tensor var_9954_begin_0 = const()[name = tensor("op_9954_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9954_end_0 = const()[name = tensor("op_9954_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9954_end_mask_0 = const()[name = tensor("op_9954_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9954_cast_fp16 = slice_by_index(begin = var_9954_begin_0, end = var_9954_end_0, end_mask = var_9954_end_mask_0, x = var_9808_cast_fp16)[name = tensor("op_9954_cast_fp16")]; + tensor var_9961_begin_0 = const()[name = tensor("op_9961_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9961_end_0 = const()[name = tensor("op_9961_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9961_end_mask_0 = const()[name = tensor("op_9961_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9961_cast_fp16 = slice_by_index(begin = var_9961_begin_0, end = var_9961_end_0, end_mask = var_9961_end_mask_0, x = var_9812_cast_fp16)[name = 
tensor("op_9961_cast_fp16")]; + tensor var_9968_begin_0 = const()[name = tensor("op_9968_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9968_end_0 = const()[name = tensor("op_9968_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9968_end_mask_0 = const()[name = tensor("op_9968_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9968_cast_fp16 = slice_by_index(begin = var_9968_begin_0, end = var_9968_end_0, end_mask = var_9968_end_mask_0, x = var_9812_cast_fp16)[name = tensor("op_9968_cast_fp16")]; + tensor var_9975_begin_0 = const()[name = tensor("op_9975_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9975_end_0 = const()[name = tensor("op_9975_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9975_end_mask_0 = const()[name = tensor("op_9975_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9975_cast_fp16 = slice_by_index(begin = var_9975_begin_0, end = var_9975_end_0, end_mask = var_9975_end_mask_0, x = var_9812_cast_fp16)[name = tensor("op_9975_cast_fp16")]; + tensor var_9982_begin_0 = const()[name = tensor("op_9982_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9982_end_0 = const()[name = tensor("op_9982_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9982_end_mask_0 = const()[name = tensor("op_9982_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9982_cast_fp16 = slice_by_index(begin = var_9982_begin_0, end = var_9982_end_0, end_mask = var_9982_end_mask_0, x = var_9812_cast_fp16)[name = tensor("op_9982_cast_fp16")]; + tensor var_9989_begin_0 = const()[name = tensor("op_9989_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9989_end_0 = const()[name = tensor("op_9989_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9989_end_mask_0 = const()[name = tensor("op_9989_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9989_cast_fp16 = slice_by_index(begin = var_9989_begin_0, end = var_9989_end_0, end_mask = var_9989_end_mask_0, x = var_9816_cast_fp16)[name = tensor("op_9989_cast_fp16")]; + tensor var_9996_begin_0 = const()[name = tensor("op_9996_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9996_end_0 = const()[name = tensor("op_9996_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9996_end_mask_0 = const()[name = tensor("op_9996_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9996_cast_fp16 = slice_by_index(begin = var_9996_begin_0, end = var_9996_end_0, end_mask = var_9996_end_mask_0, x = var_9816_cast_fp16)[name = tensor("op_9996_cast_fp16")]; + tensor var_10003_begin_0 = const()[name = tensor("op_10003_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10003_end_0 = const()[name = tensor("op_10003_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10003_end_mask_0 = const()[name = tensor("op_10003_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10003_cast_fp16 = slice_by_index(begin = var_10003_begin_0, end = var_10003_end_0, end_mask = var_10003_end_mask_0, x = var_9816_cast_fp16)[name = tensor("op_10003_cast_fp16")]; + tensor var_10010_begin_0 = const()[name = tensor("op_10010_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10010_end_0 = const()[name = tensor("op_10010_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10010_end_mask_0 = const()[name = tensor("op_10010_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10010_cast_fp16 = slice_by_index(begin = var_10010_begin_0, end = var_10010_end_0, end_mask = var_10010_end_mask_0, x = var_9816_cast_fp16)[name = 
tensor("op_10010_cast_fp16")]; + tensor var_10017_begin_0 = const()[name = tensor("op_10017_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10017_end_0 = const()[name = tensor("op_10017_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10017_end_mask_0 = const()[name = tensor("op_10017_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10017_cast_fp16 = slice_by_index(begin = var_10017_begin_0, end = var_10017_end_0, end_mask = var_10017_end_mask_0, x = var_9820_cast_fp16)[name = tensor("op_10017_cast_fp16")]; + tensor var_10024_begin_0 = const()[name = tensor("op_10024_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10024_end_0 = const()[name = tensor("op_10024_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10024_end_mask_0 = const()[name = tensor("op_10024_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10024_cast_fp16 = slice_by_index(begin = var_10024_begin_0, end = var_10024_end_0, end_mask = var_10024_end_mask_0, x = var_9820_cast_fp16)[name = tensor("op_10024_cast_fp16")]; + tensor var_10031_begin_0 = const()[name = tensor("op_10031_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10031_end_0 = const()[name = tensor("op_10031_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10031_end_mask_0 = const()[name = tensor("op_10031_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10031_cast_fp16 = slice_by_index(begin = var_10031_begin_0, end = var_10031_end_0, end_mask = var_10031_end_mask_0, x = var_9820_cast_fp16)[name = tensor("op_10031_cast_fp16")]; + tensor var_10038_begin_0 = const()[name = tensor("op_10038_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10038_end_0 = const()[name = tensor("op_10038_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10038_end_mask_0 = const()[name = tensor("op_10038_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10038_cast_fp16 = slice_by_index(begin = var_10038_begin_0, end = var_10038_end_0, end_mask = var_10038_end_mask_0, x = var_9820_cast_fp16)[name = tensor("op_10038_cast_fp16")]; + tensor var_10045_begin_0 = const()[name = tensor("op_10045_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10045_end_0 = const()[name = tensor("op_10045_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10045_end_mask_0 = const()[name = tensor("op_10045_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10045_cast_fp16 = slice_by_index(begin = var_10045_begin_0, end = var_10045_end_0, end_mask = var_10045_end_mask_0, x = var_9824_cast_fp16)[name = tensor("op_10045_cast_fp16")]; + tensor var_10052_begin_0 = const()[name = tensor("op_10052_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10052_end_0 = const()[name = tensor("op_10052_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10052_end_mask_0 = const()[name = tensor("op_10052_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10052_cast_fp16 = slice_by_index(begin = var_10052_begin_0, end = var_10052_end_0, end_mask = var_10052_end_mask_0, x = var_9824_cast_fp16)[name = tensor("op_10052_cast_fp16")]; + tensor var_10059_begin_0 = const()[name = tensor("op_10059_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10059_end_0 = const()[name = tensor("op_10059_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10059_end_mask_0 = const()[name = tensor("op_10059_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10059_cast_fp16 = slice_by_index(begin = var_10059_begin_0, end = var_10059_end_0, end_mask = 
var_10059_end_mask_0, x = var_9824_cast_fp16)[name = tensor("op_10059_cast_fp16")]; + tensor var_10066_begin_0 = const()[name = tensor("op_10066_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10066_end_0 = const()[name = tensor("op_10066_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10066_end_mask_0 = const()[name = tensor("op_10066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10066_cast_fp16 = slice_by_index(begin = var_10066_begin_0, end = var_10066_end_0, end_mask = var_10066_end_mask_0, x = var_9824_cast_fp16)[name = tensor("op_10066_cast_fp16")]; + tensor var_10073_begin_0 = const()[name = tensor("op_10073_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10073_end_0 = const()[name = tensor("op_10073_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10073_end_mask_0 = const()[name = tensor("op_10073_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10073_cast_fp16 = slice_by_index(begin = var_10073_begin_0, end = var_10073_end_0, end_mask = var_10073_end_mask_0, x = var_9828_cast_fp16)[name = tensor("op_10073_cast_fp16")]; + tensor var_10080_begin_0 = const()[name = tensor("op_10080_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10080_end_0 = const()[name = tensor("op_10080_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10080_end_mask_0 = const()[name = tensor("op_10080_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10080_cast_fp16 = slice_by_index(begin = var_10080_begin_0, end = var_10080_end_0, end_mask = var_10080_end_mask_0, x = var_9828_cast_fp16)[name = tensor("op_10080_cast_fp16")]; + tensor var_10087_begin_0 = const()[name = tensor("op_10087_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10087_end_0 = const()[name = tensor("op_10087_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10087_end_mask_0 = const()[name = tensor("op_10087_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10087_cast_fp16 = slice_by_index(begin = var_10087_begin_0, end = var_10087_end_0, end_mask = var_10087_end_mask_0, x = var_9828_cast_fp16)[name = tensor("op_10087_cast_fp16")]; + tensor var_10094_begin_0 = const()[name = tensor("op_10094_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10094_end_0 = const()[name = tensor("op_10094_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10094_end_mask_0 = const()[name = tensor("op_10094_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10094_cast_fp16 = slice_by_index(begin = var_10094_begin_0, end = var_10094_end_0, end_mask = var_10094_end_mask_0, x = var_9828_cast_fp16)[name = tensor("op_10094_cast_fp16")]; + tensor var_10101_begin_0 = const()[name = tensor("op_10101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10101_end_0 = const()[name = tensor("op_10101_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10101_end_mask_0 = const()[name = tensor("op_10101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10101_cast_fp16 = slice_by_index(begin = var_10101_begin_0, end = var_10101_end_0, end_mask = var_10101_end_mask_0, x = var_9832_cast_fp16)[name = tensor("op_10101_cast_fp16")]; + tensor var_10108_begin_0 = const()[name = tensor("op_10108_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10108_end_0 = const()[name = tensor("op_10108_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10108_end_mask_0 = const()[name = tensor("op_10108_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10108_cast_fp16 = slice_by_index(begin = 
var_10108_begin_0, end = var_10108_end_0, end_mask = var_10108_end_mask_0, x = var_9832_cast_fp16)[name = tensor("op_10108_cast_fp16")]; + tensor var_10115_begin_0 = const()[name = tensor("op_10115_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10115_end_0 = const()[name = tensor("op_10115_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10115_end_mask_0 = const()[name = tensor("op_10115_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10115_cast_fp16 = slice_by_index(begin = var_10115_begin_0, end = var_10115_end_0, end_mask = var_10115_end_mask_0, x = var_9832_cast_fp16)[name = tensor("op_10115_cast_fp16")]; + tensor var_10122_begin_0 = const()[name = tensor("op_10122_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10122_end_0 = const()[name = tensor("op_10122_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10122_end_mask_0 = const()[name = tensor("op_10122_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10122_cast_fp16 = slice_by_index(begin = var_10122_begin_0, end = var_10122_end_0, end_mask = var_10122_end_mask_0, x = var_9832_cast_fp16)[name = tensor("op_10122_cast_fp16")]; + tensor var_10129_begin_0 = const()[name = tensor("op_10129_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10129_end_0 = const()[name = tensor("op_10129_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10129_end_mask_0 = const()[name = tensor("op_10129_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10129_cast_fp16 = slice_by_index(begin = var_10129_begin_0, end = var_10129_end_0, end_mask = var_10129_end_mask_0, x = var_9836_cast_fp16)[name = tensor("op_10129_cast_fp16")]; + tensor var_10136_begin_0 = const()[name = tensor("op_10136_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10136_end_0 = const()[name = tensor("op_10136_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10136_end_mask_0 = const()[name = tensor("op_10136_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10136_cast_fp16 = slice_by_index(begin = var_10136_begin_0, end = var_10136_end_0, end_mask = var_10136_end_mask_0, x = var_9836_cast_fp16)[name = tensor("op_10136_cast_fp16")]; + tensor var_10143_begin_0 = const()[name = tensor("op_10143_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10143_end_0 = const()[name = tensor("op_10143_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10143_end_mask_0 = const()[name = tensor("op_10143_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10143_cast_fp16 = slice_by_index(begin = var_10143_begin_0, end = var_10143_end_0, end_mask = var_10143_end_mask_0, x = var_9836_cast_fp16)[name = tensor("op_10143_cast_fp16")]; + tensor var_10150_begin_0 = const()[name = tensor("op_10150_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10150_end_0 = const()[name = tensor("op_10150_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10150_end_mask_0 = const()[name = tensor("op_10150_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10150_cast_fp16 = slice_by_index(begin = var_10150_begin_0, end = var_10150_end_0, end_mask = var_10150_end_mask_0, x = var_9836_cast_fp16)[name = tensor("op_10150_cast_fp16")]; + tensor var_10157_begin_0 = const()[name = tensor("op_10157_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10157_end_0 = const()[name = tensor("op_10157_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10157_end_mask_0 = const()[name = tensor("op_10157_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_10157_cast_fp16 = slice_by_index(begin = var_10157_begin_0, end = var_10157_end_0, end_mask = var_10157_end_mask_0, x = var_9840_cast_fp16)[name = tensor("op_10157_cast_fp16")]; + tensor var_10164_begin_0 = const()[name = tensor("op_10164_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10164_end_0 = const()[name = tensor("op_10164_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10164_end_mask_0 = const()[name = tensor("op_10164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10164_cast_fp16 = slice_by_index(begin = var_10164_begin_0, end = var_10164_end_0, end_mask = var_10164_end_mask_0, x = var_9840_cast_fp16)[name = tensor("op_10164_cast_fp16")]; + tensor var_10171_begin_0 = const()[name = tensor("op_10171_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10171_end_0 = const()[name = tensor("op_10171_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10171_end_mask_0 = const()[name = tensor("op_10171_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10171_cast_fp16 = slice_by_index(begin = var_10171_begin_0, end = var_10171_end_0, end_mask = var_10171_end_mask_0, x = var_9840_cast_fp16)[name = tensor("op_10171_cast_fp16")]; + tensor var_10178_begin_0 = const()[name = tensor("op_10178_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10178_end_0 = const()[name = tensor("op_10178_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10178_end_mask_0 = const()[name = tensor("op_10178_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10178_cast_fp16 = slice_by_index(begin = var_10178_begin_0, end = var_10178_end_0, end_mask = var_10178_end_mask_0, x = var_9840_cast_fp16)[name = tensor("op_10178_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_10183_begin_0 = const()[name = tensor("op_10183_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10183_end_0 = const()[name = tensor("op_10183_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_10183_end_mask_0 = const()[name = tensor("op_10183_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_1 = transpose(perm = k_21_perm_0, x = key_21_cast_fp16)[name = tensor("transpose_1")]; + tensor var_10183_cast_fp16 = slice_by_index(begin = var_10183_begin_0, end = var_10183_end_0, end_mask = var_10183_end_mask_0, x = transpose_1)[name = tensor("op_10183_cast_fp16")]; + tensor var_10187_begin_0 = const()[name = tensor("op_10187_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_10187_end_0 = const()[name = tensor("op_10187_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_10187_end_mask_0 = const()[name = tensor("op_10187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10187_cast_fp16 = slice_by_index(begin = var_10187_begin_0, end = var_10187_end_0, end_mask = var_10187_end_mask_0, x = transpose_1)[name = tensor("op_10187_cast_fp16")]; + tensor var_10191_begin_0 = const()[name = tensor("op_10191_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_10191_end_0 = const()[name = tensor("op_10191_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_10191_end_mask_0 = const()[name = tensor("op_10191_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10191_cast_fp16 = slice_by_index(begin = var_10191_begin_0, end = var_10191_end_0, end_mask = var_10191_end_mask_0, x = transpose_1)[name = tensor("op_10191_cast_fp16")]; + tensor var_10195_begin_0 = const()[name = tensor("op_10195_begin_0"), val = tensor([0, 0, 0, 192])]; + 
tensor var_10195_end_0 = const()[name = tensor("op_10195_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_10195_end_mask_0 = const()[name = tensor("op_10195_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10195_cast_fp16 = slice_by_index(begin = var_10195_begin_0, end = var_10195_end_0, end_mask = var_10195_end_mask_0, x = transpose_1)[name = tensor("op_10195_cast_fp16")]; + tensor var_10199_begin_0 = const()[name = tensor("op_10199_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_10199_end_0 = const()[name = tensor("op_10199_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_10199_end_mask_0 = const()[name = tensor("op_10199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10199_cast_fp16 = slice_by_index(begin = var_10199_begin_0, end = var_10199_end_0, end_mask = var_10199_end_mask_0, x = transpose_1)[name = tensor("op_10199_cast_fp16")]; + tensor var_10203_begin_0 = const()[name = tensor("op_10203_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_10203_end_0 = const()[name = tensor("op_10203_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_10203_end_mask_0 = const()[name = tensor("op_10203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10203_cast_fp16 = slice_by_index(begin = var_10203_begin_0, end = var_10203_end_0, end_mask = var_10203_end_mask_0, x = transpose_1)[name = tensor("op_10203_cast_fp16")]; + tensor var_10207_begin_0 = const()[name = tensor("op_10207_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_10207_end_0 = const()[name = tensor("op_10207_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_10207_end_mask_0 = const()[name = tensor("op_10207_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10207_cast_fp16 = slice_by_index(begin = var_10207_begin_0, end = var_10207_end_0, end_mask = var_10207_end_mask_0, x = transpose_1)[name = tensor("op_10207_cast_fp16")]; + tensor var_10211_begin_0 = const()[name = tensor("op_10211_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_10211_end_0 = const()[name = tensor("op_10211_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_10211_end_mask_0 = const()[name = tensor("op_10211_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10211_cast_fp16 = slice_by_index(begin = var_10211_begin_0, end = var_10211_end_0, end_mask = var_10211_end_mask_0, x = transpose_1)[name = tensor("op_10211_cast_fp16")]; + tensor var_10215_begin_0 = const()[name = tensor("op_10215_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_10215_end_0 = const()[name = tensor("op_10215_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_10215_end_mask_0 = const()[name = tensor("op_10215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10215_cast_fp16 = slice_by_index(begin = var_10215_begin_0, end = var_10215_end_0, end_mask = var_10215_end_mask_0, x = transpose_1)[name = tensor("op_10215_cast_fp16")]; + tensor var_10219_begin_0 = const()[name = tensor("op_10219_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_10219_end_0 = const()[name = tensor("op_10219_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_10219_end_mask_0 = const()[name = tensor("op_10219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10219_cast_fp16 = slice_by_index(begin = var_10219_begin_0, end = var_10219_end_0, end_mask = var_10219_end_mask_0, x = transpose_1)[name = tensor("op_10219_cast_fp16")]; + tensor var_10223_begin_0 = const()[name = tensor("op_10223_begin_0"), val = tensor([0, 0, 0, 
640])]; + tensor var_10223_end_0 = const()[name = tensor("op_10223_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_10223_end_mask_0 = const()[name = tensor("op_10223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10223_cast_fp16 = slice_by_index(begin = var_10223_begin_0, end = var_10223_end_0, end_mask = var_10223_end_mask_0, x = transpose_1)[name = tensor("op_10223_cast_fp16")]; + tensor var_10227_begin_0 = const()[name = tensor("op_10227_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_10227_end_0 = const()[name = tensor("op_10227_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_10227_end_mask_0 = const()[name = tensor("op_10227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10227_cast_fp16 = slice_by_index(begin = var_10227_begin_0, end = var_10227_end_0, end_mask = var_10227_end_mask_0, x = transpose_1)[name = tensor("op_10227_cast_fp16")]; + tensor var_10229_begin_0 = const()[name = tensor("op_10229_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10229_end_0 = const()[name = tensor("op_10229_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10229_end_mask_0 = const()[name = tensor("op_10229_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10229_cast_fp16 = slice_by_index(begin = var_10229_begin_0, end = var_10229_end_0, end_mask = var_10229_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10229_cast_fp16")]; + tensor var_10233_begin_0 = const()[name = tensor("op_10233_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_10233_end_0 = const()[name = tensor("op_10233_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_10233_end_mask_0 = const()[name = tensor("op_10233_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10233_cast_fp16 = slice_by_index(begin = var_10233_begin_0, end = var_10233_end_0, end_mask = var_10233_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10233_cast_fp16")]; + tensor var_10237_begin_0 = const()[name = tensor("op_10237_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_10237_end_0 = const()[name = tensor("op_10237_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_10237_end_mask_0 = const()[name = tensor("op_10237_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10237_cast_fp16 = slice_by_index(begin = var_10237_begin_0, end = var_10237_end_0, end_mask = var_10237_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10237_cast_fp16")]; + tensor var_10241_begin_0 = const()[name = tensor("op_10241_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_10241_end_0 = const()[name = tensor("op_10241_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_10241_end_mask_0 = const()[name = tensor("op_10241_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10241_cast_fp16 = slice_by_index(begin = var_10241_begin_0, end = var_10241_end_0, end_mask = var_10241_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10241_cast_fp16")]; + tensor var_10245_begin_0 = const()[name = tensor("op_10245_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_10245_end_0 = const()[name = tensor("op_10245_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_10245_end_mask_0 = const()[name = tensor("op_10245_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10245_cast_fp16 = slice_by_index(begin = var_10245_begin_0, end = var_10245_end_0, end_mask = var_10245_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10245_cast_fp16")]; + tensor var_10249_begin_0 = const()[name = 
tensor("op_10249_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_10249_end_0 = const()[name = tensor("op_10249_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_10249_end_mask_0 = const()[name = tensor("op_10249_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10249_cast_fp16 = slice_by_index(begin = var_10249_begin_0, end = var_10249_end_0, end_mask = var_10249_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10249_cast_fp16")]; + tensor var_10253_begin_0 = const()[name = tensor("op_10253_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_10253_end_0 = const()[name = tensor("op_10253_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_10253_end_mask_0 = const()[name = tensor("op_10253_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10253_cast_fp16 = slice_by_index(begin = var_10253_begin_0, end = var_10253_end_0, end_mask = var_10253_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10253_cast_fp16")]; + tensor var_10257_begin_0 = const()[name = tensor("op_10257_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_10257_end_0 = const()[name = tensor("op_10257_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_10257_end_mask_0 = const()[name = tensor("op_10257_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10257_cast_fp16 = slice_by_index(begin = var_10257_begin_0, end = var_10257_end_0, end_mask = var_10257_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10257_cast_fp16")]; + tensor var_10261_begin_0 = const()[name = tensor("op_10261_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_10261_end_0 = const()[name = tensor("op_10261_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_10261_end_mask_0 = const()[name = tensor("op_10261_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10261_cast_fp16 = slice_by_index(begin = var_10261_begin_0, end = var_10261_end_0, end_mask = var_10261_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10261_cast_fp16")]; + tensor var_10265_begin_0 = const()[name = tensor("op_10265_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_10265_end_0 = const()[name = tensor("op_10265_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_10265_end_mask_0 = const()[name = tensor("op_10265_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10265_cast_fp16 = slice_by_index(begin = var_10265_begin_0, end = var_10265_end_0, end_mask = var_10265_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10265_cast_fp16")]; + tensor var_10269_begin_0 = const()[name = tensor("op_10269_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_10269_end_0 = const()[name = tensor("op_10269_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_10269_end_mask_0 = const()[name = tensor("op_10269_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10269_cast_fp16 = slice_by_index(begin = var_10269_begin_0, end = var_10269_end_0, end_mask = var_10269_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_10269_cast_fp16")]; + tensor var_10273_begin_0 = const()[name = tensor("op_10273_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_10273_end_0 = const()[name = tensor("op_10273_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_10273_end_mask_0 = const()[name = tensor("op_10273_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10273_cast_fp16 = slice_by_index(begin = var_10273_begin_0, end = var_10273_end_0, end_mask = var_10273_end_mask_0, x = value_21_cast_fp16)[name = 
tensor("op_10273_cast_fp16")]; + tensor var_10277_equation_0 = const()[name = tensor("op_10277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10277_cast_fp16 = einsum(equation = var_10277_equation_0, values = (var_10183_cast_fp16, var_9849_cast_fp16))[name = tensor("op_10277_cast_fp16")]; + tensor var_10278_to_fp16 = const()[name = tensor("op_10278_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_961_cast_fp16 = mul(x = var_10277_cast_fp16, y = var_10278_to_fp16)[name = tensor("aw_chunk_961_cast_fp16")]; + tensor var_10281_equation_0 = const()[name = tensor("op_10281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10281_cast_fp16 = einsum(equation = var_10281_equation_0, values = (var_10183_cast_fp16, var_9856_cast_fp16))[name = tensor("op_10281_cast_fp16")]; + tensor var_10282_to_fp16 = const()[name = tensor("op_10282_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_963_cast_fp16 = mul(x = var_10281_cast_fp16, y = var_10282_to_fp16)[name = tensor("aw_chunk_963_cast_fp16")]; + tensor var_10285_equation_0 = const()[name = tensor("op_10285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10285_cast_fp16 = einsum(equation = var_10285_equation_0, values = (var_10183_cast_fp16, var_9863_cast_fp16))[name = tensor("op_10285_cast_fp16")]; + tensor var_10286_to_fp16 = const()[name = tensor("op_10286_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_965_cast_fp16 = mul(x = var_10285_cast_fp16, y = var_10286_to_fp16)[name = tensor("aw_chunk_965_cast_fp16")]; + tensor var_10289_equation_0 = const()[name = tensor("op_10289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10289_cast_fp16 = einsum(equation = var_10289_equation_0, values = (var_10183_cast_fp16, var_9870_cast_fp16))[name = tensor("op_10289_cast_fp16")]; + tensor var_10290_to_fp16 = const()[name = tensor("op_10290_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_967_cast_fp16 = mul(x = var_10289_cast_fp16, y = var_10290_to_fp16)[name = tensor("aw_chunk_967_cast_fp16")]; + tensor var_10293_equation_0 = const()[name = tensor("op_10293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10293_cast_fp16 = einsum(equation = var_10293_equation_0, values = (var_10187_cast_fp16, var_9877_cast_fp16))[name = tensor("op_10293_cast_fp16")]; + tensor var_10294_to_fp16 = const()[name = tensor("op_10294_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_969_cast_fp16 = mul(x = var_10293_cast_fp16, y = var_10294_to_fp16)[name = tensor("aw_chunk_969_cast_fp16")]; + tensor var_10297_equation_0 = const()[name = tensor("op_10297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10297_cast_fp16 = einsum(equation = var_10297_equation_0, values = (var_10187_cast_fp16, var_9884_cast_fp16))[name = tensor("op_10297_cast_fp16")]; + tensor var_10298_to_fp16 = const()[name = tensor("op_10298_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_971_cast_fp16 = mul(x = var_10297_cast_fp16, y = var_10298_to_fp16)[name = tensor("aw_chunk_971_cast_fp16")]; + tensor var_10301_equation_0 = const()[name = tensor("op_10301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10301_cast_fp16 = einsum(equation = var_10301_equation_0, values = (var_10187_cast_fp16, var_9891_cast_fp16))[name = tensor("op_10301_cast_fp16")]; + tensor var_10302_to_fp16 = const()[name = tensor("op_10302_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_973_cast_fp16 = mul(x = var_10301_cast_fp16, y = var_10302_to_fp16)[name = tensor("aw_chunk_973_cast_fp16")]; + tensor var_10305_equation_0 = const()[name = 
tensor("op_10305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10305_cast_fp16 = einsum(equation = var_10305_equation_0, values = (var_10187_cast_fp16, var_9898_cast_fp16))[name = tensor("op_10305_cast_fp16")]; + tensor var_10306_to_fp16 = const()[name = tensor("op_10306_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_975_cast_fp16 = mul(x = var_10305_cast_fp16, y = var_10306_to_fp16)[name = tensor("aw_chunk_975_cast_fp16")]; + tensor var_10309_equation_0 = const()[name = tensor("op_10309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10309_cast_fp16 = einsum(equation = var_10309_equation_0, values = (var_10191_cast_fp16, var_9905_cast_fp16))[name = tensor("op_10309_cast_fp16")]; + tensor var_10310_to_fp16 = const()[name = tensor("op_10310_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_977_cast_fp16 = mul(x = var_10309_cast_fp16, y = var_10310_to_fp16)[name = tensor("aw_chunk_977_cast_fp16")]; + tensor var_10313_equation_0 = const()[name = tensor("op_10313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10313_cast_fp16 = einsum(equation = var_10313_equation_0, values = (var_10191_cast_fp16, var_9912_cast_fp16))[name = tensor("op_10313_cast_fp16")]; + tensor var_10314_to_fp16 = const()[name = tensor("op_10314_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_979_cast_fp16 = mul(x = var_10313_cast_fp16, y = var_10314_to_fp16)[name = tensor("aw_chunk_979_cast_fp16")]; + tensor var_10317_equation_0 = const()[name = tensor("op_10317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10317_cast_fp16 = einsum(equation = var_10317_equation_0, values = (var_10191_cast_fp16, var_9919_cast_fp16))[name = tensor("op_10317_cast_fp16")]; + tensor var_10318_to_fp16 = const()[name = tensor("op_10318_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_981_cast_fp16 = mul(x = var_10317_cast_fp16, y = var_10318_to_fp16)[name = tensor("aw_chunk_981_cast_fp16")]; + tensor var_10321_equation_0 = const()[name = tensor("op_10321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10321_cast_fp16 = einsum(equation = var_10321_equation_0, values = (var_10191_cast_fp16, var_9926_cast_fp16))[name = tensor("op_10321_cast_fp16")]; + tensor var_10322_to_fp16 = const()[name = tensor("op_10322_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_983_cast_fp16 = mul(x = var_10321_cast_fp16, y = var_10322_to_fp16)[name = tensor("aw_chunk_983_cast_fp16")]; + tensor var_10325_equation_0 = const()[name = tensor("op_10325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10325_cast_fp16 = einsum(equation = var_10325_equation_0, values = (var_10195_cast_fp16, var_9933_cast_fp16))[name = tensor("op_10325_cast_fp16")]; + tensor var_10326_to_fp16 = const()[name = tensor("op_10326_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_985_cast_fp16 = mul(x = var_10325_cast_fp16, y = var_10326_to_fp16)[name = tensor("aw_chunk_985_cast_fp16")]; + tensor var_10329_equation_0 = const()[name = tensor("op_10329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10329_cast_fp16 = einsum(equation = var_10329_equation_0, values = (var_10195_cast_fp16, var_9940_cast_fp16))[name = tensor("op_10329_cast_fp16")]; + tensor var_10330_to_fp16 = const()[name = tensor("op_10330_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_987_cast_fp16 = mul(x = var_10329_cast_fp16, y = var_10330_to_fp16)[name = tensor("aw_chunk_987_cast_fp16")]; + tensor var_10333_equation_0 = const()[name = tensor("op_10333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_10333_cast_fp16 = einsum(equation = var_10333_equation_0, values = (var_10195_cast_fp16, var_9947_cast_fp16))[name = tensor("op_10333_cast_fp16")]; + tensor var_10334_to_fp16 = const()[name = tensor("op_10334_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_989_cast_fp16 = mul(x = var_10333_cast_fp16, y = var_10334_to_fp16)[name = tensor("aw_chunk_989_cast_fp16")]; + tensor var_10337_equation_0 = const()[name = tensor("op_10337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10337_cast_fp16 = einsum(equation = var_10337_equation_0, values = (var_10195_cast_fp16, var_9954_cast_fp16))[name = tensor("op_10337_cast_fp16")]; + tensor var_10338_to_fp16 = const()[name = tensor("op_10338_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_991_cast_fp16 = mul(x = var_10337_cast_fp16, y = var_10338_to_fp16)[name = tensor("aw_chunk_991_cast_fp16")]; + tensor var_10341_equation_0 = const()[name = tensor("op_10341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10341_cast_fp16 = einsum(equation = var_10341_equation_0, values = (var_10199_cast_fp16, var_9961_cast_fp16))[name = tensor("op_10341_cast_fp16")]; + tensor var_10342_to_fp16 = const()[name = tensor("op_10342_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_993_cast_fp16 = mul(x = var_10341_cast_fp16, y = var_10342_to_fp16)[name = tensor("aw_chunk_993_cast_fp16")]; + tensor var_10345_equation_0 = const()[name = tensor("op_10345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10345_cast_fp16 = einsum(equation = var_10345_equation_0, values = (var_10199_cast_fp16, var_9968_cast_fp16))[name = tensor("op_10345_cast_fp16")]; + tensor var_10346_to_fp16 = const()[name = tensor("op_10346_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_995_cast_fp16 = mul(x = var_10345_cast_fp16, y = var_10346_to_fp16)[name = tensor("aw_chunk_995_cast_fp16")]; + tensor var_10349_equation_0 = const()[name = tensor("op_10349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10349_cast_fp16 = einsum(equation = var_10349_equation_0, values = (var_10199_cast_fp16, var_9975_cast_fp16))[name = tensor("op_10349_cast_fp16")]; + tensor var_10350_to_fp16 = const()[name = tensor("op_10350_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_997_cast_fp16 = mul(x = var_10349_cast_fp16, y = var_10350_to_fp16)[name = tensor("aw_chunk_997_cast_fp16")]; + tensor var_10353_equation_0 = const()[name = tensor("op_10353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10353_cast_fp16 = einsum(equation = var_10353_equation_0, values = (var_10199_cast_fp16, var_9982_cast_fp16))[name = tensor("op_10353_cast_fp16")]; + tensor var_10354_to_fp16 = const()[name = tensor("op_10354_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_999_cast_fp16 = mul(x = var_10353_cast_fp16, y = var_10354_to_fp16)[name = tensor("aw_chunk_999_cast_fp16")]; + tensor var_10357_equation_0 = const()[name = tensor("op_10357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10357_cast_fp16 = einsum(equation = var_10357_equation_0, values = (var_10203_cast_fp16, var_9989_cast_fp16))[name = tensor("op_10357_cast_fp16")]; + tensor var_10358_to_fp16 = const()[name = tensor("op_10358_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1001_cast_fp16 = mul(x = var_10357_cast_fp16, y = var_10358_to_fp16)[name = tensor("aw_chunk_1001_cast_fp16")]; + tensor var_10361_equation_0 = const()[name = tensor("op_10361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10361_cast_fp16 = einsum(equation = var_10361_equation_0, values = 
(var_10203_cast_fp16, var_9996_cast_fp16))[name = tensor("op_10361_cast_fp16")]; + tensor var_10362_to_fp16 = const()[name = tensor("op_10362_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1003_cast_fp16 = mul(x = var_10361_cast_fp16, y = var_10362_to_fp16)[name = tensor("aw_chunk_1003_cast_fp16")]; + tensor var_10365_equation_0 = const()[name = tensor("op_10365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10365_cast_fp16 = einsum(equation = var_10365_equation_0, values = (var_10203_cast_fp16, var_10003_cast_fp16))[name = tensor("op_10365_cast_fp16")]; + tensor var_10366_to_fp16 = const()[name = tensor("op_10366_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1005_cast_fp16 = mul(x = var_10365_cast_fp16, y = var_10366_to_fp16)[name = tensor("aw_chunk_1005_cast_fp16")]; + tensor var_10369_equation_0 = const()[name = tensor("op_10369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10369_cast_fp16 = einsum(equation = var_10369_equation_0, values = (var_10203_cast_fp16, var_10010_cast_fp16))[name = tensor("op_10369_cast_fp16")]; + tensor var_10370_to_fp16 = const()[name = tensor("op_10370_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1007_cast_fp16 = mul(x = var_10369_cast_fp16, y = var_10370_to_fp16)[name = tensor("aw_chunk_1007_cast_fp16")]; + tensor var_10373_equation_0 = const()[name = tensor("op_10373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10373_cast_fp16 = einsum(equation = var_10373_equation_0, values = (var_10207_cast_fp16, var_10017_cast_fp16))[name = tensor("op_10373_cast_fp16")]; + tensor var_10374_to_fp16 = const()[name = tensor("op_10374_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1009_cast_fp16 = mul(x = var_10373_cast_fp16, y = var_10374_to_fp16)[name = tensor("aw_chunk_1009_cast_fp16")]; + tensor var_10377_equation_0 = const()[name = tensor("op_10377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10377_cast_fp16 = einsum(equation = var_10377_equation_0, values = (var_10207_cast_fp16, var_10024_cast_fp16))[name = tensor("op_10377_cast_fp16")]; + tensor var_10378_to_fp16 = const()[name = tensor("op_10378_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1011_cast_fp16 = mul(x = var_10377_cast_fp16, y = var_10378_to_fp16)[name = tensor("aw_chunk_1011_cast_fp16")]; + tensor var_10381_equation_0 = const()[name = tensor("op_10381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10381_cast_fp16 = einsum(equation = var_10381_equation_0, values = (var_10207_cast_fp16, var_10031_cast_fp16))[name = tensor("op_10381_cast_fp16")]; + tensor var_10382_to_fp16 = const()[name = tensor("op_10382_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1013_cast_fp16 = mul(x = var_10381_cast_fp16, y = var_10382_to_fp16)[name = tensor("aw_chunk_1013_cast_fp16")]; + tensor var_10385_equation_0 = const()[name = tensor("op_10385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10385_cast_fp16 = einsum(equation = var_10385_equation_0, values = (var_10207_cast_fp16, var_10038_cast_fp16))[name = tensor("op_10385_cast_fp16")]; + tensor var_10386_to_fp16 = const()[name = tensor("op_10386_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1015_cast_fp16 = mul(x = var_10385_cast_fp16, y = var_10386_to_fp16)[name = tensor("aw_chunk_1015_cast_fp16")]; + tensor var_10389_equation_0 = const()[name = tensor("op_10389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10389_cast_fp16 = einsum(equation = var_10389_equation_0, values = (var_10211_cast_fp16, var_10045_cast_fp16))[name = 
tensor("op_10389_cast_fp16")]; + tensor var_10390_to_fp16 = const()[name = tensor("op_10390_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1017_cast_fp16 = mul(x = var_10389_cast_fp16, y = var_10390_to_fp16)[name = tensor("aw_chunk_1017_cast_fp16")]; + tensor var_10393_equation_0 = const()[name = tensor("op_10393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10393_cast_fp16 = einsum(equation = var_10393_equation_0, values = (var_10211_cast_fp16, var_10052_cast_fp16))[name = tensor("op_10393_cast_fp16")]; + tensor var_10394_to_fp16 = const()[name = tensor("op_10394_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1019_cast_fp16 = mul(x = var_10393_cast_fp16, y = var_10394_to_fp16)[name = tensor("aw_chunk_1019_cast_fp16")]; + tensor var_10397_equation_0 = const()[name = tensor("op_10397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10397_cast_fp16 = einsum(equation = var_10397_equation_0, values = (var_10211_cast_fp16, var_10059_cast_fp16))[name = tensor("op_10397_cast_fp16")]; + tensor var_10398_to_fp16 = const()[name = tensor("op_10398_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1021_cast_fp16 = mul(x = var_10397_cast_fp16, y = var_10398_to_fp16)[name = tensor("aw_chunk_1021_cast_fp16")]; + tensor var_10401_equation_0 = const()[name = tensor("op_10401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10401_cast_fp16 = einsum(equation = var_10401_equation_0, values = (var_10211_cast_fp16, var_10066_cast_fp16))[name = tensor("op_10401_cast_fp16")]; + tensor var_10402_to_fp16 = const()[name = tensor("op_10402_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1023_cast_fp16 = mul(x = var_10401_cast_fp16, y = var_10402_to_fp16)[name = tensor("aw_chunk_1023_cast_fp16")]; + tensor var_10405_equation_0 = const()[name = tensor("op_10405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10405_cast_fp16 = einsum(equation = var_10405_equation_0, values = (var_10215_cast_fp16, var_10073_cast_fp16))[name = tensor("op_10405_cast_fp16")]; + tensor var_10406_to_fp16 = const()[name = tensor("op_10406_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1025_cast_fp16 = mul(x = var_10405_cast_fp16, y = var_10406_to_fp16)[name = tensor("aw_chunk_1025_cast_fp16")]; + tensor var_10409_equation_0 = const()[name = tensor("op_10409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10409_cast_fp16 = einsum(equation = var_10409_equation_0, values = (var_10215_cast_fp16, var_10080_cast_fp16))[name = tensor("op_10409_cast_fp16")]; + tensor var_10410_to_fp16 = const()[name = tensor("op_10410_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1027_cast_fp16 = mul(x = var_10409_cast_fp16, y = var_10410_to_fp16)[name = tensor("aw_chunk_1027_cast_fp16")]; + tensor var_10413_equation_0 = const()[name = tensor("op_10413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10413_cast_fp16 = einsum(equation = var_10413_equation_0, values = (var_10215_cast_fp16, var_10087_cast_fp16))[name = tensor("op_10413_cast_fp16")]; + tensor var_10414_to_fp16 = const()[name = tensor("op_10414_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1029_cast_fp16 = mul(x = var_10413_cast_fp16, y = var_10414_to_fp16)[name = tensor("aw_chunk_1029_cast_fp16")]; + tensor var_10417_equation_0 = const()[name = tensor("op_10417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10417_cast_fp16 = einsum(equation = var_10417_equation_0, values = (var_10215_cast_fp16, var_10094_cast_fp16))[name = tensor("op_10417_cast_fp16")]; + tensor var_10418_to_fp16 = 
const()[name = tensor("op_10418_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1031_cast_fp16 = mul(x = var_10417_cast_fp16, y = var_10418_to_fp16)[name = tensor("aw_chunk_1031_cast_fp16")]; + tensor var_10421_equation_0 = const()[name = tensor("op_10421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10421_cast_fp16 = einsum(equation = var_10421_equation_0, values = (var_10219_cast_fp16, var_10101_cast_fp16))[name = tensor("op_10421_cast_fp16")]; + tensor var_10422_to_fp16 = const()[name = tensor("op_10422_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1033_cast_fp16 = mul(x = var_10421_cast_fp16, y = var_10422_to_fp16)[name = tensor("aw_chunk_1033_cast_fp16")]; + tensor var_10425_equation_0 = const()[name = tensor("op_10425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10425_cast_fp16 = einsum(equation = var_10425_equation_0, values = (var_10219_cast_fp16, var_10108_cast_fp16))[name = tensor("op_10425_cast_fp16")]; + tensor var_10426_to_fp16 = const()[name = tensor("op_10426_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1035_cast_fp16 = mul(x = var_10425_cast_fp16, y = var_10426_to_fp16)[name = tensor("aw_chunk_1035_cast_fp16")]; + tensor var_10429_equation_0 = const()[name = tensor("op_10429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10429_cast_fp16 = einsum(equation = var_10429_equation_0, values = (var_10219_cast_fp16, var_10115_cast_fp16))[name = tensor("op_10429_cast_fp16")]; + tensor var_10430_to_fp16 = const()[name = tensor("op_10430_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1037_cast_fp16 = mul(x = var_10429_cast_fp16, y = var_10430_to_fp16)[name = tensor("aw_chunk_1037_cast_fp16")]; + tensor var_10433_equation_0 = const()[name = tensor("op_10433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10433_cast_fp16 = einsum(equation = var_10433_equation_0, values = (var_10219_cast_fp16, var_10122_cast_fp16))[name = tensor("op_10433_cast_fp16")]; + tensor var_10434_to_fp16 = const()[name = tensor("op_10434_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1039_cast_fp16 = mul(x = var_10433_cast_fp16, y = var_10434_to_fp16)[name = tensor("aw_chunk_1039_cast_fp16")]; + tensor var_10437_equation_0 = const()[name = tensor("op_10437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10437_cast_fp16 = einsum(equation = var_10437_equation_0, values = (var_10223_cast_fp16, var_10129_cast_fp16))[name = tensor("op_10437_cast_fp16")]; + tensor var_10438_to_fp16 = const()[name = tensor("op_10438_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1041_cast_fp16 = mul(x = var_10437_cast_fp16, y = var_10438_to_fp16)[name = tensor("aw_chunk_1041_cast_fp16")]; + tensor var_10441_equation_0 = const()[name = tensor("op_10441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10441_cast_fp16 = einsum(equation = var_10441_equation_0, values = (var_10223_cast_fp16, var_10136_cast_fp16))[name = tensor("op_10441_cast_fp16")]; + tensor var_10442_to_fp16 = const()[name = tensor("op_10442_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1043_cast_fp16 = mul(x = var_10441_cast_fp16, y = var_10442_to_fp16)[name = tensor("aw_chunk_1043_cast_fp16")]; + tensor var_10445_equation_0 = const()[name = tensor("op_10445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10445_cast_fp16 = einsum(equation = var_10445_equation_0, values = (var_10223_cast_fp16, var_10143_cast_fp16))[name = tensor("op_10445_cast_fp16")]; + tensor var_10446_to_fp16 = const()[name = tensor("op_10446_to_fp16"), val = 
tensor(0x1p-3)]; + tensor aw_chunk_1045_cast_fp16 = mul(x = var_10445_cast_fp16, y = var_10446_to_fp16)[name = tensor("aw_chunk_1045_cast_fp16")]; + tensor var_10449_equation_0 = const()[name = tensor("op_10449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10449_cast_fp16 = einsum(equation = var_10449_equation_0, values = (var_10223_cast_fp16, var_10150_cast_fp16))[name = tensor("op_10449_cast_fp16")]; + tensor var_10450_to_fp16 = const()[name = tensor("op_10450_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1047_cast_fp16 = mul(x = var_10449_cast_fp16, y = var_10450_to_fp16)[name = tensor("aw_chunk_1047_cast_fp16")]; + tensor var_10453_equation_0 = const()[name = tensor("op_10453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10453_cast_fp16 = einsum(equation = var_10453_equation_0, values = (var_10227_cast_fp16, var_10157_cast_fp16))[name = tensor("op_10453_cast_fp16")]; + tensor var_10454_to_fp16 = const()[name = tensor("op_10454_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1049_cast_fp16 = mul(x = var_10453_cast_fp16, y = var_10454_to_fp16)[name = tensor("aw_chunk_1049_cast_fp16")]; + tensor var_10457_equation_0 = const()[name = tensor("op_10457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10457_cast_fp16 = einsum(equation = var_10457_equation_0, values = (var_10227_cast_fp16, var_10164_cast_fp16))[name = tensor("op_10457_cast_fp16")]; + tensor var_10458_to_fp16 = const()[name = tensor("op_10458_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1051_cast_fp16 = mul(x = var_10457_cast_fp16, y = var_10458_to_fp16)[name = tensor("aw_chunk_1051_cast_fp16")]; + tensor var_10461_equation_0 = const()[name = tensor("op_10461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10461_cast_fp16 = einsum(equation = var_10461_equation_0, values = (var_10227_cast_fp16, var_10171_cast_fp16))[name = tensor("op_10461_cast_fp16")]; + tensor var_10462_to_fp16 = const()[name = tensor("op_10462_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1053_cast_fp16 = mul(x = var_10461_cast_fp16, y = var_10462_to_fp16)[name = tensor("aw_chunk_1053_cast_fp16")]; + tensor var_10465_equation_0 = const()[name = tensor("op_10465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10465_cast_fp16 = einsum(equation = var_10465_equation_0, values = (var_10227_cast_fp16, var_10178_cast_fp16))[name = tensor("op_10465_cast_fp16")]; + tensor var_10466_to_fp16 = const()[name = tensor("op_10466_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1055_cast_fp16 = mul(x = var_10465_cast_fp16, y = var_10466_to_fp16)[name = tensor("aw_chunk_1055_cast_fp16")]; + tensor var_10468_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_961_cast_fp16)[name = tensor("op_10468_cast_fp16")]; + tensor var_10469_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_963_cast_fp16)[name = tensor("op_10469_cast_fp16")]; + tensor var_10470_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_965_cast_fp16)[name = tensor("op_10470_cast_fp16")]; + tensor var_10471_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_967_cast_fp16)[name = tensor("op_10471_cast_fp16")]; + tensor var_10472_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_969_cast_fp16)[name = tensor("op_10472_cast_fp16")]; + tensor var_10473_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_971_cast_fp16)[name = tensor("op_10473_cast_fp16")]; + tensor var_10474_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_973_cast_fp16)[name = tensor("op_10474_cast_fp16")]; + tensor var_10475_cast_fp16 = softmax(axis = var_9741, x 
= aw_chunk_975_cast_fp16)[name = tensor("op_10475_cast_fp16")]; + tensor var_10476_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_977_cast_fp16)[name = tensor("op_10476_cast_fp16")]; + tensor var_10477_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_979_cast_fp16)[name = tensor("op_10477_cast_fp16")]; + tensor var_10478_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_981_cast_fp16)[name = tensor("op_10478_cast_fp16")]; + tensor var_10479_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_983_cast_fp16)[name = tensor("op_10479_cast_fp16")]; + tensor var_10480_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_985_cast_fp16)[name = tensor("op_10480_cast_fp16")]; + tensor var_10481_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_987_cast_fp16)[name = tensor("op_10481_cast_fp16")]; + tensor var_10482_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_989_cast_fp16)[name = tensor("op_10482_cast_fp16")]; + tensor var_10483_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_991_cast_fp16)[name = tensor("op_10483_cast_fp16")]; + tensor var_10484_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_993_cast_fp16)[name = tensor("op_10484_cast_fp16")]; + tensor var_10485_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_995_cast_fp16)[name = tensor("op_10485_cast_fp16")]; + tensor var_10486_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_997_cast_fp16)[name = tensor("op_10486_cast_fp16")]; + tensor var_10487_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_999_cast_fp16)[name = tensor("op_10487_cast_fp16")]; + tensor var_10488_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1001_cast_fp16)[name = tensor("op_10488_cast_fp16")]; + tensor var_10489_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1003_cast_fp16)[name = tensor("op_10489_cast_fp16")]; + tensor var_10490_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1005_cast_fp16)[name = tensor("op_10490_cast_fp16")]; + tensor var_10491_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1007_cast_fp16)[name = tensor("op_10491_cast_fp16")]; + tensor var_10492_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1009_cast_fp16)[name = tensor("op_10492_cast_fp16")]; + tensor var_10493_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1011_cast_fp16)[name = tensor("op_10493_cast_fp16")]; + tensor var_10494_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1013_cast_fp16)[name = tensor("op_10494_cast_fp16")]; + tensor var_10495_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1015_cast_fp16)[name = tensor("op_10495_cast_fp16")]; + tensor var_10496_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1017_cast_fp16)[name = tensor("op_10496_cast_fp16")]; + tensor var_10497_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1019_cast_fp16)[name = tensor("op_10497_cast_fp16")]; + tensor var_10498_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1021_cast_fp16)[name = tensor("op_10498_cast_fp16")]; + tensor var_10499_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1023_cast_fp16)[name = tensor("op_10499_cast_fp16")]; + tensor var_10500_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1025_cast_fp16)[name = tensor("op_10500_cast_fp16")]; + tensor var_10501_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1027_cast_fp16)[name = tensor("op_10501_cast_fp16")]; + tensor var_10502_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1029_cast_fp16)[name = tensor("op_10502_cast_fp16")]; + tensor var_10503_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1031_cast_fp16)[name = tensor("op_10503_cast_fp16")]; + tensor var_10504_cast_fp16 = softmax(axis = var_9741, x 
= aw_chunk_1033_cast_fp16)[name = tensor("op_10504_cast_fp16")]; + tensor var_10505_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1035_cast_fp16)[name = tensor("op_10505_cast_fp16")]; + tensor var_10506_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1037_cast_fp16)[name = tensor("op_10506_cast_fp16")]; + tensor var_10507_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1039_cast_fp16)[name = tensor("op_10507_cast_fp16")]; + tensor var_10508_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1041_cast_fp16)[name = tensor("op_10508_cast_fp16")]; + tensor var_10509_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1043_cast_fp16)[name = tensor("op_10509_cast_fp16")]; + tensor var_10510_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1045_cast_fp16)[name = tensor("op_10510_cast_fp16")]; + tensor var_10511_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1047_cast_fp16)[name = tensor("op_10511_cast_fp16")]; + tensor var_10512_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1049_cast_fp16)[name = tensor("op_10512_cast_fp16")]; + tensor var_10513_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1051_cast_fp16)[name = tensor("op_10513_cast_fp16")]; + tensor var_10514_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1053_cast_fp16)[name = tensor("op_10514_cast_fp16")]; + tensor var_10515_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1055_cast_fp16)[name = tensor("op_10515_cast_fp16")]; + tensor var_10517_equation_0 = const()[name = tensor("op_10517_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10517_cast_fp16 = einsum(equation = var_10517_equation_0, values = (var_10229_cast_fp16, var_10468_cast_fp16))[name = tensor("op_10517_cast_fp16")]; + tensor var_10519_equation_0 = const()[name = tensor("op_10519_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10519_cast_fp16 = einsum(equation = var_10519_equation_0, values = (var_10229_cast_fp16, var_10469_cast_fp16))[name = tensor("op_10519_cast_fp16")]; + tensor var_10521_equation_0 = const()[name = tensor("op_10521_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10521_cast_fp16 = einsum(equation = var_10521_equation_0, values = (var_10229_cast_fp16, var_10470_cast_fp16))[name = tensor("op_10521_cast_fp16")]; + tensor var_10523_equation_0 = const()[name = tensor("op_10523_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10523_cast_fp16 = einsum(equation = var_10523_equation_0, values = (var_10229_cast_fp16, var_10471_cast_fp16))[name = tensor("op_10523_cast_fp16")]; + tensor var_10525_equation_0 = const()[name = tensor("op_10525_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10525_cast_fp16 = einsum(equation = var_10525_equation_0, values = (var_10233_cast_fp16, var_10472_cast_fp16))[name = tensor("op_10525_cast_fp16")]; + tensor var_10527_equation_0 = const()[name = tensor("op_10527_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10527_cast_fp16 = einsum(equation = var_10527_equation_0, values = (var_10233_cast_fp16, var_10473_cast_fp16))[name = tensor("op_10527_cast_fp16")]; + tensor var_10529_equation_0 = const()[name = tensor("op_10529_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10529_cast_fp16 = einsum(equation = var_10529_equation_0, values = (var_10233_cast_fp16, var_10474_cast_fp16))[name = tensor("op_10529_cast_fp16")]; + tensor var_10531_equation_0 = const()[name = tensor("op_10531_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10531_cast_fp16 = einsum(equation = var_10531_equation_0, values = (var_10233_cast_fp16, 
var_10475_cast_fp16))[name = tensor("op_10531_cast_fp16")]; + tensor var_10533_equation_0 = const()[name = tensor("op_10533_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10533_cast_fp16 = einsum(equation = var_10533_equation_0, values = (var_10237_cast_fp16, var_10476_cast_fp16))[name = tensor("op_10533_cast_fp16")]; + tensor var_10535_equation_0 = const()[name = tensor("op_10535_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10535_cast_fp16 = einsum(equation = var_10535_equation_0, values = (var_10237_cast_fp16, var_10477_cast_fp16))[name = tensor("op_10535_cast_fp16")]; + tensor var_10537_equation_0 = const()[name = tensor("op_10537_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10537_cast_fp16 = einsum(equation = var_10537_equation_0, values = (var_10237_cast_fp16, var_10478_cast_fp16))[name = tensor("op_10537_cast_fp16")]; + tensor var_10539_equation_0 = const()[name = tensor("op_10539_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10539_cast_fp16 = einsum(equation = var_10539_equation_0, values = (var_10237_cast_fp16, var_10479_cast_fp16))[name = tensor("op_10539_cast_fp16")]; + tensor var_10541_equation_0 = const()[name = tensor("op_10541_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10541_cast_fp16 = einsum(equation = var_10541_equation_0, values = (var_10241_cast_fp16, var_10480_cast_fp16))[name = tensor("op_10541_cast_fp16")]; + tensor var_10543_equation_0 = const()[name = tensor("op_10543_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10543_cast_fp16 = einsum(equation = var_10543_equation_0, values = (var_10241_cast_fp16, var_10481_cast_fp16))[name = tensor("op_10543_cast_fp16")]; + tensor var_10545_equation_0 = const()[name = tensor("op_10545_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10545_cast_fp16 = einsum(equation = var_10545_equation_0, values = (var_10241_cast_fp16, var_10482_cast_fp16))[name = tensor("op_10545_cast_fp16")]; + tensor var_10547_equation_0 = const()[name = tensor("op_10547_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10547_cast_fp16 = einsum(equation = var_10547_equation_0, values = (var_10241_cast_fp16, var_10483_cast_fp16))[name = tensor("op_10547_cast_fp16")]; + tensor var_10549_equation_0 = const()[name = tensor("op_10549_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10549_cast_fp16 = einsum(equation = var_10549_equation_0, values = (var_10245_cast_fp16, var_10484_cast_fp16))[name = tensor("op_10549_cast_fp16")]; + tensor var_10551_equation_0 = const()[name = tensor("op_10551_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10551_cast_fp16 = einsum(equation = var_10551_equation_0, values = (var_10245_cast_fp16, var_10485_cast_fp16))[name = tensor("op_10551_cast_fp16")]; + tensor var_10553_equation_0 = const()[name = tensor("op_10553_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10553_cast_fp16 = einsum(equation = var_10553_equation_0, values = (var_10245_cast_fp16, var_10486_cast_fp16))[name = tensor("op_10553_cast_fp16")]; + tensor var_10555_equation_0 = const()[name = tensor("op_10555_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10555_cast_fp16 = einsum(equation = var_10555_equation_0, values = (var_10245_cast_fp16, var_10487_cast_fp16))[name = tensor("op_10555_cast_fp16")]; + tensor var_10557_equation_0 = const()[name = tensor("op_10557_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10557_cast_fp16 = einsum(equation = var_10557_equation_0, values = 
(var_10249_cast_fp16, var_10488_cast_fp16))[name = tensor("op_10557_cast_fp16")]; + tensor var_10559_equation_0 = const()[name = tensor("op_10559_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10559_cast_fp16 = einsum(equation = var_10559_equation_0, values = (var_10249_cast_fp16, var_10489_cast_fp16))[name = tensor("op_10559_cast_fp16")]; + tensor var_10561_equation_0 = const()[name = tensor("op_10561_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10561_cast_fp16 = einsum(equation = var_10561_equation_0, values = (var_10249_cast_fp16, var_10490_cast_fp16))[name = tensor("op_10561_cast_fp16")]; + tensor var_10563_equation_0 = const()[name = tensor("op_10563_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10563_cast_fp16 = einsum(equation = var_10563_equation_0, values = (var_10249_cast_fp16, var_10491_cast_fp16))[name = tensor("op_10563_cast_fp16")]; + tensor var_10565_equation_0 = const()[name = tensor("op_10565_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10565_cast_fp16 = einsum(equation = var_10565_equation_0, values = (var_10253_cast_fp16, var_10492_cast_fp16))[name = tensor("op_10565_cast_fp16")]; + tensor var_10567_equation_0 = const()[name = tensor("op_10567_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10567_cast_fp16 = einsum(equation = var_10567_equation_0, values = (var_10253_cast_fp16, var_10493_cast_fp16))[name = tensor("op_10567_cast_fp16")]; + tensor var_10569_equation_0 = const()[name = tensor("op_10569_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10569_cast_fp16 = einsum(equation = var_10569_equation_0, values = (var_10253_cast_fp16, var_10494_cast_fp16))[name = tensor("op_10569_cast_fp16")]; + tensor var_10571_equation_0 = const()[name = tensor("op_10571_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10571_cast_fp16 = einsum(equation = var_10571_equation_0, values = (var_10253_cast_fp16, var_10495_cast_fp16))[name = tensor("op_10571_cast_fp16")]; + tensor var_10573_equation_0 = const()[name = tensor("op_10573_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10573_cast_fp16 = einsum(equation = var_10573_equation_0, values = (var_10257_cast_fp16, var_10496_cast_fp16))[name = tensor("op_10573_cast_fp16")]; + tensor var_10575_equation_0 = const()[name = tensor("op_10575_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10575_cast_fp16 = einsum(equation = var_10575_equation_0, values = (var_10257_cast_fp16, var_10497_cast_fp16))[name = tensor("op_10575_cast_fp16")]; + tensor var_10577_equation_0 = const()[name = tensor("op_10577_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10577_cast_fp16 = einsum(equation = var_10577_equation_0, values = (var_10257_cast_fp16, var_10498_cast_fp16))[name = tensor("op_10577_cast_fp16")]; + tensor var_10579_equation_0 = const()[name = tensor("op_10579_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10579_cast_fp16 = einsum(equation = var_10579_equation_0, values = (var_10257_cast_fp16, var_10499_cast_fp16))[name = tensor("op_10579_cast_fp16")]; + tensor var_10581_equation_0 = const()[name = tensor("op_10581_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10581_cast_fp16 = einsum(equation = var_10581_equation_0, values = (var_10261_cast_fp16, var_10500_cast_fp16))[name = tensor("op_10581_cast_fp16")]; + tensor var_10583_equation_0 = const()[name = tensor("op_10583_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10583_cast_fp16 = einsum(equation = var_10583_equation_0, 
values = (var_10261_cast_fp16, var_10501_cast_fp16))[name = tensor("op_10583_cast_fp16")]; + tensor var_10585_equation_0 = const()[name = tensor("op_10585_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10585_cast_fp16 = einsum(equation = var_10585_equation_0, values = (var_10261_cast_fp16, var_10502_cast_fp16))[name = tensor("op_10585_cast_fp16")]; + tensor var_10587_equation_0 = const()[name = tensor("op_10587_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10587_cast_fp16 = einsum(equation = var_10587_equation_0, values = (var_10261_cast_fp16, var_10503_cast_fp16))[name = tensor("op_10587_cast_fp16")]; + tensor var_10589_equation_0 = const()[name = tensor("op_10589_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10589_cast_fp16 = einsum(equation = var_10589_equation_0, values = (var_10265_cast_fp16, var_10504_cast_fp16))[name = tensor("op_10589_cast_fp16")]; + tensor var_10591_equation_0 = const()[name = tensor("op_10591_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10591_cast_fp16 = einsum(equation = var_10591_equation_0, values = (var_10265_cast_fp16, var_10505_cast_fp16))[name = tensor("op_10591_cast_fp16")]; + tensor var_10593_equation_0 = const()[name = tensor("op_10593_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10593_cast_fp16 = einsum(equation = var_10593_equation_0, values = (var_10265_cast_fp16, var_10506_cast_fp16))[name = tensor("op_10593_cast_fp16")]; + tensor var_10595_equation_0 = const()[name = tensor("op_10595_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10595_cast_fp16 = einsum(equation = var_10595_equation_0, values = (var_10265_cast_fp16, var_10507_cast_fp16))[name = tensor("op_10595_cast_fp16")]; + tensor var_10597_equation_0 = const()[name = tensor("op_10597_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10597_cast_fp16 = einsum(equation = var_10597_equation_0, values = (var_10269_cast_fp16, var_10508_cast_fp16))[name = tensor("op_10597_cast_fp16")]; + tensor var_10599_equation_0 = const()[name = tensor("op_10599_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10599_cast_fp16 = einsum(equation = var_10599_equation_0, values = (var_10269_cast_fp16, var_10509_cast_fp16))[name = tensor("op_10599_cast_fp16")]; + tensor var_10601_equation_0 = const()[name = tensor("op_10601_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10601_cast_fp16 = einsum(equation = var_10601_equation_0, values = (var_10269_cast_fp16, var_10510_cast_fp16))[name = tensor("op_10601_cast_fp16")]; + tensor var_10603_equation_0 = const()[name = tensor("op_10603_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10603_cast_fp16 = einsum(equation = var_10603_equation_0, values = (var_10269_cast_fp16, var_10511_cast_fp16))[name = tensor("op_10603_cast_fp16")]; + tensor var_10605_equation_0 = const()[name = tensor("op_10605_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10605_cast_fp16 = einsum(equation = var_10605_equation_0, values = (var_10273_cast_fp16, var_10512_cast_fp16))[name = tensor("op_10605_cast_fp16")]; + tensor var_10607_equation_0 = const()[name = tensor("op_10607_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10607_cast_fp16 = einsum(equation = var_10607_equation_0, values = (var_10273_cast_fp16, var_10513_cast_fp16))[name = tensor("op_10607_cast_fp16")]; + tensor var_10609_equation_0 = const()[name = tensor("op_10609_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10609_cast_fp16 = einsum(equation = 
var_10609_equation_0, values = (var_10273_cast_fp16, var_10514_cast_fp16))[name = tensor("op_10609_cast_fp16")]; + tensor var_10611_equation_0 = const()[name = tensor("op_10611_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10611_cast_fp16 = einsum(equation = var_10611_equation_0, values = (var_10273_cast_fp16, var_10515_cast_fp16))[name = tensor("op_10611_cast_fp16")]; + tensor var_10613_interleave_0 = const()[name = tensor("op_10613_interleave_0"), val = tensor(false)]; + tensor var_10613_cast_fp16 = concat(axis = var_9724, interleave = var_10613_interleave_0, values = (var_10517_cast_fp16, var_10519_cast_fp16, var_10521_cast_fp16, var_10523_cast_fp16))[name = tensor("op_10613_cast_fp16")]; + tensor var_10615_interleave_0 = const()[name = tensor("op_10615_interleave_0"), val = tensor(false)]; + tensor var_10615_cast_fp16 = concat(axis = var_9724, interleave = var_10615_interleave_0, values = (var_10525_cast_fp16, var_10527_cast_fp16, var_10529_cast_fp16, var_10531_cast_fp16))[name = tensor("op_10615_cast_fp16")]; + tensor var_10617_interleave_0 = const()[name = tensor("op_10617_interleave_0"), val = tensor(false)]; + tensor var_10617_cast_fp16 = concat(axis = var_9724, interleave = var_10617_interleave_0, values = (var_10533_cast_fp16, var_10535_cast_fp16, var_10537_cast_fp16, var_10539_cast_fp16))[name = tensor("op_10617_cast_fp16")]; + tensor var_10619_interleave_0 = const()[name = tensor("op_10619_interleave_0"), val = tensor(false)]; + tensor var_10619_cast_fp16 = concat(axis = var_9724, interleave = var_10619_interleave_0, values = (var_10541_cast_fp16, var_10543_cast_fp16, var_10545_cast_fp16, var_10547_cast_fp16))[name = tensor("op_10619_cast_fp16")]; + tensor var_10621_interleave_0 = const()[name = tensor("op_10621_interleave_0"), val = tensor(false)]; + tensor var_10621_cast_fp16 = concat(axis = var_9724, interleave = var_10621_interleave_0, values = (var_10549_cast_fp16, var_10551_cast_fp16, var_10553_cast_fp16, var_10555_cast_fp16))[name = tensor("op_10621_cast_fp16")]; + tensor var_10623_interleave_0 = const()[name = tensor("op_10623_interleave_0"), val = tensor(false)]; + tensor var_10623_cast_fp16 = concat(axis = var_9724, interleave = var_10623_interleave_0, values = (var_10557_cast_fp16, var_10559_cast_fp16, var_10561_cast_fp16, var_10563_cast_fp16))[name = tensor("op_10623_cast_fp16")]; + tensor var_10625_interleave_0 = const()[name = tensor("op_10625_interleave_0"), val = tensor(false)]; + tensor var_10625_cast_fp16 = concat(axis = var_9724, interleave = var_10625_interleave_0, values = (var_10565_cast_fp16, var_10567_cast_fp16, var_10569_cast_fp16, var_10571_cast_fp16))[name = tensor("op_10625_cast_fp16")]; + tensor var_10627_interleave_0 = const()[name = tensor("op_10627_interleave_0"), val = tensor(false)]; + tensor var_10627_cast_fp16 = concat(axis = var_9724, interleave = var_10627_interleave_0, values = (var_10573_cast_fp16, var_10575_cast_fp16, var_10577_cast_fp16, var_10579_cast_fp16))[name = tensor("op_10627_cast_fp16")]; + tensor var_10629_interleave_0 = const()[name = tensor("op_10629_interleave_0"), val = tensor(false)]; + tensor var_10629_cast_fp16 = concat(axis = var_9724, interleave = var_10629_interleave_0, values = (var_10581_cast_fp16, var_10583_cast_fp16, var_10585_cast_fp16, var_10587_cast_fp16))[name = tensor("op_10629_cast_fp16")]; + tensor var_10631_interleave_0 = const()[name = tensor("op_10631_interleave_0"), val = tensor(false)]; + tensor var_10631_cast_fp16 = concat(axis = var_9724, interleave = var_10631_interleave_0, values = 
(var_10589_cast_fp16, var_10591_cast_fp16, var_10593_cast_fp16, var_10595_cast_fp16))[name = tensor("op_10631_cast_fp16")]; + tensor var_10633_interleave_0 = const()[name = tensor("op_10633_interleave_0"), val = tensor(false)]; + tensor var_10633_cast_fp16 = concat(axis = var_9724, interleave = var_10633_interleave_0, values = (var_10597_cast_fp16, var_10599_cast_fp16, var_10601_cast_fp16, var_10603_cast_fp16))[name = tensor("op_10633_cast_fp16")]; + tensor var_10635_interleave_0 = const()[name = tensor("op_10635_interleave_0"), val = tensor(false)]; + tensor var_10635_cast_fp16 = concat(axis = var_9724, interleave = var_10635_interleave_0, values = (var_10605_cast_fp16, var_10607_cast_fp16, var_10609_cast_fp16, var_10611_cast_fp16))[name = tensor("op_10635_cast_fp16")]; + tensor input_81_interleave_0 = const()[name = tensor("input_81_interleave_0"), val = tensor(false)]; + tensor input_81_cast_fp16 = concat(axis = var_9741, interleave = input_81_interleave_0, values = (var_10613_cast_fp16, var_10615_cast_fp16, var_10617_cast_fp16, var_10619_cast_fp16, var_10621_cast_fp16, var_10623_cast_fp16, var_10625_cast_fp16, var_10627_cast_fp16, var_10629_cast_fp16, var_10631_cast_fp16, var_10633_cast_fp16, var_10635_cast_fp16))[name = tensor("input_81_cast_fp16")]; + tensor var_10640 = const()[name = tensor("op_10640"), val = tensor([1, 1])]; + tensor var_10642 = const()[name = tensor("op_10642"), val = tensor([1, 1])]; + tensor obj_43_pad_type_0 = const()[name = tensor("obj_43_pad_type_0"), val = tensor("custom")]; + tensor obj_43_pad_0 = const()[name = tensor("obj_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151515456)))]; + tensor layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152695168)))]; + tensor obj_43_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = var_10642, groups = var_9741, pad = obj_43_pad_0, pad_type = obj_43_pad_type_0, strides = var_10640, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor var_10648 = const()[name = tensor("op_10648"), val = tensor([1])]; + tensor channels_mean_43_cast_fp16 = reduce_mean(axes = var_10648, keep_dims = var_9742, x = inputs_43_cast_fp16)[name = tensor("channels_mean_43_cast_fp16")]; + tensor zero_mean_43_cast_fp16 = sub(x = inputs_43_cast_fp16, y = channels_mean_43_cast_fp16)[name = tensor("zero_mean_43_cast_fp16")]; + tensor zero_mean_sq_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = zero_mean_43_cast_fp16)[name = tensor("zero_mean_sq_43_cast_fp16")]; + tensor var_10652 = const()[name = tensor("op_10652"), val = tensor([1])]; + tensor var_10653_cast_fp16 = reduce_mean(axes = var_10652, keep_dims = var_9742, x = zero_mean_sq_43_cast_fp16)[name = tensor("op_10653_cast_fp16")]; + tensor var_10654_to_fp16 = const()[name = tensor("op_10654_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_10655_cast_fp16 = add(x = var_10653_cast_fp16, y = var_10654_to_fp16)[name = tensor("op_10655_cast_fp16")]; + tensor denom_43_epsilon_0_to_fp16 = const()[name = tensor("denom_43_epsilon_0_to_fp16"), val = 
tensor(0x1p-24)]; + tensor denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0_to_fp16, x = var_10655_cast_fp16)[name = tensor("denom_43_cast_fp16")]; + tensor out_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = denom_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152696768)))]; + tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152698368)))]; + tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_10666 = const()[name = tensor("op_10666"), val = tensor([1, 1])]; + tensor var_10668 = const()[name = tensor("op_10668"), val = tensor([1, 1])]; + tensor input_85_pad_type_0 = const()[name = tensor("input_85_pad_type_0"), val = tensor("custom")]; + tensor input_85_pad_0 = const()[name = tensor("input_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_fc1_weight_to_fp16 = const()[name = tensor("layers_10_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152699968)))]; + tensor layers_10_fc1_bias_to_fp16 = const()[name = tensor("layers_10_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157418624)))]; + tensor input_85_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = var_10668, groups = var_9741, pad = input_85_pad_0, pad_type = input_85_pad_type_0, strides = var_10666, weight = layers_10_fc1_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; + tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_10674 = const()[name = tensor("op_10674"), val = tensor([1, 1])]; + tensor var_10676 = const()[name = tensor("op_10676"), val = tensor([1, 1])]; + tensor hidden_states_25_pad_type_0 = const()[name = tensor("hidden_states_25_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_25_pad_0 = const()[name = tensor("hidden_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_fc2_weight_to_fp16 = const()[name = tensor("layers_10_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157424832)))]; + tensor layers_10_fc2_bias_to_fp16 = const()[name = tensor("layers_10_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162143488)))]; + tensor hidden_states_25_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = var_10676, groups = var_9741, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = var_10674, weight = layers_10_fc2_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor var_10683 = const()[name = 
tensor("op_10683"), val = tensor(3)]; + tensor var_10700 = const()[name = tensor("op_10700"), val = tensor(1)]; + tensor var_10701 = const()[name = tensor("op_10701"), val = tensor(true)]; + tensor var_10711 = const()[name = tensor("op_10711"), val = tensor([1])]; + tensor channels_mean_45_cast_fp16 = reduce_mean(axes = var_10711, keep_dims = var_10701, x = inputs_45_cast_fp16)[name = tensor("channels_mean_45_cast_fp16")]; + tensor zero_mean_45_cast_fp16 = sub(x = inputs_45_cast_fp16, y = channels_mean_45_cast_fp16)[name = tensor("zero_mean_45_cast_fp16")]; + tensor zero_mean_sq_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = zero_mean_45_cast_fp16)[name = tensor("zero_mean_sq_45_cast_fp16")]; + tensor var_10715 = const()[name = tensor("op_10715"), val = tensor([1])]; + tensor var_10716_cast_fp16 = reduce_mean(axes = var_10715, keep_dims = var_10701, x = zero_mean_sq_45_cast_fp16)[name = tensor("op_10716_cast_fp16")]; + tensor var_10717_to_fp16 = const()[name = tensor("op_10717_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_10718_cast_fp16 = add(x = var_10716_cast_fp16, y = var_10717_to_fp16)[name = tensor("op_10718_cast_fp16")]; + tensor denom_45_epsilon_0_to_fp16 = const()[name = tensor("denom_45_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0_to_fp16, x = var_10718_cast_fp16)[name = tensor("denom_45_cast_fp16")]; + tensor out_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = denom_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162145088)))]; + tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162146688)))]; + tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; + tensor var_10733 = const()[name = tensor("op_10733"), val = tensor([1, 1])]; + tensor var_10735 = const()[name = tensor("op_10735"), val = tensor([1, 1])]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("custom")]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162148288)))]; + tensor layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163328000)))]; + tensor query_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = var_10735, groups = var_10700, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_10733, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_10739 = const()[name = tensor("op_10739"), val = tensor([1, 1])]; + tensor var_10741 = const()[name = tensor("op_10741"), val = tensor([1, 1])]; + tensor key_pad_type_0 = const()[name = 
tensor("key_pad_type_0"), val = tensor("custom")]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163329600)))]; + tensor key_cast_fp16 = conv(dilations = var_10741, groups = var_10700, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_10739, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_10746 = const()[name = tensor("op_10746"), val = tensor([1, 1])]; + tensor var_10748 = const()[name = tensor("op_10748"), val = tensor([1, 1])]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("custom")]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164509312)))]; + tensor layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165689024)))]; + tensor value_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = var_10748, groups = var_10700, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_10746, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_10755_begin_0 = const()[name = tensor("op_10755_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10755_end_0 = const()[name = tensor("op_10755_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10755_end_mask_0 = const()[name = tensor("op_10755_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10755_cast_fp16 = slice_by_index(begin = var_10755_begin_0, end = var_10755_end_0, end_mask = var_10755_end_mask_0, x = query_cast_fp16)[name = tensor("op_10755_cast_fp16")]; + tensor var_10759_begin_0 = const()[name = tensor("op_10759_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_10759_end_0 = const()[name = tensor("op_10759_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_10759_end_mask_0 = const()[name = tensor("op_10759_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10759_cast_fp16 = slice_by_index(begin = var_10759_begin_0, end = var_10759_end_0, end_mask = var_10759_end_mask_0, x = query_cast_fp16)[name = tensor("op_10759_cast_fp16")]; + tensor var_10763_begin_0 = const()[name = tensor("op_10763_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_10763_end_0 = const()[name = tensor("op_10763_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_10763_end_mask_0 = const()[name = tensor("op_10763_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10763_cast_fp16 = slice_by_index(begin = var_10763_begin_0, end = var_10763_end_0, end_mask = var_10763_end_mask_0, x = query_cast_fp16)[name = tensor("op_10763_cast_fp16")]; + tensor var_10767_begin_0 = const()[name = tensor("op_10767_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_10767_end_0 = const()[name = tensor("op_10767_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_10767_end_mask_0 = const()[name = tensor("op_10767_end_mask_0"), val = tensor([true, false, true, true])]; + 
tensor var_10767_cast_fp16 = slice_by_index(begin = var_10767_begin_0, end = var_10767_end_0, end_mask = var_10767_end_mask_0, x = query_cast_fp16)[name = tensor("op_10767_cast_fp16")]; + tensor var_10771_begin_0 = const()[name = tensor("op_10771_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_10771_end_0 = const()[name = tensor("op_10771_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_10771_end_mask_0 = const()[name = tensor("op_10771_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10771_cast_fp16 = slice_by_index(begin = var_10771_begin_0, end = var_10771_end_0, end_mask = var_10771_end_mask_0, x = query_cast_fp16)[name = tensor("op_10771_cast_fp16")]; + tensor var_10775_begin_0 = const()[name = tensor("op_10775_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_10775_end_0 = const()[name = tensor("op_10775_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_10775_end_mask_0 = const()[name = tensor("op_10775_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10775_cast_fp16 = slice_by_index(begin = var_10775_begin_0, end = var_10775_end_0, end_mask = var_10775_end_mask_0, x = query_cast_fp16)[name = tensor("op_10775_cast_fp16")]; + tensor var_10779_begin_0 = const()[name = tensor("op_10779_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_10779_end_0 = const()[name = tensor("op_10779_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_10779_end_mask_0 = const()[name = tensor("op_10779_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10779_cast_fp16 = slice_by_index(begin = var_10779_begin_0, end = var_10779_end_0, end_mask = var_10779_end_mask_0, x = query_cast_fp16)[name = tensor("op_10779_cast_fp16")]; + tensor var_10783_begin_0 = const()[name = tensor("op_10783_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_10783_end_0 = const()[name = tensor("op_10783_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_10783_end_mask_0 = const()[name = tensor("op_10783_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10783_cast_fp16 = slice_by_index(begin = var_10783_begin_0, end = var_10783_end_0, end_mask = var_10783_end_mask_0, x = query_cast_fp16)[name = tensor("op_10783_cast_fp16")]; + tensor var_10787_begin_0 = const()[name = tensor("op_10787_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_10787_end_0 = const()[name = tensor("op_10787_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_10787_end_mask_0 = const()[name = tensor("op_10787_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10787_cast_fp16 = slice_by_index(begin = var_10787_begin_0, end = var_10787_end_0, end_mask = var_10787_end_mask_0, x = query_cast_fp16)[name = tensor("op_10787_cast_fp16")]; + tensor var_10791_begin_0 = const()[name = tensor("op_10791_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_10791_end_0 = const()[name = tensor("op_10791_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_10791_end_mask_0 = const()[name = tensor("op_10791_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10791_cast_fp16 = slice_by_index(begin = var_10791_begin_0, end = var_10791_end_0, end_mask = var_10791_end_mask_0, x = query_cast_fp16)[name = tensor("op_10791_cast_fp16")]; + tensor var_10795_begin_0 = const()[name = tensor("op_10795_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_10795_end_0 = const()[name = tensor("op_10795_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_10795_end_mask_0 = const()[name = tensor("op_10795_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_10795_cast_fp16 = slice_by_index(begin = var_10795_begin_0, end = var_10795_end_0, end_mask = var_10795_end_mask_0, x = query_cast_fp16)[name = tensor("op_10795_cast_fp16")]; + tensor var_10799_begin_0 = const()[name = tensor("op_10799_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_10799_end_0 = const()[name = tensor("op_10799_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_10799_end_mask_0 = const()[name = tensor("op_10799_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10799_cast_fp16 = slice_by_index(begin = var_10799_begin_0, end = var_10799_end_0, end_mask = var_10799_end_mask_0, x = query_cast_fp16)[name = tensor("op_10799_cast_fp16")]; + tensor var_10808_begin_0 = const()[name = tensor("op_10808_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10808_end_0 = const()[name = tensor("op_10808_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10808_end_mask_0 = const()[name = tensor("op_10808_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10808_cast_fp16 = slice_by_index(begin = var_10808_begin_0, end = var_10808_end_0, end_mask = var_10808_end_mask_0, x = var_10755_cast_fp16)[name = tensor("op_10808_cast_fp16")]; + tensor var_10815_begin_0 = const()[name = tensor("op_10815_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10815_end_0 = const()[name = tensor("op_10815_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10815_end_mask_0 = const()[name = tensor("op_10815_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10815_cast_fp16 = slice_by_index(begin = var_10815_begin_0, end = var_10815_end_0, end_mask = var_10815_end_mask_0, x = var_10755_cast_fp16)[name = tensor("op_10815_cast_fp16")]; + tensor var_10822_begin_0 = const()[name = tensor("op_10822_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10822_end_0 = const()[name = tensor("op_10822_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10822_end_mask_0 = const()[name = tensor("op_10822_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10822_cast_fp16 = slice_by_index(begin = var_10822_begin_0, end = var_10822_end_0, end_mask = var_10822_end_mask_0, x = var_10755_cast_fp16)[name = tensor("op_10822_cast_fp16")]; + tensor var_10829_begin_0 = const()[name = tensor("op_10829_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10829_end_0 = const()[name = tensor("op_10829_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10829_end_mask_0 = const()[name = tensor("op_10829_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10829_cast_fp16 = slice_by_index(begin = var_10829_begin_0, end = var_10829_end_0, end_mask = var_10829_end_mask_0, x = var_10755_cast_fp16)[name = tensor("op_10829_cast_fp16")]; + tensor var_10836_begin_0 = const()[name = tensor("op_10836_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10836_end_0 = const()[name = tensor("op_10836_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10836_end_mask_0 = const()[name = tensor("op_10836_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10836_cast_fp16 = slice_by_index(begin = var_10836_begin_0, end = var_10836_end_0, end_mask = var_10836_end_mask_0, x = var_10759_cast_fp16)[name = tensor("op_10836_cast_fp16")]; + tensor var_10843_begin_0 = const()[name = tensor("op_10843_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10843_end_0 = const()[name = tensor("op_10843_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10843_end_mask_0 = 
const()[name = tensor("op_10843_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10843_cast_fp16 = slice_by_index(begin = var_10843_begin_0, end = var_10843_end_0, end_mask = var_10843_end_mask_0, x = var_10759_cast_fp16)[name = tensor("op_10843_cast_fp16")]; + tensor var_10850_begin_0 = const()[name = tensor("op_10850_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10850_end_0 = const()[name = tensor("op_10850_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10850_end_mask_0 = const()[name = tensor("op_10850_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10850_cast_fp16 = slice_by_index(begin = var_10850_begin_0, end = var_10850_end_0, end_mask = var_10850_end_mask_0, x = var_10759_cast_fp16)[name = tensor("op_10850_cast_fp16")]; + tensor var_10857_begin_0 = const()[name = tensor("op_10857_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10857_end_0 = const()[name = tensor("op_10857_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10857_end_mask_0 = const()[name = tensor("op_10857_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10857_cast_fp16 = slice_by_index(begin = var_10857_begin_0, end = var_10857_end_0, end_mask = var_10857_end_mask_0, x = var_10759_cast_fp16)[name = tensor("op_10857_cast_fp16")]; + tensor var_10864_begin_0 = const()[name = tensor("op_10864_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10864_end_0 = const()[name = tensor("op_10864_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10864_end_mask_0 = const()[name = tensor("op_10864_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10864_cast_fp16 = slice_by_index(begin = var_10864_begin_0, end = var_10864_end_0, end_mask = var_10864_end_mask_0, x = var_10763_cast_fp16)[name = tensor("op_10864_cast_fp16")]; + tensor var_10871_begin_0 = const()[name = tensor("op_10871_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10871_end_0 = const()[name = tensor("op_10871_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10871_end_mask_0 = const()[name = tensor("op_10871_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10871_cast_fp16 = slice_by_index(begin = var_10871_begin_0, end = var_10871_end_0, end_mask = var_10871_end_mask_0, x = var_10763_cast_fp16)[name = tensor("op_10871_cast_fp16")]; + tensor var_10878_begin_0 = const()[name = tensor("op_10878_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10878_end_0 = const()[name = tensor("op_10878_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10878_end_mask_0 = const()[name = tensor("op_10878_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10878_cast_fp16 = slice_by_index(begin = var_10878_begin_0, end = var_10878_end_0, end_mask = var_10878_end_mask_0, x = var_10763_cast_fp16)[name = tensor("op_10878_cast_fp16")]; + tensor var_10885_begin_0 = const()[name = tensor("op_10885_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10885_end_0 = const()[name = tensor("op_10885_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10885_end_mask_0 = const()[name = tensor("op_10885_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10885_cast_fp16 = slice_by_index(begin = var_10885_begin_0, end = var_10885_end_0, end_mask = var_10885_end_mask_0, x = var_10763_cast_fp16)[name = tensor("op_10885_cast_fp16")]; + tensor var_10892_begin_0 = const()[name = tensor("op_10892_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10892_end_0 = const()[name = tensor("op_10892_end_0"), val = 
tensor([1, 64, 1, 375])]; + tensor var_10892_end_mask_0 = const()[name = tensor("op_10892_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10892_cast_fp16 = slice_by_index(begin = var_10892_begin_0, end = var_10892_end_0, end_mask = var_10892_end_mask_0, x = var_10767_cast_fp16)[name = tensor("op_10892_cast_fp16")]; + tensor var_10899_begin_0 = const()[name = tensor("op_10899_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10899_end_0 = const()[name = tensor("op_10899_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10899_end_mask_0 = const()[name = tensor("op_10899_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10899_cast_fp16 = slice_by_index(begin = var_10899_begin_0, end = var_10899_end_0, end_mask = var_10899_end_mask_0, x = var_10767_cast_fp16)[name = tensor("op_10899_cast_fp16")]; + tensor var_10906_begin_0 = const()[name = tensor("op_10906_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10906_end_0 = const()[name = tensor("op_10906_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10906_end_mask_0 = const()[name = tensor("op_10906_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10906_cast_fp16 = slice_by_index(begin = var_10906_begin_0, end = var_10906_end_0, end_mask = var_10906_end_mask_0, x = var_10767_cast_fp16)[name = tensor("op_10906_cast_fp16")]; + tensor var_10913_begin_0 = const()[name = tensor("op_10913_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10913_end_0 = const()[name = tensor("op_10913_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10913_end_mask_0 = const()[name = tensor("op_10913_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10913_cast_fp16 = slice_by_index(begin = var_10913_begin_0, end = var_10913_end_0, end_mask = var_10913_end_mask_0, x = var_10767_cast_fp16)[name = tensor("op_10913_cast_fp16")]; + tensor var_10920_begin_0 = const()[name = tensor("op_10920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10920_end_0 = const()[name = tensor("op_10920_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10920_end_mask_0 = const()[name = tensor("op_10920_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10920_cast_fp16 = slice_by_index(begin = var_10920_begin_0, end = var_10920_end_0, end_mask = var_10920_end_mask_0, x = var_10771_cast_fp16)[name = tensor("op_10920_cast_fp16")]; + tensor var_10927_begin_0 = const()[name = tensor("op_10927_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10927_end_0 = const()[name = tensor("op_10927_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10927_end_mask_0 = const()[name = tensor("op_10927_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10927_cast_fp16 = slice_by_index(begin = var_10927_begin_0, end = var_10927_end_0, end_mask = var_10927_end_mask_0, x = var_10771_cast_fp16)[name = tensor("op_10927_cast_fp16")]; + tensor var_10934_begin_0 = const()[name = tensor("op_10934_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10934_end_0 = const()[name = tensor("op_10934_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10934_end_mask_0 = const()[name = tensor("op_10934_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10934_cast_fp16 = slice_by_index(begin = var_10934_begin_0, end = var_10934_end_0, end_mask = var_10934_end_mask_0, x = var_10771_cast_fp16)[name = tensor("op_10934_cast_fp16")]; + tensor var_10941_begin_0 = const()[name = tensor("op_10941_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor 
var_10941_end_0 = const()[name = tensor("op_10941_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10941_end_mask_0 = const()[name = tensor("op_10941_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10941_cast_fp16 = slice_by_index(begin = var_10941_begin_0, end = var_10941_end_0, end_mask = var_10941_end_mask_0, x = var_10771_cast_fp16)[name = tensor("op_10941_cast_fp16")]; + tensor var_10948_begin_0 = const()[name = tensor("op_10948_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10948_end_0 = const()[name = tensor("op_10948_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10948_end_mask_0 = const()[name = tensor("op_10948_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10948_cast_fp16 = slice_by_index(begin = var_10948_begin_0, end = var_10948_end_0, end_mask = var_10948_end_mask_0, x = var_10775_cast_fp16)[name = tensor("op_10948_cast_fp16")]; + tensor var_10955_begin_0 = const()[name = tensor("op_10955_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10955_end_0 = const()[name = tensor("op_10955_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10955_end_mask_0 = const()[name = tensor("op_10955_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10955_cast_fp16 = slice_by_index(begin = var_10955_begin_0, end = var_10955_end_0, end_mask = var_10955_end_mask_0, x = var_10775_cast_fp16)[name = tensor("op_10955_cast_fp16")]; + tensor var_10962_begin_0 = const()[name = tensor("op_10962_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10962_end_0 = const()[name = tensor("op_10962_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10962_end_mask_0 = const()[name = tensor("op_10962_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10962_cast_fp16 = slice_by_index(begin = var_10962_begin_0, end = var_10962_end_0, end_mask = var_10962_end_mask_0, x = var_10775_cast_fp16)[name = tensor("op_10962_cast_fp16")]; + tensor var_10969_begin_0 = const()[name = tensor("op_10969_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10969_end_0 = const()[name = tensor("op_10969_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10969_end_mask_0 = const()[name = tensor("op_10969_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10969_cast_fp16 = slice_by_index(begin = var_10969_begin_0, end = var_10969_end_0, end_mask = var_10969_end_mask_0, x = var_10775_cast_fp16)[name = tensor("op_10969_cast_fp16")]; + tensor var_10976_begin_0 = const()[name = tensor("op_10976_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10976_end_0 = const()[name = tensor("op_10976_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10976_end_mask_0 = const()[name = tensor("op_10976_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10976_cast_fp16 = slice_by_index(begin = var_10976_begin_0, end = var_10976_end_0, end_mask = var_10976_end_mask_0, x = var_10779_cast_fp16)[name = tensor("op_10976_cast_fp16")]; + tensor var_10983_begin_0 = const()[name = tensor("op_10983_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10983_end_0 = const()[name = tensor("op_10983_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10983_end_mask_0 = const()[name = tensor("op_10983_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10983_cast_fp16 = slice_by_index(begin = var_10983_begin_0, end = var_10983_end_0, end_mask = var_10983_end_mask_0, x = var_10779_cast_fp16)[name = tensor("op_10983_cast_fp16")]; + tensor var_10990_begin_0 = const()[name = 
tensor("op_10990_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10990_end_0 = const()[name = tensor("op_10990_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10990_end_mask_0 = const()[name = tensor("op_10990_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10990_cast_fp16 = slice_by_index(begin = var_10990_begin_0, end = var_10990_end_0, end_mask = var_10990_end_mask_0, x = var_10779_cast_fp16)[name = tensor("op_10990_cast_fp16")]; + tensor var_10997_begin_0 = const()[name = tensor("op_10997_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10997_end_0 = const()[name = tensor("op_10997_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10997_end_mask_0 = const()[name = tensor("op_10997_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10997_cast_fp16 = slice_by_index(begin = var_10997_begin_0, end = var_10997_end_0, end_mask = var_10997_end_mask_0, x = var_10779_cast_fp16)[name = tensor("op_10997_cast_fp16")]; + tensor var_11004_begin_0 = const()[name = tensor("op_11004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11004_end_0 = const()[name = tensor("op_11004_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11004_end_mask_0 = const()[name = tensor("op_11004_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11004_cast_fp16 = slice_by_index(begin = var_11004_begin_0, end = var_11004_end_0, end_mask = var_11004_end_mask_0, x = var_10783_cast_fp16)[name = tensor("op_11004_cast_fp16")]; + tensor var_11011_begin_0 = const()[name = tensor("op_11011_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11011_end_0 = const()[name = tensor("op_11011_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11011_end_mask_0 = const()[name = tensor("op_11011_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11011_cast_fp16 = slice_by_index(begin = var_11011_begin_0, end = var_11011_end_0, end_mask = var_11011_end_mask_0, x = var_10783_cast_fp16)[name = tensor("op_11011_cast_fp16")]; + tensor var_11018_begin_0 = const()[name = tensor("op_11018_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11018_end_0 = const()[name = tensor("op_11018_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11018_end_mask_0 = const()[name = tensor("op_11018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11018_cast_fp16 = slice_by_index(begin = var_11018_begin_0, end = var_11018_end_0, end_mask = var_11018_end_mask_0, x = var_10783_cast_fp16)[name = tensor("op_11018_cast_fp16")]; + tensor var_11025_begin_0 = const()[name = tensor("op_11025_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11025_end_0 = const()[name = tensor("op_11025_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11025_end_mask_0 = const()[name = tensor("op_11025_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11025_cast_fp16 = slice_by_index(begin = var_11025_begin_0, end = var_11025_end_0, end_mask = var_11025_end_mask_0, x = var_10783_cast_fp16)[name = tensor("op_11025_cast_fp16")]; + tensor var_11032_begin_0 = const()[name = tensor("op_11032_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11032_end_0 = const()[name = tensor("op_11032_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11032_end_mask_0 = const()[name = tensor("op_11032_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11032_cast_fp16 = slice_by_index(begin = var_11032_begin_0, end = var_11032_end_0, end_mask = var_11032_end_mask_0, x = var_10787_cast_fp16)[name = 
tensor("op_11032_cast_fp16")]; + tensor var_11039_begin_0 = const()[name = tensor("op_11039_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11039_end_0 = const()[name = tensor("op_11039_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11039_end_mask_0 = const()[name = tensor("op_11039_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11039_cast_fp16 = slice_by_index(begin = var_11039_begin_0, end = var_11039_end_0, end_mask = var_11039_end_mask_0, x = var_10787_cast_fp16)[name = tensor("op_11039_cast_fp16")]; + tensor var_11046_begin_0 = const()[name = tensor("op_11046_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11046_end_0 = const()[name = tensor("op_11046_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11046_end_mask_0 = const()[name = tensor("op_11046_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11046_cast_fp16 = slice_by_index(begin = var_11046_begin_0, end = var_11046_end_0, end_mask = var_11046_end_mask_0, x = var_10787_cast_fp16)[name = tensor("op_11046_cast_fp16")]; + tensor var_11053_begin_0 = const()[name = tensor("op_11053_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11053_end_0 = const()[name = tensor("op_11053_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11053_end_mask_0 = const()[name = tensor("op_11053_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11053_cast_fp16 = slice_by_index(begin = var_11053_begin_0, end = var_11053_end_0, end_mask = var_11053_end_mask_0, x = var_10787_cast_fp16)[name = tensor("op_11053_cast_fp16")]; + tensor var_11060_begin_0 = const()[name = tensor("op_11060_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11060_end_0 = const()[name = tensor("op_11060_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11060_end_mask_0 = const()[name = tensor("op_11060_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11060_cast_fp16 = slice_by_index(begin = var_11060_begin_0, end = var_11060_end_0, end_mask = var_11060_end_mask_0, x = var_10791_cast_fp16)[name = tensor("op_11060_cast_fp16")]; + tensor var_11067_begin_0 = const()[name = tensor("op_11067_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11067_end_0 = const()[name = tensor("op_11067_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11067_end_mask_0 = const()[name = tensor("op_11067_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11067_cast_fp16 = slice_by_index(begin = var_11067_begin_0, end = var_11067_end_0, end_mask = var_11067_end_mask_0, x = var_10791_cast_fp16)[name = tensor("op_11067_cast_fp16")]; + tensor var_11074_begin_0 = const()[name = tensor("op_11074_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11074_end_0 = const()[name = tensor("op_11074_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11074_end_mask_0 = const()[name = tensor("op_11074_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11074_cast_fp16 = slice_by_index(begin = var_11074_begin_0, end = var_11074_end_0, end_mask = var_11074_end_mask_0, x = var_10791_cast_fp16)[name = tensor("op_11074_cast_fp16")]; + tensor var_11081_begin_0 = const()[name = tensor("op_11081_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11081_end_0 = const()[name = tensor("op_11081_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11081_end_mask_0 = const()[name = tensor("op_11081_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11081_cast_fp16 = slice_by_index(begin = var_11081_begin_0, end = var_11081_end_0, end_mask = 
var_11081_end_mask_0, x = var_10791_cast_fp16)[name = tensor("op_11081_cast_fp16")]; + tensor var_11088_begin_0 = const()[name = tensor("op_11088_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11088_end_0 = const()[name = tensor("op_11088_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11088_end_mask_0 = const()[name = tensor("op_11088_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11088_cast_fp16 = slice_by_index(begin = var_11088_begin_0, end = var_11088_end_0, end_mask = var_11088_end_mask_0, x = var_10795_cast_fp16)[name = tensor("op_11088_cast_fp16")]; + tensor var_11095_begin_0 = const()[name = tensor("op_11095_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11095_end_0 = const()[name = tensor("op_11095_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11095_end_mask_0 = const()[name = tensor("op_11095_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11095_cast_fp16 = slice_by_index(begin = var_11095_begin_0, end = var_11095_end_0, end_mask = var_11095_end_mask_0, x = var_10795_cast_fp16)[name = tensor("op_11095_cast_fp16")]; + tensor var_11102_begin_0 = const()[name = tensor("op_11102_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11102_end_0 = const()[name = tensor("op_11102_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11102_end_mask_0 = const()[name = tensor("op_11102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11102_cast_fp16 = slice_by_index(begin = var_11102_begin_0, end = var_11102_end_0, end_mask = var_11102_end_mask_0, x = var_10795_cast_fp16)[name = tensor("op_11102_cast_fp16")]; + tensor var_11109_begin_0 = const()[name = tensor("op_11109_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11109_end_0 = const()[name = tensor("op_11109_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11109_end_mask_0 = const()[name = tensor("op_11109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11109_cast_fp16 = slice_by_index(begin = var_11109_begin_0, end = var_11109_end_0, end_mask = var_11109_end_mask_0, x = var_10795_cast_fp16)[name = tensor("op_11109_cast_fp16")]; + tensor var_11116_begin_0 = const()[name = tensor("op_11116_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11116_end_0 = const()[name = tensor("op_11116_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11116_end_mask_0 = const()[name = tensor("op_11116_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11116_cast_fp16 = slice_by_index(begin = var_11116_begin_0, end = var_11116_end_0, end_mask = var_11116_end_mask_0, x = var_10799_cast_fp16)[name = tensor("op_11116_cast_fp16")]; + tensor var_11123_begin_0 = const()[name = tensor("op_11123_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11123_end_0 = const()[name = tensor("op_11123_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11123_end_mask_0 = const()[name = tensor("op_11123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11123_cast_fp16 = slice_by_index(begin = var_11123_begin_0, end = var_11123_end_0, end_mask = var_11123_end_mask_0, x = var_10799_cast_fp16)[name = tensor("op_11123_cast_fp16")]; + tensor var_11130_begin_0 = const()[name = tensor("op_11130_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11130_end_0 = const()[name = tensor("op_11130_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11130_end_mask_0 = const()[name = tensor("op_11130_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11130_cast_fp16 = slice_by_index(begin = 
var_11130_begin_0, end = var_11130_end_0, end_mask = var_11130_end_mask_0, x = var_10799_cast_fp16)[name = tensor("op_11130_cast_fp16")]; + tensor var_11137_begin_0 = const()[name = tensor("op_11137_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11137_end_0 = const()[name = tensor("op_11137_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11137_end_mask_0 = const()[name = tensor("op_11137_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11137_cast_fp16 = slice_by_index(begin = var_11137_begin_0, end = var_11137_end_0, end_mask = var_11137_end_mask_0, x = var_10799_cast_fp16)[name = tensor("op_11137_cast_fp16")]; + tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_11142_begin_0 = const()[name = tensor("op_11142_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11142_end_0 = const()[name = tensor("op_11142_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_11142_end_mask_0 = const()[name = tensor("op_11142_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_0 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor("transpose_0")]; + tensor var_11142_cast_fp16 = slice_by_index(begin = var_11142_begin_0, end = var_11142_end_0, end_mask = var_11142_end_mask_0, x = transpose_0)[name = tensor("op_11142_cast_fp16")]; + tensor var_11146_begin_0 = const()[name = tensor("op_11146_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_11146_end_0 = const()[name = tensor("op_11146_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_11146_end_mask_0 = const()[name = tensor("op_11146_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11146_cast_fp16 = slice_by_index(begin = var_11146_begin_0, end = var_11146_end_0, end_mask = var_11146_end_mask_0, x = transpose_0)[name = tensor("op_11146_cast_fp16")]; + tensor var_11150_begin_0 = const()[name = tensor("op_11150_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_11150_end_0 = const()[name = tensor("op_11150_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_11150_end_mask_0 = const()[name = tensor("op_11150_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11150_cast_fp16 = slice_by_index(begin = var_11150_begin_0, end = var_11150_end_0, end_mask = var_11150_end_mask_0, x = transpose_0)[name = tensor("op_11150_cast_fp16")]; + tensor var_11154_begin_0 = const()[name = tensor("op_11154_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_11154_end_0 = const()[name = tensor("op_11154_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_11154_end_mask_0 = const()[name = tensor("op_11154_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11154_cast_fp16 = slice_by_index(begin = var_11154_begin_0, end = var_11154_end_0, end_mask = var_11154_end_mask_0, x = transpose_0)[name = tensor("op_11154_cast_fp16")]; + tensor var_11158_begin_0 = const()[name = tensor("op_11158_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_11158_end_0 = const()[name = tensor("op_11158_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_11158_end_mask_0 = const()[name = tensor("op_11158_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11158_cast_fp16 = slice_by_index(begin = var_11158_begin_0, end = var_11158_end_0, end_mask = var_11158_end_mask_0, x = transpose_0)[name = tensor("op_11158_cast_fp16")]; + tensor var_11162_begin_0 = const()[name = tensor("op_11162_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_11162_end_0 = const()[name = tensor("op_11162_end_0"), val 
= tensor([1, 1500, 1, 384])]; + tensor var_11162_end_mask_0 = const()[name = tensor("op_11162_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11162_cast_fp16 = slice_by_index(begin = var_11162_begin_0, end = var_11162_end_0, end_mask = var_11162_end_mask_0, x = transpose_0)[name = tensor("op_11162_cast_fp16")]; + tensor var_11166_begin_0 = const()[name = tensor("op_11166_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_11166_end_0 = const()[name = tensor("op_11166_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_11166_end_mask_0 = const()[name = tensor("op_11166_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11166_cast_fp16 = slice_by_index(begin = var_11166_begin_0, end = var_11166_end_0, end_mask = var_11166_end_mask_0, x = transpose_0)[name = tensor("op_11166_cast_fp16")]; + tensor var_11170_begin_0 = const()[name = tensor("op_11170_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_11170_end_0 = const()[name = tensor("op_11170_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_11170_end_mask_0 = const()[name = tensor("op_11170_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11170_cast_fp16 = slice_by_index(begin = var_11170_begin_0, end = var_11170_end_0, end_mask = var_11170_end_mask_0, x = transpose_0)[name = tensor("op_11170_cast_fp16")]; + tensor var_11174_begin_0 = const()[name = tensor("op_11174_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_11174_end_0 = const()[name = tensor("op_11174_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_11174_end_mask_0 = const()[name = tensor("op_11174_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11174_cast_fp16 = slice_by_index(begin = var_11174_begin_0, end = var_11174_end_0, end_mask = var_11174_end_mask_0, x = transpose_0)[name = tensor("op_11174_cast_fp16")]; + tensor var_11178_begin_0 = const()[name = tensor("op_11178_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_11178_end_0 = const()[name = tensor("op_11178_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_11178_end_mask_0 = const()[name = tensor("op_11178_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11178_cast_fp16 = slice_by_index(begin = var_11178_begin_0, end = var_11178_end_0, end_mask = var_11178_end_mask_0, x = transpose_0)[name = tensor("op_11178_cast_fp16")]; + tensor var_11182_begin_0 = const()[name = tensor("op_11182_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_11182_end_0 = const()[name = tensor("op_11182_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_11182_end_mask_0 = const()[name = tensor("op_11182_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11182_cast_fp16 = slice_by_index(begin = var_11182_begin_0, end = var_11182_end_0, end_mask = var_11182_end_mask_0, x = transpose_0)[name = tensor("op_11182_cast_fp16")]; + tensor var_11186_begin_0 = const()[name = tensor("op_11186_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_11186_end_0 = const()[name = tensor("op_11186_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_11186_end_mask_0 = const()[name = tensor("op_11186_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11186_cast_fp16 = slice_by_index(begin = var_11186_begin_0, end = var_11186_end_0, end_mask = var_11186_end_mask_0, x = transpose_0)[name = tensor("op_11186_cast_fp16")]; + tensor var_11188_begin_0 = const()[name = tensor("op_11188_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11188_end_0 = const()[name = 
tensor("op_11188_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11188_end_mask_0 = const()[name = tensor("op_11188_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11188_cast_fp16 = slice_by_index(begin = var_11188_begin_0, end = var_11188_end_0, end_mask = var_11188_end_mask_0, x = value_cast_fp16)[name = tensor("op_11188_cast_fp16")]; + tensor var_11192_begin_0 = const()[name = tensor("op_11192_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_11192_end_0 = const()[name = tensor("op_11192_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_11192_end_mask_0 = const()[name = tensor("op_11192_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11192_cast_fp16 = slice_by_index(begin = var_11192_begin_0, end = var_11192_end_0, end_mask = var_11192_end_mask_0, x = value_cast_fp16)[name = tensor("op_11192_cast_fp16")]; + tensor var_11196_begin_0 = const()[name = tensor("op_11196_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_11196_end_0 = const()[name = tensor("op_11196_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_11196_end_mask_0 = const()[name = tensor("op_11196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11196_cast_fp16 = slice_by_index(begin = var_11196_begin_0, end = var_11196_end_0, end_mask = var_11196_end_mask_0, x = value_cast_fp16)[name = tensor("op_11196_cast_fp16")]; + tensor var_11200_begin_0 = const()[name = tensor("op_11200_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_11200_end_0 = const()[name = tensor("op_11200_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_11200_end_mask_0 = const()[name = tensor("op_11200_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11200_cast_fp16 = slice_by_index(begin = var_11200_begin_0, end = var_11200_end_0, end_mask = var_11200_end_mask_0, x = value_cast_fp16)[name = tensor("op_11200_cast_fp16")]; + tensor var_11204_begin_0 = const()[name = tensor("op_11204_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_11204_end_0 = const()[name = tensor("op_11204_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_11204_end_mask_0 = const()[name = tensor("op_11204_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11204_cast_fp16 = slice_by_index(begin = var_11204_begin_0, end = var_11204_end_0, end_mask = var_11204_end_mask_0, x = value_cast_fp16)[name = tensor("op_11204_cast_fp16")]; + tensor var_11208_begin_0 = const()[name = tensor("op_11208_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_11208_end_0 = const()[name = tensor("op_11208_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_11208_end_mask_0 = const()[name = tensor("op_11208_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11208_cast_fp16 = slice_by_index(begin = var_11208_begin_0, end = var_11208_end_0, end_mask = var_11208_end_mask_0, x = value_cast_fp16)[name = tensor("op_11208_cast_fp16")]; + tensor var_11212_begin_0 = const()[name = tensor("op_11212_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_11212_end_0 = const()[name = tensor("op_11212_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_11212_end_mask_0 = const()[name = tensor("op_11212_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11212_cast_fp16 = slice_by_index(begin = var_11212_begin_0, end = var_11212_end_0, end_mask = var_11212_end_mask_0, x = value_cast_fp16)[name = tensor("op_11212_cast_fp16")]; + tensor var_11216_begin_0 = const()[name = tensor("op_11216_begin_0"), val = tensor([0, 448, 0, 0])]; + 
tensor var_11216_end_0 = const()[name = tensor("op_11216_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_11216_end_mask_0 = const()[name = tensor("op_11216_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11216_cast_fp16 = slice_by_index(begin = var_11216_begin_0, end = var_11216_end_0, end_mask = var_11216_end_mask_0, x = value_cast_fp16)[name = tensor("op_11216_cast_fp16")]; + tensor var_11220_begin_0 = const()[name = tensor("op_11220_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_11220_end_0 = const()[name = tensor("op_11220_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_11220_end_mask_0 = const()[name = tensor("op_11220_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11220_cast_fp16 = slice_by_index(begin = var_11220_begin_0, end = var_11220_end_0, end_mask = var_11220_end_mask_0, x = value_cast_fp16)[name = tensor("op_11220_cast_fp16")]; + tensor var_11224_begin_0 = const()[name = tensor("op_11224_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_11224_end_0 = const()[name = tensor("op_11224_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_11224_end_mask_0 = const()[name = tensor("op_11224_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11224_cast_fp16 = slice_by_index(begin = var_11224_begin_0, end = var_11224_end_0, end_mask = var_11224_end_mask_0, x = value_cast_fp16)[name = tensor("op_11224_cast_fp16")]; + tensor var_11228_begin_0 = const()[name = tensor("op_11228_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_11228_end_0 = const()[name = tensor("op_11228_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_11228_end_mask_0 = const()[name = tensor("op_11228_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11228_cast_fp16 = slice_by_index(begin = var_11228_begin_0, end = var_11228_end_0, end_mask = var_11228_end_mask_0, x = value_cast_fp16)[name = tensor("op_11228_cast_fp16")]; + tensor var_11232_begin_0 = const()[name = tensor("op_11232_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_11232_end_0 = const()[name = tensor("op_11232_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_11232_end_mask_0 = const()[name = tensor("op_11232_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11232_cast_fp16 = slice_by_index(begin = var_11232_begin_0, end = var_11232_end_0, end_mask = var_11232_end_mask_0, x = value_cast_fp16)[name = tensor("op_11232_cast_fp16")]; + tensor var_11236_equation_0 = const()[name = tensor("op_11236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11236_cast_fp16 = einsum(equation = var_11236_equation_0, values = (var_11142_cast_fp16, var_10808_cast_fp16))[name = tensor("op_11236_cast_fp16")]; + tensor var_11237_to_fp16 = const()[name = tensor("op_11237_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1057_cast_fp16 = mul(x = var_11236_cast_fp16, y = var_11237_to_fp16)[name = tensor("aw_chunk_1057_cast_fp16")]; + tensor var_11240_equation_0 = const()[name = tensor("op_11240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11240_cast_fp16 = einsum(equation = var_11240_equation_0, values = (var_11142_cast_fp16, var_10815_cast_fp16))[name = tensor("op_11240_cast_fp16")]; + tensor var_11241_to_fp16 = const()[name = tensor("op_11241_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1059_cast_fp16 = mul(x = var_11240_cast_fp16, y = var_11241_to_fp16)[name = tensor("aw_chunk_1059_cast_fp16")]; + tensor var_11244_equation_0 = const()[name = tensor("op_11244_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_11244_cast_fp16 = einsum(equation = var_11244_equation_0, values = (var_11142_cast_fp16, var_10822_cast_fp16))[name = tensor("op_11244_cast_fp16")]; + tensor var_11245_to_fp16 = const()[name = tensor("op_11245_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1061_cast_fp16 = mul(x = var_11244_cast_fp16, y = var_11245_to_fp16)[name = tensor("aw_chunk_1061_cast_fp16")]; + tensor var_11248_equation_0 = const()[name = tensor("op_11248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11248_cast_fp16 = einsum(equation = var_11248_equation_0, values = (var_11142_cast_fp16, var_10829_cast_fp16))[name = tensor("op_11248_cast_fp16")]; + tensor var_11249_to_fp16 = const()[name = tensor("op_11249_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1063_cast_fp16 = mul(x = var_11248_cast_fp16, y = var_11249_to_fp16)[name = tensor("aw_chunk_1063_cast_fp16")]; + tensor var_11252_equation_0 = const()[name = tensor("op_11252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11252_cast_fp16 = einsum(equation = var_11252_equation_0, values = (var_11146_cast_fp16, var_10836_cast_fp16))[name = tensor("op_11252_cast_fp16")]; + tensor var_11253_to_fp16 = const()[name = tensor("op_11253_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1065_cast_fp16 = mul(x = var_11252_cast_fp16, y = var_11253_to_fp16)[name = tensor("aw_chunk_1065_cast_fp16")]; + tensor var_11256_equation_0 = const()[name = tensor("op_11256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11256_cast_fp16 = einsum(equation = var_11256_equation_0, values = (var_11146_cast_fp16, var_10843_cast_fp16))[name = tensor("op_11256_cast_fp16")]; + tensor var_11257_to_fp16 = const()[name = tensor("op_11257_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1067_cast_fp16 = mul(x = var_11256_cast_fp16, y = var_11257_to_fp16)[name = tensor("aw_chunk_1067_cast_fp16")]; + tensor var_11260_equation_0 = const()[name = tensor("op_11260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11260_cast_fp16 = einsum(equation = var_11260_equation_0, values = (var_11146_cast_fp16, var_10850_cast_fp16))[name = tensor("op_11260_cast_fp16")]; + tensor var_11261_to_fp16 = const()[name = tensor("op_11261_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1069_cast_fp16 = mul(x = var_11260_cast_fp16, y = var_11261_to_fp16)[name = tensor("aw_chunk_1069_cast_fp16")]; + tensor var_11264_equation_0 = const()[name = tensor("op_11264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11264_cast_fp16 = einsum(equation = var_11264_equation_0, values = (var_11146_cast_fp16, var_10857_cast_fp16))[name = tensor("op_11264_cast_fp16")]; + tensor var_11265_to_fp16 = const()[name = tensor("op_11265_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1071_cast_fp16 = mul(x = var_11264_cast_fp16, y = var_11265_to_fp16)[name = tensor("aw_chunk_1071_cast_fp16")]; + tensor var_11268_equation_0 = const()[name = tensor("op_11268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11268_cast_fp16 = einsum(equation = var_11268_equation_0, values = (var_11150_cast_fp16, var_10864_cast_fp16))[name = tensor("op_11268_cast_fp16")]; + tensor var_11269_to_fp16 = const()[name = tensor("op_11269_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1073_cast_fp16 = mul(x = var_11268_cast_fp16, y = var_11269_to_fp16)[name = tensor("aw_chunk_1073_cast_fp16")]; + tensor var_11272_equation_0 = const()[name = tensor("op_11272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11272_cast_fp16 = 
einsum(equation = var_11272_equation_0, values = (var_11150_cast_fp16, var_10871_cast_fp16))[name = tensor("op_11272_cast_fp16")]; + tensor var_11273_to_fp16 = const()[name = tensor("op_11273_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1075_cast_fp16 = mul(x = var_11272_cast_fp16, y = var_11273_to_fp16)[name = tensor("aw_chunk_1075_cast_fp16")]; + tensor var_11276_equation_0 = const()[name = tensor("op_11276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11276_cast_fp16 = einsum(equation = var_11276_equation_0, values = (var_11150_cast_fp16, var_10878_cast_fp16))[name = tensor("op_11276_cast_fp16")]; + tensor var_11277_to_fp16 = const()[name = tensor("op_11277_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1077_cast_fp16 = mul(x = var_11276_cast_fp16, y = var_11277_to_fp16)[name = tensor("aw_chunk_1077_cast_fp16")]; + tensor var_11280_equation_0 = const()[name = tensor("op_11280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11280_cast_fp16 = einsum(equation = var_11280_equation_0, values = (var_11150_cast_fp16, var_10885_cast_fp16))[name = tensor("op_11280_cast_fp16")]; + tensor var_11281_to_fp16 = const()[name = tensor("op_11281_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1079_cast_fp16 = mul(x = var_11280_cast_fp16, y = var_11281_to_fp16)[name = tensor("aw_chunk_1079_cast_fp16")]; + tensor var_11284_equation_0 = const()[name = tensor("op_11284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11284_cast_fp16 = einsum(equation = var_11284_equation_0, values = (var_11154_cast_fp16, var_10892_cast_fp16))[name = tensor("op_11284_cast_fp16")]; + tensor var_11285_to_fp16 = const()[name = tensor("op_11285_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1081_cast_fp16 = mul(x = var_11284_cast_fp16, y = var_11285_to_fp16)[name = tensor("aw_chunk_1081_cast_fp16")]; + tensor var_11288_equation_0 = const()[name = tensor("op_11288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11288_cast_fp16 = einsum(equation = var_11288_equation_0, values = (var_11154_cast_fp16, var_10899_cast_fp16))[name = tensor("op_11288_cast_fp16")]; + tensor var_11289_to_fp16 = const()[name = tensor("op_11289_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1083_cast_fp16 = mul(x = var_11288_cast_fp16, y = var_11289_to_fp16)[name = tensor("aw_chunk_1083_cast_fp16")]; + tensor var_11292_equation_0 = const()[name = tensor("op_11292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11292_cast_fp16 = einsum(equation = var_11292_equation_0, values = (var_11154_cast_fp16, var_10906_cast_fp16))[name = tensor("op_11292_cast_fp16")]; + tensor var_11293_to_fp16 = const()[name = tensor("op_11293_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1085_cast_fp16 = mul(x = var_11292_cast_fp16, y = var_11293_to_fp16)[name = tensor("aw_chunk_1085_cast_fp16")]; + tensor var_11296_equation_0 = const()[name = tensor("op_11296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11296_cast_fp16 = einsum(equation = var_11296_equation_0, values = (var_11154_cast_fp16, var_10913_cast_fp16))[name = tensor("op_11296_cast_fp16")]; + tensor var_11297_to_fp16 = const()[name = tensor("op_11297_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1087_cast_fp16 = mul(x = var_11296_cast_fp16, y = var_11297_to_fp16)[name = tensor("aw_chunk_1087_cast_fp16")]; + tensor var_11300_equation_0 = const()[name = tensor("op_11300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11300_cast_fp16 = einsum(equation = var_11300_equation_0, values = 
(var_11158_cast_fp16, var_10920_cast_fp16))[name = tensor("op_11300_cast_fp16")]; + tensor var_11301_to_fp16 = const()[name = tensor("op_11301_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1089_cast_fp16 = mul(x = var_11300_cast_fp16, y = var_11301_to_fp16)[name = tensor("aw_chunk_1089_cast_fp16")]; + tensor var_11304_equation_0 = const()[name = tensor("op_11304_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11304_cast_fp16 = einsum(equation = var_11304_equation_0, values = (var_11158_cast_fp16, var_10927_cast_fp16))[name = tensor("op_11304_cast_fp16")]; + tensor var_11305_to_fp16 = const()[name = tensor("op_11305_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1091_cast_fp16 = mul(x = var_11304_cast_fp16, y = var_11305_to_fp16)[name = tensor("aw_chunk_1091_cast_fp16")]; + tensor var_11308_equation_0 = const()[name = tensor("op_11308_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11308_cast_fp16 = einsum(equation = var_11308_equation_0, values = (var_11158_cast_fp16, var_10934_cast_fp16))[name = tensor("op_11308_cast_fp16")]; + tensor var_11309_to_fp16 = const()[name = tensor("op_11309_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1093_cast_fp16 = mul(x = var_11308_cast_fp16, y = var_11309_to_fp16)[name = tensor("aw_chunk_1093_cast_fp16")]; + tensor var_11312_equation_0 = const()[name = tensor("op_11312_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11312_cast_fp16 = einsum(equation = var_11312_equation_0, values = (var_11158_cast_fp16, var_10941_cast_fp16))[name = tensor("op_11312_cast_fp16")]; + tensor var_11313_to_fp16 = const()[name = tensor("op_11313_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1095_cast_fp16 = mul(x = var_11312_cast_fp16, y = var_11313_to_fp16)[name = tensor("aw_chunk_1095_cast_fp16")]; + tensor var_11316_equation_0 = const()[name = tensor("op_11316_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11316_cast_fp16 = einsum(equation = var_11316_equation_0, values = (var_11162_cast_fp16, var_10948_cast_fp16))[name = tensor("op_11316_cast_fp16")]; + tensor var_11317_to_fp16 = const()[name = tensor("op_11317_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1097_cast_fp16 = mul(x = var_11316_cast_fp16, y = var_11317_to_fp16)[name = tensor("aw_chunk_1097_cast_fp16")]; + tensor var_11320_equation_0 = const()[name = tensor("op_11320_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11320_cast_fp16 = einsum(equation = var_11320_equation_0, values = (var_11162_cast_fp16, var_10955_cast_fp16))[name = tensor("op_11320_cast_fp16")]; + tensor var_11321_to_fp16 = const()[name = tensor("op_11321_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1099_cast_fp16 = mul(x = var_11320_cast_fp16, y = var_11321_to_fp16)[name = tensor("aw_chunk_1099_cast_fp16")]; + tensor var_11324_equation_0 = const()[name = tensor("op_11324_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11324_cast_fp16 = einsum(equation = var_11324_equation_0, values = (var_11162_cast_fp16, var_10962_cast_fp16))[name = tensor("op_11324_cast_fp16")]; + tensor var_11325_to_fp16 = const()[name = tensor("op_11325_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1101_cast_fp16 = mul(x = var_11324_cast_fp16, y = var_11325_to_fp16)[name = tensor("aw_chunk_1101_cast_fp16")]; + tensor var_11328_equation_0 = const()[name = tensor("op_11328_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11328_cast_fp16 = einsum(equation = var_11328_equation_0, values = (var_11162_cast_fp16, var_10969_cast_fp16))[name = 
tensor("op_11328_cast_fp16")]; + tensor var_11329_to_fp16 = const()[name = tensor("op_11329_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1103_cast_fp16 = mul(x = var_11328_cast_fp16, y = var_11329_to_fp16)[name = tensor("aw_chunk_1103_cast_fp16")]; + tensor var_11332_equation_0 = const()[name = tensor("op_11332_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11332_cast_fp16 = einsum(equation = var_11332_equation_0, values = (var_11166_cast_fp16, var_10976_cast_fp16))[name = tensor("op_11332_cast_fp16")]; + tensor var_11333_to_fp16 = const()[name = tensor("op_11333_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1105_cast_fp16 = mul(x = var_11332_cast_fp16, y = var_11333_to_fp16)[name = tensor("aw_chunk_1105_cast_fp16")]; + tensor var_11336_equation_0 = const()[name = tensor("op_11336_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11336_cast_fp16 = einsum(equation = var_11336_equation_0, values = (var_11166_cast_fp16, var_10983_cast_fp16))[name = tensor("op_11336_cast_fp16")]; + tensor var_11337_to_fp16 = const()[name = tensor("op_11337_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1107_cast_fp16 = mul(x = var_11336_cast_fp16, y = var_11337_to_fp16)[name = tensor("aw_chunk_1107_cast_fp16")]; + tensor var_11340_equation_0 = const()[name = tensor("op_11340_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11340_cast_fp16 = einsum(equation = var_11340_equation_0, values = (var_11166_cast_fp16, var_10990_cast_fp16))[name = tensor("op_11340_cast_fp16")]; + tensor var_11341_to_fp16 = const()[name = tensor("op_11341_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1109_cast_fp16 = mul(x = var_11340_cast_fp16, y = var_11341_to_fp16)[name = tensor("aw_chunk_1109_cast_fp16")]; + tensor var_11344_equation_0 = const()[name = tensor("op_11344_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11344_cast_fp16 = einsum(equation = var_11344_equation_0, values = (var_11166_cast_fp16, var_10997_cast_fp16))[name = tensor("op_11344_cast_fp16")]; + tensor var_11345_to_fp16 = const()[name = tensor("op_11345_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1111_cast_fp16 = mul(x = var_11344_cast_fp16, y = var_11345_to_fp16)[name = tensor("aw_chunk_1111_cast_fp16")]; + tensor var_11348_equation_0 = const()[name = tensor("op_11348_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11348_cast_fp16 = einsum(equation = var_11348_equation_0, values = (var_11170_cast_fp16, var_11004_cast_fp16))[name = tensor("op_11348_cast_fp16")]; + tensor var_11349_to_fp16 = const()[name = tensor("op_11349_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1113_cast_fp16 = mul(x = var_11348_cast_fp16, y = var_11349_to_fp16)[name = tensor("aw_chunk_1113_cast_fp16")]; + tensor var_11352_equation_0 = const()[name = tensor("op_11352_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11352_cast_fp16 = einsum(equation = var_11352_equation_0, values = (var_11170_cast_fp16, var_11011_cast_fp16))[name = tensor("op_11352_cast_fp16")]; + tensor var_11353_to_fp16 = const()[name = tensor("op_11353_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1115_cast_fp16 = mul(x = var_11352_cast_fp16, y = var_11353_to_fp16)[name = tensor("aw_chunk_1115_cast_fp16")]; + tensor var_11356_equation_0 = const()[name = tensor("op_11356_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11356_cast_fp16 = einsum(equation = var_11356_equation_0, values = (var_11170_cast_fp16, var_11018_cast_fp16))[name = tensor("op_11356_cast_fp16")]; + tensor var_11357_to_fp16 = 
const()[name = tensor("op_11357_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1117_cast_fp16 = mul(x = var_11356_cast_fp16, y = var_11357_to_fp16)[name = tensor("aw_chunk_1117_cast_fp16")]; + tensor var_11360_equation_0 = const()[name = tensor("op_11360_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11360_cast_fp16 = einsum(equation = var_11360_equation_0, values = (var_11170_cast_fp16, var_11025_cast_fp16))[name = tensor("op_11360_cast_fp16")]; + tensor var_11361_to_fp16 = const()[name = tensor("op_11361_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1119_cast_fp16 = mul(x = var_11360_cast_fp16, y = var_11361_to_fp16)[name = tensor("aw_chunk_1119_cast_fp16")]; + tensor var_11364_equation_0 = const()[name = tensor("op_11364_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11364_cast_fp16 = einsum(equation = var_11364_equation_0, values = (var_11174_cast_fp16, var_11032_cast_fp16))[name = tensor("op_11364_cast_fp16")]; + tensor var_11365_to_fp16 = const()[name = tensor("op_11365_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1121_cast_fp16 = mul(x = var_11364_cast_fp16, y = var_11365_to_fp16)[name = tensor("aw_chunk_1121_cast_fp16")]; + tensor var_11368_equation_0 = const()[name = tensor("op_11368_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11368_cast_fp16 = einsum(equation = var_11368_equation_0, values = (var_11174_cast_fp16, var_11039_cast_fp16))[name = tensor("op_11368_cast_fp16")]; + tensor var_11369_to_fp16 = const()[name = tensor("op_11369_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1123_cast_fp16 = mul(x = var_11368_cast_fp16, y = var_11369_to_fp16)[name = tensor("aw_chunk_1123_cast_fp16")]; + tensor var_11372_equation_0 = const()[name = tensor("op_11372_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11372_cast_fp16 = einsum(equation = var_11372_equation_0, values = (var_11174_cast_fp16, var_11046_cast_fp16))[name = tensor("op_11372_cast_fp16")]; + tensor var_11373_to_fp16 = const()[name = tensor("op_11373_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1125_cast_fp16 = mul(x = var_11372_cast_fp16, y = var_11373_to_fp16)[name = tensor("aw_chunk_1125_cast_fp16")]; + tensor var_11376_equation_0 = const()[name = tensor("op_11376_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11376_cast_fp16 = einsum(equation = var_11376_equation_0, values = (var_11174_cast_fp16, var_11053_cast_fp16))[name = tensor("op_11376_cast_fp16")]; + tensor var_11377_to_fp16 = const()[name = tensor("op_11377_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1127_cast_fp16 = mul(x = var_11376_cast_fp16, y = var_11377_to_fp16)[name = tensor("aw_chunk_1127_cast_fp16")]; + tensor var_11380_equation_0 = const()[name = tensor("op_11380_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11380_cast_fp16 = einsum(equation = var_11380_equation_0, values = (var_11178_cast_fp16, var_11060_cast_fp16))[name = tensor("op_11380_cast_fp16")]; + tensor var_11381_to_fp16 = const()[name = tensor("op_11381_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1129_cast_fp16 = mul(x = var_11380_cast_fp16, y = var_11381_to_fp16)[name = tensor("aw_chunk_1129_cast_fp16")]; + tensor var_11384_equation_0 = const()[name = tensor("op_11384_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11384_cast_fp16 = einsum(equation = var_11384_equation_0, values = (var_11178_cast_fp16, var_11067_cast_fp16))[name = tensor("op_11384_cast_fp16")]; + tensor var_11385_to_fp16 = const()[name = tensor("op_11385_to_fp16"), val = 
tensor(0x1p-3)]; + tensor aw_chunk_1131_cast_fp16 = mul(x = var_11384_cast_fp16, y = var_11385_to_fp16)[name = tensor("aw_chunk_1131_cast_fp16")]; + tensor var_11388_equation_0 = const()[name = tensor("op_11388_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11388_cast_fp16 = einsum(equation = var_11388_equation_0, values = (var_11178_cast_fp16, var_11074_cast_fp16))[name = tensor("op_11388_cast_fp16")]; + tensor var_11389_to_fp16 = const()[name = tensor("op_11389_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1133_cast_fp16 = mul(x = var_11388_cast_fp16, y = var_11389_to_fp16)[name = tensor("aw_chunk_1133_cast_fp16")]; + tensor var_11392_equation_0 = const()[name = tensor("op_11392_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11392_cast_fp16 = einsum(equation = var_11392_equation_0, values = (var_11178_cast_fp16, var_11081_cast_fp16))[name = tensor("op_11392_cast_fp16")]; + tensor var_11393_to_fp16 = const()[name = tensor("op_11393_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1135_cast_fp16 = mul(x = var_11392_cast_fp16, y = var_11393_to_fp16)[name = tensor("aw_chunk_1135_cast_fp16")]; + tensor var_11396_equation_0 = const()[name = tensor("op_11396_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11396_cast_fp16 = einsum(equation = var_11396_equation_0, values = (var_11182_cast_fp16, var_11088_cast_fp16))[name = tensor("op_11396_cast_fp16")]; + tensor var_11397_to_fp16 = const()[name = tensor("op_11397_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1137_cast_fp16 = mul(x = var_11396_cast_fp16, y = var_11397_to_fp16)[name = tensor("aw_chunk_1137_cast_fp16")]; + tensor var_11400_equation_0 = const()[name = tensor("op_11400_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11400_cast_fp16 = einsum(equation = var_11400_equation_0, values = (var_11182_cast_fp16, var_11095_cast_fp16))[name = tensor("op_11400_cast_fp16")]; + tensor var_11401_to_fp16 = const()[name = tensor("op_11401_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1139_cast_fp16 = mul(x = var_11400_cast_fp16, y = var_11401_to_fp16)[name = tensor("aw_chunk_1139_cast_fp16")]; + tensor var_11404_equation_0 = const()[name = tensor("op_11404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11404_cast_fp16 = einsum(equation = var_11404_equation_0, values = (var_11182_cast_fp16, var_11102_cast_fp16))[name = tensor("op_11404_cast_fp16")]; + tensor var_11405_to_fp16 = const()[name = tensor("op_11405_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1141_cast_fp16 = mul(x = var_11404_cast_fp16, y = var_11405_to_fp16)[name = tensor("aw_chunk_1141_cast_fp16")]; + tensor var_11408_equation_0 = const()[name = tensor("op_11408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11408_cast_fp16 = einsum(equation = var_11408_equation_0, values = (var_11182_cast_fp16, var_11109_cast_fp16))[name = tensor("op_11408_cast_fp16")]; + tensor var_11409_to_fp16 = const()[name = tensor("op_11409_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1143_cast_fp16 = mul(x = var_11408_cast_fp16, y = var_11409_to_fp16)[name = tensor("aw_chunk_1143_cast_fp16")]; + tensor var_11412_equation_0 = const()[name = tensor("op_11412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11412_cast_fp16 = einsum(equation = var_11412_equation_0, values = (var_11186_cast_fp16, var_11116_cast_fp16))[name = tensor("op_11412_cast_fp16")]; + tensor var_11413_to_fp16 = const()[name = tensor("op_11413_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1145_cast_fp16 = mul(x = 
var_11412_cast_fp16, y = var_11413_to_fp16)[name = tensor("aw_chunk_1145_cast_fp16")]; + tensor var_11416_equation_0 = const()[name = tensor("op_11416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11416_cast_fp16 = einsum(equation = var_11416_equation_0, values = (var_11186_cast_fp16, var_11123_cast_fp16))[name = tensor("op_11416_cast_fp16")]; + tensor var_11417_to_fp16 = const()[name = tensor("op_11417_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1147_cast_fp16 = mul(x = var_11416_cast_fp16, y = var_11417_to_fp16)[name = tensor("aw_chunk_1147_cast_fp16")]; + tensor var_11420_equation_0 = const()[name = tensor("op_11420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11420_cast_fp16 = einsum(equation = var_11420_equation_0, values = (var_11186_cast_fp16, var_11130_cast_fp16))[name = tensor("op_11420_cast_fp16")]; + tensor var_11421_to_fp16 = const()[name = tensor("op_11421_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1149_cast_fp16 = mul(x = var_11420_cast_fp16, y = var_11421_to_fp16)[name = tensor("aw_chunk_1149_cast_fp16")]; + tensor var_11424_equation_0 = const()[name = tensor("op_11424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11424_cast_fp16 = einsum(equation = var_11424_equation_0, values = (var_11186_cast_fp16, var_11137_cast_fp16))[name = tensor("op_11424_cast_fp16")]; + tensor var_11425_to_fp16 = const()[name = tensor("op_11425_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_cast_fp16 = mul(x = var_11424_cast_fp16, y = var_11425_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; + tensor var_11427_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1057_cast_fp16)[name = tensor("op_11427_cast_fp16")]; + tensor var_11428_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1059_cast_fp16)[name = tensor("op_11428_cast_fp16")]; + tensor var_11429_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1061_cast_fp16)[name = tensor("op_11429_cast_fp16")]; + tensor var_11430_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1063_cast_fp16)[name = tensor("op_11430_cast_fp16")]; + tensor var_11431_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1065_cast_fp16)[name = tensor("op_11431_cast_fp16")]; + tensor var_11432_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1067_cast_fp16)[name = tensor("op_11432_cast_fp16")]; + tensor var_11433_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1069_cast_fp16)[name = tensor("op_11433_cast_fp16")]; + tensor var_11434_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1071_cast_fp16)[name = tensor("op_11434_cast_fp16")]; + tensor var_11435_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1073_cast_fp16)[name = tensor("op_11435_cast_fp16")]; + tensor var_11436_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1075_cast_fp16)[name = tensor("op_11436_cast_fp16")]; + tensor var_11437_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1077_cast_fp16)[name = tensor("op_11437_cast_fp16")]; + tensor var_11438_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1079_cast_fp16)[name = tensor("op_11438_cast_fp16")]; + tensor var_11439_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1081_cast_fp16)[name = tensor("op_11439_cast_fp16")]; + tensor var_11440_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1083_cast_fp16)[name = tensor("op_11440_cast_fp16")]; + tensor var_11441_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1085_cast_fp16)[name = tensor("op_11441_cast_fp16")]; + tensor var_11442_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1087_cast_fp16)[name = 
tensor("op_11442_cast_fp16")]; + tensor var_11443_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1089_cast_fp16)[name = tensor("op_11443_cast_fp16")]; + tensor var_11444_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1091_cast_fp16)[name = tensor("op_11444_cast_fp16")]; + tensor var_11445_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1093_cast_fp16)[name = tensor("op_11445_cast_fp16")]; + tensor var_11446_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1095_cast_fp16)[name = tensor("op_11446_cast_fp16")]; + tensor var_11447_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1097_cast_fp16)[name = tensor("op_11447_cast_fp16")]; + tensor var_11448_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1099_cast_fp16)[name = tensor("op_11448_cast_fp16")]; + tensor var_11449_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1101_cast_fp16)[name = tensor("op_11449_cast_fp16")]; + tensor var_11450_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1103_cast_fp16)[name = tensor("op_11450_cast_fp16")]; + tensor var_11451_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1105_cast_fp16)[name = tensor("op_11451_cast_fp16")]; + tensor var_11452_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1107_cast_fp16)[name = tensor("op_11452_cast_fp16")]; + tensor var_11453_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1109_cast_fp16)[name = tensor("op_11453_cast_fp16")]; + tensor var_11454_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1111_cast_fp16)[name = tensor("op_11454_cast_fp16")]; + tensor var_11455_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1113_cast_fp16)[name = tensor("op_11455_cast_fp16")]; + tensor var_11456_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1115_cast_fp16)[name = tensor("op_11456_cast_fp16")]; + tensor var_11457_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1117_cast_fp16)[name = tensor("op_11457_cast_fp16")]; + tensor var_11458_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1119_cast_fp16)[name = tensor("op_11458_cast_fp16")]; + tensor var_11459_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1121_cast_fp16)[name = tensor("op_11459_cast_fp16")]; + tensor var_11460_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1123_cast_fp16)[name = tensor("op_11460_cast_fp16")]; + tensor var_11461_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1125_cast_fp16)[name = tensor("op_11461_cast_fp16")]; + tensor var_11462_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1127_cast_fp16)[name = tensor("op_11462_cast_fp16")]; + tensor var_11463_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1129_cast_fp16)[name = tensor("op_11463_cast_fp16")]; + tensor var_11464_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1131_cast_fp16)[name = tensor("op_11464_cast_fp16")]; + tensor var_11465_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1133_cast_fp16)[name = tensor("op_11465_cast_fp16")]; + tensor var_11466_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1135_cast_fp16)[name = tensor("op_11466_cast_fp16")]; + tensor var_11467_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1137_cast_fp16)[name = tensor("op_11467_cast_fp16")]; + tensor var_11468_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1139_cast_fp16)[name = tensor("op_11468_cast_fp16")]; + tensor var_11469_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1141_cast_fp16)[name = tensor("op_11469_cast_fp16")]; + tensor var_11470_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1143_cast_fp16)[name = tensor("op_11470_cast_fp16")]; + tensor var_11471_cast_fp16 = softmax(axis = 
var_10700, x = aw_chunk_1145_cast_fp16)[name = tensor("op_11471_cast_fp16")]; + tensor var_11472_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1147_cast_fp16)[name = tensor("op_11472_cast_fp16")]; + tensor var_11473_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1149_cast_fp16)[name = tensor("op_11473_cast_fp16")]; + tensor var_11474_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_cast_fp16)[name = tensor("op_11474_cast_fp16")]; + tensor var_11476_equation_0 = const()[name = tensor("op_11476_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11476_cast_fp16 = einsum(equation = var_11476_equation_0, values = (var_11188_cast_fp16, var_11427_cast_fp16))[name = tensor("op_11476_cast_fp16")]; + tensor var_11478_equation_0 = const()[name = tensor("op_11478_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11478_cast_fp16 = einsum(equation = var_11478_equation_0, values = (var_11188_cast_fp16, var_11428_cast_fp16))[name = tensor("op_11478_cast_fp16")]; + tensor var_11480_equation_0 = const()[name = tensor("op_11480_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11480_cast_fp16 = einsum(equation = var_11480_equation_0, values = (var_11188_cast_fp16, var_11429_cast_fp16))[name = tensor("op_11480_cast_fp16")]; + tensor var_11482_equation_0 = const()[name = tensor("op_11482_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11482_cast_fp16 = einsum(equation = var_11482_equation_0, values = (var_11188_cast_fp16, var_11430_cast_fp16))[name = tensor("op_11482_cast_fp16")]; + tensor var_11484_equation_0 = const()[name = tensor("op_11484_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11484_cast_fp16 = einsum(equation = var_11484_equation_0, values = (var_11192_cast_fp16, var_11431_cast_fp16))[name = tensor("op_11484_cast_fp16")]; + tensor var_11486_equation_0 = const()[name = tensor("op_11486_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11486_cast_fp16 = einsum(equation = var_11486_equation_0, values = (var_11192_cast_fp16, var_11432_cast_fp16))[name = tensor("op_11486_cast_fp16")]; + tensor var_11488_equation_0 = const()[name = tensor("op_11488_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11488_cast_fp16 = einsum(equation = var_11488_equation_0, values = (var_11192_cast_fp16, var_11433_cast_fp16))[name = tensor("op_11488_cast_fp16")]; + tensor var_11490_equation_0 = const()[name = tensor("op_11490_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11490_cast_fp16 = einsum(equation = var_11490_equation_0, values = (var_11192_cast_fp16, var_11434_cast_fp16))[name = tensor("op_11490_cast_fp16")]; + tensor var_11492_equation_0 = const()[name = tensor("op_11492_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11492_cast_fp16 = einsum(equation = var_11492_equation_0, values = (var_11196_cast_fp16, var_11435_cast_fp16))[name = tensor("op_11492_cast_fp16")]; + tensor var_11494_equation_0 = const()[name = tensor("op_11494_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11494_cast_fp16 = einsum(equation = var_11494_equation_0, values = (var_11196_cast_fp16, var_11436_cast_fp16))[name = tensor("op_11494_cast_fp16")]; + tensor var_11496_equation_0 = const()[name = tensor("op_11496_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11496_cast_fp16 = einsum(equation = var_11496_equation_0, values = (var_11196_cast_fp16, var_11437_cast_fp16))[name = tensor("op_11496_cast_fp16")]; + tensor var_11498_equation_0 = const()[name = tensor("op_11498_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_11498_cast_fp16 = einsum(equation = var_11498_equation_0, values = (var_11196_cast_fp16, var_11438_cast_fp16))[name = tensor("op_11498_cast_fp16")]; + tensor var_11500_equation_0 = const()[name = tensor("op_11500_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11500_cast_fp16 = einsum(equation = var_11500_equation_0, values = (var_11200_cast_fp16, var_11439_cast_fp16))[name = tensor("op_11500_cast_fp16")]; + tensor var_11502_equation_0 = const()[name = tensor("op_11502_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11502_cast_fp16 = einsum(equation = var_11502_equation_0, values = (var_11200_cast_fp16, var_11440_cast_fp16))[name = tensor("op_11502_cast_fp16")]; + tensor var_11504_equation_0 = const()[name = tensor("op_11504_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11504_cast_fp16 = einsum(equation = var_11504_equation_0, values = (var_11200_cast_fp16, var_11441_cast_fp16))[name = tensor("op_11504_cast_fp16")]; + tensor var_11506_equation_0 = const()[name = tensor("op_11506_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11506_cast_fp16 = einsum(equation = var_11506_equation_0, values = (var_11200_cast_fp16, var_11442_cast_fp16))[name = tensor("op_11506_cast_fp16")]; + tensor var_11508_equation_0 = const()[name = tensor("op_11508_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11508_cast_fp16 = einsum(equation = var_11508_equation_0, values = (var_11204_cast_fp16, var_11443_cast_fp16))[name = tensor("op_11508_cast_fp16")]; + tensor var_11510_equation_0 = const()[name = tensor("op_11510_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11510_cast_fp16 = einsum(equation = var_11510_equation_0, values = (var_11204_cast_fp16, var_11444_cast_fp16))[name = tensor("op_11510_cast_fp16")]; + tensor var_11512_equation_0 = const()[name = tensor("op_11512_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11512_cast_fp16 = einsum(equation = var_11512_equation_0, values = (var_11204_cast_fp16, var_11445_cast_fp16))[name = tensor("op_11512_cast_fp16")]; + tensor var_11514_equation_0 = const()[name = tensor("op_11514_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11514_cast_fp16 = einsum(equation = var_11514_equation_0, values = (var_11204_cast_fp16, var_11446_cast_fp16))[name = tensor("op_11514_cast_fp16")]; + tensor var_11516_equation_0 = const()[name = tensor("op_11516_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11516_cast_fp16 = einsum(equation = var_11516_equation_0, values = (var_11208_cast_fp16, var_11447_cast_fp16))[name = tensor("op_11516_cast_fp16")]; + tensor var_11518_equation_0 = const()[name = tensor("op_11518_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11518_cast_fp16 = einsum(equation = var_11518_equation_0, values = (var_11208_cast_fp16, var_11448_cast_fp16))[name = tensor("op_11518_cast_fp16")]; + tensor var_11520_equation_0 = const()[name = tensor("op_11520_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11520_cast_fp16 = einsum(equation = var_11520_equation_0, values = (var_11208_cast_fp16, var_11449_cast_fp16))[name = tensor("op_11520_cast_fp16")]; + tensor var_11522_equation_0 = const()[name = tensor("op_11522_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11522_cast_fp16 = einsum(equation = var_11522_equation_0, values = (var_11208_cast_fp16, var_11450_cast_fp16))[name = tensor("op_11522_cast_fp16")]; + tensor var_11524_equation_0 = const()[name = 
tensor("op_11524_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11524_cast_fp16 = einsum(equation = var_11524_equation_0, values = (var_11212_cast_fp16, var_11451_cast_fp16))[name = tensor("op_11524_cast_fp16")]; + tensor var_11526_equation_0 = const()[name = tensor("op_11526_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11526_cast_fp16 = einsum(equation = var_11526_equation_0, values = (var_11212_cast_fp16, var_11452_cast_fp16))[name = tensor("op_11526_cast_fp16")]; + tensor var_11528_equation_0 = const()[name = tensor("op_11528_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11528_cast_fp16 = einsum(equation = var_11528_equation_0, values = (var_11212_cast_fp16, var_11453_cast_fp16))[name = tensor("op_11528_cast_fp16")]; + tensor var_11530_equation_0 = const()[name = tensor("op_11530_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11530_cast_fp16 = einsum(equation = var_11530_equation_0, values = (var_11212_cast_fp16, var_11454_cast_fp16))[name = tensor("op_11530_cast_fp16")]; + tensor var_11532_equation_0 = const()[name = tensor("op_11532_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11532_cast_fp16 = einsum(equation = var_11532_equation_0, values = (var_11216_cast_fp16, var_11455_cast_fp16))[name = tensor("op_11532_cast_fp16")]; + tensor var_11534_equation_0 = const()[name = tensor("op_11534_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11534_cast_fp16 = einsum(equation = var_11534_equation_0, values = (var_11216_cast_fp16, var_11456_cast_fp16))[name = tensor("op_11534_cast_fp16")]; + tensor var_11536_equation_0 = const()[name = tensor("op_11536_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11536_cast_fp16 = einsum(equation = var_11536_equation_0, values = (var_11216_cast_fp16, var_11457_cast_fp16))[name = tensor("op_11536_cast_fp16")]; + tensor var_11538_equation_0 = const()[name = tensor("op_11538_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11538_cast_fp16 = einsum(equation = var_11538_equation_0, values = (var_11216_cast_fp16, var_11458_cast_fp16))[name = tensor("op_11538_cast_fp16")]; + tensor var_11540_equation_0 = const()[name = tensor("op_11540_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11540_cast_fp16 = einsum(equation = var_11540_equation_0, values = (var_11220_cast_fp16, var_11459_cast_fp16))[name = tensor("op_11540_cast_fp16")]; + tensor var_11542_equation_0 = const()[name = tensor("op_11542_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11542_cast_fp16 = einsum(equation = var_11542_equation_0, values = (var_11220_cast_fp16, var_11460_cast_fp16))[name = tensor("op_11542_cast_fp16")]; + tensor var_11544_equation_0 = const()[name = tensor("op_11544_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11544_cast_fp16 = einsum(equation = var_11544_equation_0, values = (var_11220_cast_fp16, var_11461_cast_fp16))[name = tensor("op_11544_cast_fp16")]; + tensor var_11546_equation_0 = const()[name = tensor("op_11546_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11546_cast_fp16 = einsum(equation = var_11546_equation_0, values = (var_11220_cast_fp16, var_11462_cast_fp16))[name = tensor("op_11546_cast_fp16")]; + tensor var_11548_equation_0 = const()[name = tensor("op_11548_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11548_cast_fp16 = einsum(equation = var_11548_equation_0, values = (var_11224_cast_fp16, var_11463_cast_fp16))[name = tensor("op_11548_cast_fp16")]; + tensor var_11550_equation_0 = 
const()[name = tensor("op_11550_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11550_cast_fp16 = einsum(equation = var_11550_equation_0, values = (var_11224_cast_fp16, var_11464_cast_fp16))[name = tensor("op_11550_cast_fp16")]; + tensor var_11552_equation_0 = const()[name = tensor("op_11552_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11552_cast_fp16 = einsum(equation = var_11552_equation_0, values = (var_11224_cast_fp16, var_11465_cast_fp16))[name = tensor("op_11552_cast_fp16")]; + tensor var_11554_equation_0 = const()[name = tensor("op_11554_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11554_cast_fp16 = einsum(equation = var_11554_equation_0, values = (var_11224_cast_fp16, var_11466_cast_fp16))[name = tensor("op_11554_cast_fp16")]; + tensor var_11556_equation_0 = const()[name = tensor("op_11556_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11556_cast_fp16 = einsum(equation = var_11556_equation_0, values = (var_11228_cast_fp16, var_11467_cast_fp16))[name = tensor("op_11556_cast_fp16")]; + tensor var_11558_equation_0 = const()[name = tensor("op_11558_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11558_cast_fp16 = einsum(equation = var_11558_equation_0, values = (var_11228_cast_fp16, var_11468_cast_fp16))[name = tensor("op_11558_cast_fp16")]; + tensor var_11560_equation_0 = const()[name = tensor("op_11560_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11560_cast_fp16 = einsum(equation = var_11560_equation_0, values = (var_11228_cast_fp16, var_11469_cast_fp16))[name = tensor("op_11560_cast_fp16")]; + tensor var_11562_equation_0 = const()[name = tensor("op_11562_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11562_cast_fp16 = einsum(equation = var_11562_equation_0, values = (var_11228_cast_fp16, var_11470_cast_fp16))[name = tensor("op_11562_cast_fp16")]; + tensor var_11564_equation_0 = const()[name = tensor("op_11564_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11564_cast_fp16 = einsum(equation = var_11564_equation_0, values = (var_11232_cast_fp16, var_11471_cast_fp16))[name = tensor("op_11564_cast_fp16")]; + tensor var_11566_equation_0 = const()[name = tensor("op_11566_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11566_cast_fp16 = einsum(equation = var_11566_equation_0, values = (var_11232_cast_fp16, var_11472_cast_fp16))[name = tensor("op_11566_cast_fp16")]; + tensor var_11568_equation_0 = const()[name = tensor("op_11568_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11568_cast_fp16 = einsum(equation = var_11568_equation_0, values = (var_11232_cast_fp16, var_11473_cast_fp16))[name = tensor("op_11568_cast_fp16")]; + tensor var_11570_equation_0 = const()[name = tensor("op_11570_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_11570_cast_fp16 = einsum(equation = var_11570_equation_0, values = (var_11232_cast_fp16, var_11474_cast_fp16))[name = tensor("op_11570_cast_fp16")]; + tensor var_11572_interleave_0 = const()[name = tensor("op_11572_interleave_0"), val = tensor(false)]; + tensor var_11572_cast_fp16 = concat(axis = var_10683, interleave = var_11572_interleave_0, values = (var_11476_cast_fp16, var_11478_cast_fp16, var_11480_cast_fp16, var_11482_cast_fp16))[name = tensor("op_11572_cast_fp16")]; + tensor var_11574_interleave_0 = const()[name = tensor("op_11574_interleave_0"), val = tensor(false)]; + tensor var_11574_cast_fp16 = concat(axis = var_10683, interleave = var_11574_interleave_0, values = (var_11484_cast_fp16, 
var_11486_cast_fp16, var_11488_cast_fp16, var_11490_cast_fp16))[name = tensor("op_11574_cast_fp16")]; + tensor var_11576_interleave_0 = const()[name = tensor("op_11576_interleave_0"), val = tensor(false)]; + tensor var_11576_cast_fp16 = concat(axis = var_10683, interleave = var_11576_interleave_0, values = (var_11492_cast_fp16, var_11494_cast_fp16, var_11496_cast_fp16, var_11498_cast_fp16))[name = tensor("op_11576_cast_fp16")]; + tensor var_11578_interleave_0 = const()[name = tensor("op_11578_interleave_0"), val = tensor(false)]; + tensor var_11578_cast_fp16 = concat(axis = var_10683, interleave = var_11578_interleave_0, values = (var_11500_cast_fp16, var_11502_cast_fp16, var_11504_cast_fp16, var_11506_cast_fp16))[name = tensor("op_11578_cast_fp16")]; + tensor var_11580_interleave_0 = const()[name = tensor("op_11580_interleave_0"), val = tensor(false)]; + tensor var_11580_cast_fp16 = concat(axis = var_10683, interleave = var_11580_interleave_0, values = (var_11508_cast_fp16, var_11510_cast_fp16, var_11512_cast_fp16, var_11514_cast_fp16))[name = tensor("op_11580_cast_fp16")]; + tensor var_11582_interleave_0 = const()[name = tensor("op_11582_interleave_0"), val = tensor(false)]; + tensor var_11582_cast_fp16 = concat(axis = var_10683, interleave = var_11582_interleave_0, values = (var_11516_cast_fp16, var_11518_cast_fp16, var_11520_cast_fp16, var_11522_cast_fp16))[name = tensor("op_11582_cast_fp16")]; + tensor var_11584_interleave_0 = const()[name = tensor("op_11584_interleave_0"), val = tensor(false)]; + tensor var_11584_cast_fp16 = concat(axis = var_10683, interleave = var_11584_interleave_0, values = (var_11524_cast_fp16, var_11526_cast_fp16, var_11528_cast_fp16, var_11530_cast_fp16))[name = tensor("op_11584_cast_fp16")]; + tensor var_11586_interleave_0 = const()[name = tensor("op_11586_interleave_0"), val = tensor(false)]; + tensor var_11586_cast_fp16 = concat(axis = var_10683, interleave = var_11586_interleave_0, values = (var_11532_cast_fp16, var_11534_cast_fp16, var_11536_cast_fp16, var_11538_cast_fp16))[name = tensor("op_11586_cast_fp16")]; + tensor var_11588_interleave_0 = const()[name = tensor("op_11588_interleave_0"), val = tensor(false)]; + tensor var_11588_cast_fp16 = concat(axis = var_10683, interleave = var_11588_interleave_0, values = (var_11540_cast_fp16, var_11542_cast_fp16, var_11544_cast_fp16, var_11546_cast_fp16))[name = tensor("op_11588_cast_fp16")]; + tensor var_11590_interleave_0 = const()[name = tensor("op_11590_interleave_0"), val = tensor(false)]; + tensor var_11590_cast_fp16 = concat(axis = var_10683, interleave = var_11590_interleave_0, values = (var_11548_cast_fp16, var_11550_cast_fp16, var_11552_cast_fp16, var_11554_cast_fp16))[name = tensor("op_11590_cast_fp16")]; + tensor var_11592_interleave_0 = const()[name = tensor("op_11592_interleave_0"), val = tensor(false)]; + tensor var_11592_cast_fp16 = concat(axis = var_10683, interleave = var_11592_interleave_0, values = (var_11556_cast_fp16, var_11558_cast_fp16, var_11560_cast_fp16, var_11562_cast_fp16))[name = tensor("op_11592_cast_fp16")]; + tensor var_11594_interleave_0 = const()[name = tensor("op_11594_interleave_0"), val = tensor(false)]; + tensor var_11594_cast_fp16 = concat(axis = var_10683, interleave = var_11594_interleave_0, values = (var_11564_cast_fp16, var_11566_cast_fp16, var_11568_cast_fp16, var_11570_cast_fp16))[name = tensor("op_11594_cast_fp16")]; + tensor input_89_interleave_0 = const()[name = tensor("input_89_interleave_0"), val = tensor(false)]; + tensor input_89_cast_fp16 = concat(axis = 
var_10700, interleave = input_89_interleave_0, values = (var_11572_cast_fp16, var_11574_cast_fp16, var_11576_cast_fp16, var_11578_cast_fp16, var_11580_cast_fp16, var_11582_cast_fp16, var_11584_cast_fp16, var_11586_cast_fp16, var_11588_cast_fp16, var_11590_cast_fp16, var_11592_cast_fp16, var_11594_cast_fp16))[name = tensor("input_89_cast_fp16")]; + tensor var_11599 = const()[name = tensor("op_11599"), val = tensor([1, 1])]; + tensor var_11601 = const()[name = tensor("op_11601"), val = tensor([1, 1])]; + tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("custom")]; + tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165690624)))]; + tensor layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166870336)))]; + tensor obj_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = var_11601, groups = var_10700, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = var_11599, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor var_11607 = const()[name = tensor("op_11607"), val = tensor([1])]; + tensor channels_mean_47_cast_fp16 = reduce_mean(axes = var_11607, keep_dims = var_10701, x = inputs_47_cast_fp16)[name = tensor("channels_mean_47_cast_fp16")]; + tensor zero_mean_47_cast_fp16 = sub(x = inputs_47_cast_fp16, y = channels_mean_47_cast_fp16)[name = tensor("zero_mean_47_cast_fp16")]; + tensor zero_mean_sq_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = zero_mean_47_cast_fp16)[name = tensor("zero_mean_sq_47_cast_fp16")]; + tensor var_11611 = const()[name = tensor("op_11611"), val = tensor([1])]; + tensor var_11612_cast_fp16 = reduce_mean(axes = var_11611, keep_dims = var_10701, x = zero_mean_sq_47_cast_fp16)[name = tensor("op_11612_cast_fp16")]; + tensor var_11613_to_fp16 = const()[name = tensor("op_11613_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_11614_cast_fp16 = add(x = var_11612_cast_fp16, y = var_11613_to_fp16)[name = tensor("op_11614_cast_fp16")]; + tensor denom_47_epsilon_0_to_fp16 = const()[name = tensor("denom_47_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0_to_fp16, x = var_11614_cast_fp16)[name = tensor("denom_47_cast_fp16")]; + tensor out_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = denom_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166871936)))]; + tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166873536)))]; + tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = 
obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_11625 = const()[name = tensor("op_11625"), val = tensor([1, 1])]; + tensor var_11627 = const()[name = tensor("op_11627"), val = tensor([1, 1])]; + tensor input_93_pad_type_0 = const()[name = tensor("input_93_pad_type_0"), val = tensor("custom")]; + tensor input_93_pad_0 = const()[name = tensor("input_93_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_fc1_weight_to_fp16 = const()[name = tensor("layers_11_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166875136)))]; + tensor layers_11_fc1_bias_to_fp16 = const()[name = tensor("layers_11_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171593792)))]; + tensor input_93_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = var_11627, groups = var_10700, pad = input_93_pad_0, pad_type = input_93_pad_type_0, strides = var_11625, weight = layers_11_fc1_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_93_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_11633 = const()[name = tensor("op_11633"), val = tensor([1, 1])]; + tensor var_11635 = const()[name = tensor("op_11635"), val = tensor([1, 1])]; + tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_fc2_weight_to_fp16 = const()[name = tensor("layers_11_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171600000)))]; + tensor layers_11_fc2_bias_to_fp16 = const()[name = tensor("layers_11_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176318656)))]; + tensor hidden_states_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = var_11635, groups = var_10700, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = var_11633, weight = layers_11_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor var_11641 = const()[name = tensor("op_11641"), val = tensor(true)]; + tensor var_11645 = const()[name = tensor("op_11645"), val = tensor([1])]; + tensor channels_mean_cast_fp16 = reduce_mean(axes = var_11645, keep_dims = var_11641, x = inputs_cast_fp16)[name = tensor("channels_mean_cast_fp16")]; + tensor zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor("zero_mean_cast_fp16")]; + tensor zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor("zero_mean_sq_cast_fp16")]; + tensor var_11649 = const()[name = tensor("op_11649"), val = tensor([1])]; + tensor var_11650_cast_fp16 = reduce_mean(axes = var_11649, keep_dims = var_11641, x = zero_mean_sq_cast_fp16)[name = tensor("op_11650_cast_fp16")]; + tensor var_11651_to_fp16 = const()[name = tensor("op_11651_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_11652_cast_fp16 = add(x = var_11650_cast_fp16, y = var_11651_to_fp16)[name = tensor("op_11652_cast_fp16")]; + tensor denom_epsilon_0_to_fp16 = 
const()[name = tensor("denom_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_11652_cast_fp16)[name = tensor("denom_cast_fp16")]; + tensor out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176320256)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176321856)))]; + tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; + } -> (encoder_output_embeds); +} \ No newline at end of file diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-small/AudioEncoder.mlmodelc/model.mlmodel new file mode 100644 index 0000000000000000000000000000000000000000..b6314fec31f6cf5901665aba75ae05333313cc2c --- /dev/null +++ b/openai_whisper-small/AudioEncoder.mlmodelc/model.mlmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ca04660b8b050c68ca54c27d97c47e4133bc591422cb7009de8922d56fb8c9 +size 155271 diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-small/AudioEncoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cea13e14c52f389d44ebbeaef758c35719d9f32 --- /dev/null +++ b/openai_whisper-small/AudioEncoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe35cef2c9406993a635639b16f373f6debb0215ac115b7bf93fa03c8e10310b +size 176323456 diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-small/MelSpectrogram.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..8cce829d60c4056be93333a562d47d3bb2908b9b --- /dev/null +++ b/openai_whisper-small/MelSpectrogram.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f77e6457285248f99cd7aa3fd4cc2efbb17733e63e7023ac53abe1f95785d07 +size 243 diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-small/MelSpectrogram.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1d9e7a102f740c68cdfc7272dc5b8007c48416a --- /dev/null +++ b/openai_whisper-small/MelSpectrogram.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabdc5aa69f6ef4d97dc9499f5c30514e00e96b53b750b33a5a6471363c71662 +size 328 diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-small/MelSpectrogram.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..022c382ec7041de63c59dd98893c3134d01b23aa --- /dev/null +++ 
b/openai_whisper-small/MelSpectrogram.mlmodelc/metadata.json @@ -0,0 +1,71 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 1, 3000]", + "name" : "melspectrogram_features", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Pad" : 1, + "Ios16.mul" : 2, + "SliceByIndex" : 1, + "Ios16.sub" : 1, + "Ios16.log" : 1, + "Ios16.conv" : 2, + "Ios16.add" : 3, + "Ios16.square" : 2, + "Ios16.matmul" : 1, + "Squeeze" : 2, + "Ios16.maximum" : 1, + "ExpandDims" : 4, + "Ios16.reduceMax" : 1, + "Identity" : 1, + "Ios16.reshape" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.2.1", + "com.github.apple.coremltools.version" : "7.1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 480000)", + "shortDescription" : "", + "shape" : "[480000]", + "name" : "audio", + "type" : "MultiArray" + } + ], + "generatedClassName" : "MelSpectrogram", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-small/MelSpectrogram.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..a63d7fa99d6d86db1b76a1f53640cb4aa25e0210 --- /dev/null +++ b/openai_whisper-small/MelSpectrogram.mlmodelc/model.mil @@ -0,0 +1,66 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor audio) { + tensor var_10 = const()[name = tensor("op_10"), val = tensor([1, 1, 480000])]; + tensor input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = tensor("input_1_cast_fp16")]; + tensor input_3_pad_0 = const()[name = tensor("input_3_pad_0"), val = tensor([0, 0, 0, 0, 200, 200])]; + tensor input_3_mode_0 = const()[name = tensor("input_3_mode_0"), val = tensor("reflect")]; + tensor input_3_constant_val_0_to_fp16 = const()[name = tensor("input_3_constant_val_0_to_fp16"), val = tensor(0x0p+0)]; + tensor input_3_cast_fp16 = pad(constant_val = input_3_constant_val_0_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_22 = const()[name = tensor("op_22"), val = tensor([480400])]; + tensor input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor expand_dims_0_axes_0 = const()[name = tensor("expand_dims_0_axes_0"), val = tensor([0])]; + tensor expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = tensor("expand_dims_0_cast_fp16")]; + tensor expand_dims_3 = const()[name = tensor("expand_dims_3"), val = tensor([160])]; + tensor expand_dims_4_axes_0 = const()[name = 
tensor("expand_dims_4_axes_0"), val = tensor([1])]; + tensor expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = tensor("expand_dims_4_cast_fp16")]; + tensor conv_0_pad_type_0 = const()[name = tensor("conv_0_pad_type_0"), val = tensor("valid")]; + tensor conv_0_pad_0 = const()[name = tensor("conv_0_pad_0"), val = tensor([0, 0])]; + tensor conv_0_dilations_0 = const()[name = tensor("conv_0_dilations_0"), val = tensor([1])]; + tensor conv_0_groups_0 = const()[name = tensor("conv_0_groups_0"), val = tensor(1)]; + tensor expand_dims_1_to_fp16 = const()[name = tensor("expand_dims_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor("conv_0_cast_fp16")]; + tensor conv_1_pad_type_0 = const()[name = tensor("conv_1_pad_type_0"), val = tensor("valid")]; + tensor conv_1_pad_0 = const()[name = tensor("conv_1_pad_0"), val = tensor([0, 0])]; + tensor conv_1_dilations_0 = const()[name = tensor("conv_1_dilations_0"), val = tensor([1])]; + tensor conv_1_groups_0 = const()[name = tensor("conv_1_groups_0"), val = tensor(1)]; + tensor expand_dims_2_to_fp16 = const()[name = tensor("expand_dims_2_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160960)))]; + tensor conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor("conv_1_cast_fp16")]; + tensor squeeze_0_axes_0 = const()[name = tensor("squeeze_0_axes_0"), val = tensor([0])]; + tensor squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = tensor("squeeze_0_cast_fp16")]; + tensor squeeze_1_axes_0 = const()[name = tensor("squeeze_1_axes_0"), val = tensor([0])]; + tensor squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = tensor("squeeze_1_cast_fp16")]; + tensor square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = tensor("square_0_cast_fp16")]; + tensor square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = tensor("square_1_cast_fp16")]; + tensor add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = tensor("add_1_cast_fp16")]; + tensor magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = tensor("magnitudes_1_cast_fp16")]; + tensor magnitudes_begin_0 = const()[name = tensor("magnitudes_begin_0"), val = tensor([0, 0])]; + tensor magnitudes_end_0 = const()[name = tensor("magnitudes_end_0"), val = tensor([201, 3000])]; + tensor magnitudes_end_mask_0 = const()[name = tensor("magnitudes_end_mask_0"), val = tensor([true, false])]; + tensor magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = tensor("magnitudes_cast_fp16")]; + tensor mel_spec_1_transpose_x_0 = const()[name = tensor("mel_spec_1_transpose_x_0"), val = tensor(false)]; + tensor mel_spec_1_transpose_y_0 = const()[name = tensor("mel_spec_1_transpose_y_0"), val = tensor(false)]; + tensor mel_filters_to_fp16 = const()[name = tensor("mel_filters_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(321856)))]; + tensor mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = tensor("mel_spec_1_cast_fp16")]; + tensor var_41_to_fp16 = const()[name = tensor("op_41_to_fp16"), val = tensor(0x1p-24)]; + tensor mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = tensor("mel_spec_cast_fp16")]; + tensor log_0_epsilon_0_to_fp16 = const()[name = tensor("log_0_epsilon_0_to_fp16"), val = tensor(0x0p+0)]; + tensor log_0_cast_fp16 = log(epsilon = log_0_epsilon_0_to_fp16, x = mel_spec_cast_fp16)[name = tensor("log_0_cast_fp16")]; + tensor mul_0_y_0_to_fp16 = const()[name = tensor("mul_0_y_0_to_fp16"), val = tensor(0x1.bccp-2)]; + tensor mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor("mul_0_cast_fp16")]; + tensor var_44_keep_dims_0 = const()[name = tensor("op_44_keep_dims_0"), val = tensor(false)]; + tensor var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = tensor("op_44_cast_fp16")]; + tensor var_46_to_fp16 = const()[name = tensor("op_46_to_fp16"), val = tensor(0x1p+3)]; + tensor var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = tensor("op_47_cast_fp16")]; + tensor log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = tensor("log_spec_3_cast_fp16")]; + tensor var_50_to_fp16 = const()[name = tensor("op_50_to_fp16"), val = tensor(0x1p+2)]; + tensor var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = tensor("op_51_cast_fp16")]; + tensor _inversed_log_spec_y_0_to_fp16 = const()[name = tensor("_inversed_log_spec_y_0_to_fp16"), val = tensor(0x1p-2)]; + tensor _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = tensor("_inversed_log_spec_cast_fp16")]; + tensor var_55_axes_0 = const()[name = tensor("op_55_axes_0"), val = tensor([0])]; + tensor var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = tensor("op_55_cast_fp16")]; + tensor var_62_axes_0 = const()[name = tensor("op_62_axes_0"), val = tensor([2])]; + tensor melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = tensor("op_62_cast_fp16")]; + } -> (melspectrogram_features); +} \ No newline at end of file diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-small/MelSpectrogram.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..6110c0f1e30e3ddad047c471f30fb114a2e5562e --- /dev/null +++ b/openai_whisper-small/MelSpectrogram.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267017e533b5f542d195fd9a775f2ba649075128283ce8e86c63a2ec20de5b07 +size 354080 diff --git a/openai_whisper-small/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-small/TextDecoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcd839bbf91242e087302939502d648dd193dfe8 --- /dev/null +++ b/openai_whisper-small/TextDecoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c0d6d55353bc61ef8071081bb958dd1ab7b0b7f2a3338a797f1a64211e084c +size 243 diff --git a/openai_whisper-small/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-small/TextDecoder.mlmodelc/coremldata.bin new file mode 100644 index 
0000000000000000000000000000000000000000..adf3d2eb117cb6a142a8021e6c0b9fdcad7b1518 --- /dev/null +++ b/openai_whisper-small/TextDecoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ccd0b8920701386ab9554f7db47b43e55ee07863280ee5d829d5272839adc2 +size 633 diff --git a/openai_whisper-small/TextDecoder.mlmodelc/metadata.json b/openai_whisper-small/TextDecoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6cdc64a42b740095de45e0b4e49e261bea73558a --- /dev/null +++ b/openai_whisper-small/TextDecoder.mlmodelc/metadata.json @@ -0,0 +1,165 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 51865)", + "shortDescription" : "", + "shape" : "[1, 1, 51865]", + "name" : "logits", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)", + "shortDescription" : "", + "shape" : "[1, 9216, 1, 1]", + "name" : "key_cache_updates", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)", + "shortDescription" : "", + "shape" : "[1, 9216, 1, 1]", + "name" : "value_cache_updates", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 1500]", + "name" : "alignment_heads_weights", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Split" : 2, + "Concat" : 3, + "Ios16.rsqrt" : 37, + "Ios16.mul" : 146, + "Squeeze" : 1, + "SliceByIndex" : 20, + "Ios16.sub" : 38, + "Transpose" : 1, + "Ios16.conv" : 120, + "Ios16.add" : 110, + "Ios16.linear" : 1, + "Ios16.matmul" : 48, + "Ios16.gelu" : 12, + "Ios16.reduceMean" : 75, + "ExpandDims" : 6, + "Ios16.batchNorm" : 37, + "Ios16.gather" : 2, + "Ios16.reshape" : 96, + "Ios16.softmax" : 24 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.2.1", + "com.github.apple.coremltools.version" : "7.1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "input_ids", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "cache_length", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 9216, 1, 224]", + "name" : "key_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + 
"dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 9216, 1, 224]", + "name" : "value_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 224]", + "name" : "kv_cache_update_mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 768 × 1 × 1500)", + "shortDescription" : "", + "shape" : "[1, 768, 1, 1500]", + "name" : "encoder_output_embeds", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 224)", + "shortDescription" : "", + "shape" : "[1, 224]", + "name" : "decoder_key_padding_mask", + "type" : "MultiArray" + } + ], + "generatedClassName" : "TextDecoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/openai_whisper-small/TextDecoder.mlmodelc/model.mil b/openai_whisper-small/TextDecoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..dca5e408299638f61530c44b7ed4442b8e3b646b --- /dev/null +++ b/openai_whisper-small/TextDecoder.mlmodelc/model.mil @@ -0,0 +1,2105 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor cache_length, tensor decoder_key_padding_mask, tensor encoder_output_embeds, tensor input_ids, tensor key_cache, tensor kv_cache_update_mask, tensor value_cache) { + tensor var_40_axis_0 = const()[name = tensor("op_40_axis_0"), val = tensor(0)]; + tensor var_40_batch_dims_0 = const()[name = tensor("op_40_batch_dims_0"), val = tensor(0)]; + tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_40_cast_fp16 = gather(axis = var_40_axis_0, batch_dims = var_40_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("op_40_cast_fp16")]; + tensor var_44_axis_0 = const()[name = tensor("op_44_axis_0"), val = tensor(0)]; + tensor var_44_batch_dims_0 = const()[name = tensor("op_44_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_weight_to_fp16 = const()[name = tensor("embed_positions_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79664768)))]; + tensor var_44_cast_fp16 = gather(axis = var_44_axis_0, batch_dims = var_44_batch_dims_0, indices = cache_length, x = embed_positions_weight_to_fp16)[name = tensor("op_44_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = add(x = var_40_cast_fp16, y = var_44_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_58_axes_0 = const()[name = tensor("op_58_axes_0"), val = tensor([2])]; + tensor var_58_cast_fp16 = expand_dims(axes = var_58_axes_0, x = hidden_states_1_cast_fp16)[name = tensor("op_58_cast_fp16")]; + tensor inputs_1_axes_0 = const()[name = tensor("inputs_1_axes_0"), val = tensor([3])]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_58_cast_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor tile_0 = const()[name = tensor("tile_0"), val = tensor([768, 
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])]; + tensor var_63_axis_0 = const()[name = tensor("op_63_axis_0"), val = tensor(1)]; + tensor var_63_cast_fp16_0, tensor var_63_cast_fp16_1, tensor var_63_cast_fp16_2, tensor var_63_cast_fp16_3, tensor var_63_cast_fp16_4, tensor var_63_cast_fp16_5, tensor var_63_cast_fp16_6, tensor var_63_cast_fp16_7, tensor var_63_cast_fp16_8, tensor var_63_cast_fp16_9, tensor var_63_cast_fp16_10, tensor var_63_cast_fp16_11 = split(axis = var_63_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor("op_63_cast_fp16")]; + tensor tile_1 = const()[name = tensor("tile_1"), val = tensor([768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])]; + tensor var_78_axis_0 = const()[name = tensor("op_78_axis_0"), val = tensor(1)]; + tensor var_78_cast_fp16_0, tensor var_78_cast_fp16_1, tensor var_78_cast_fp16_2, tensor var_78_cast_fp16_3, tensor var_78_cast_fp16_4, tensor var_78_cast_fp16_5, tensor var_78_cast_fp16_6, tensor var_78_cast_fp16_7, tensor var_78_cast_fp16_8, tensor var_78_cast_fp16_9, tensor var_78_cast_fp16_10, tensor var_78_cast_fp16_11 = split(axis = var_78_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor("op_78_cast_fp16")]; + tensor var_96 = const()[name = tensor("op_96"), val = tensor(3)]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor(1)]; + tensor var_104 = const()[name = tensor("op_104"), val = tensor(true)]; + tensor var_116 = const()[name = tensor("op_116"), val = tensor([1])]; + tensor channels_mean_1_cast_fp16 = reduce_mean(axes = var_116, keep_dims = var_104, x = inputs_1_cast_fp16)[name = tensor("channels_mean_1_cast_fp16")]; + tensor zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor("zero_mean_1_cast_fp16")]; + tensor zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor("zero_mean_sq_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1])]; + tensor var_121_cast_fp16 = reduce_mean(axes = var_120, keep_dims = var_104, x = zero_mean_sq_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_122_to_fp16 = const()[name = tensor("op_122_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_123_cast_fp16 = add(x = var_121_cast_fp16, y = var_122_to_fp16)[name = tensor("op_123_cast_fp16")]; + tensor denom_1_epsilon_0_to_fp16 = const()[name = tensor("denom_1_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_123_cast_fp16)[name = tensor("denom_1_cast_fp16")]; + tensor out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80352960)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80354560)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80356160)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80357760)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor 
obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_138 = const()[name = tensor("op_138"), val = tensor([1, 1])]; + tensor var_140 = const()[name = tensor("op_140"), val = tensor([1, 1])]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("custom")]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80359360)))]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81539072)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_140, groups = var_103, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_138, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_144 = const()[name = tensor("op_144"), val = tensor([1, 1])]; + tensor var_146 = const()[name = tensor("op_146"), val = tensor([1, 1])]; + tensor current_key_1_pad_type_0 = const()[name = tensor("current_key_1_pad_type_0"), val = tensor("custom")]; + tensor current_key_1_pad_0 = const()[name = tensor("current_key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81540672)))]; + tensor current_key_1_cast_fp16 = conv(dilations = var_146, groups = var_103, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = var_144, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("current_key_1_cast_fp16")]; + tensor var_151 = const()[name = tensor("op_151"), val = tensor([1, 1])]; + tensor var_153 = const()[name = tensor("op_153"), val = tensor([1, 1])]; + tensor current_value_1_pad_type_0 = const()[name = tensor("current_value_1_pad_type_0"), val = tensor("custom")]; + tensor current_value_1_pad_0 = const()[name = tensor("current_value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82720384)))]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83900096)))]; + tensor current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_153, groups = var_103, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = var_151, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("current_value_1_cast_fp16")]; + tensor var_157_axes_0 = const()[name = tensor("op_157_axes_0"), val = tensor([1])]; + tensor var_157_cast_fp16 = expand_dims(axes = var_157_axes_0, x = kv_cache_update_mask)[name = tensor("op_157_cast_fp16")]; + 
tensor var_158_axes_0 = const()[name = tensor("op_158_axes_0"), val = tensor([2])]; + tensor var_158_cast_fp16 = expand_dims(axes = var_158_axes_0, x = var_157_cast_fp16)[name = tensor("op_158_cast_fp16")]; + tensor var_160_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_160_cast_fp16")]; + tensor var_97_to_fp16 = const()[name = tensor("op_97_to_fp16"), val = tensor(0x1p+0)]; + tensor var_161_cast_fp16 = sub(x = var_97_to_fp16, y = var_158_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_162_cast_fp16 = mul(x = var_63_cast_fp16_0, y = var_161_cast_fp16)[name = tensor("op_162_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_160_cast_fp16, y = var_162_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_164_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = var_78_cast_fp16_0, y = var_161_cast_fp16)[name = tensor("op_166_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_164_cast_fp16, y = var_166_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_169 = const()[name = tensor("op_169"), val = tensor([1, 12, 64, -1])]; + tensor var_170_cast_fp16 = reshape(shape = var_169, x = query_1_cast_fp16)[name = tensor("op_170_cast_fp16")]; + tensor var_171_to_fp16 = const()[name = tensor("op_171_to_fp16"), val = tensor(0x1p-3)]; + tensor var_172_cast_fp16 = mul(x = var_170_cast_fp16, y = var_171_to_fp16)[name = tensor("op_172_cast_fp16")]; + tensor var_173 = const()[name = tensor("op_173"), val = tensor([1, 12, 64, -1])]; + tensor var_174_cast_fp16 = reshape(shape = var_173, x = key_1_cast_fp16)[name = tensor("op_174_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_172_cast_fp16, y = var_174_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_178_axes_0 = const()[name = tensor("op_178_axes_0"), val = tensor([1])]; + tensor var_178_cast_fp16 = expand_dims(axes = var_178_axes_0, x = decoder_key_padding_mask)[name = tensor("op_178_cast_fp16")]; + tensor var_179_axes_0 = const()[name = tensor("op_179_axes_0"), val = tensor([2])]; + tensor var_179_cast_fp16 = expand_dims(axes = var_179_axes_0, x = var_178_cast_fp16)[name = tensor("op_179_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_182_cast_fp16 = softmax(axis = var_96, x = mh_w_3_cast_fp16)[name = tensor("op_182_cast_fp16")]; + tensor var_183 = const()[name = tensor("op_183"), val = tensor([1, 12, 64, -1])]; + tensor var_184_cast_fp16 = reshape(shape = var_183, x = value_1_cast_fp16)[name = tensor("op_184_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_184_cast_fp16, y = var_182_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_187 = const()[name = tensor("op_187"), val = tensor([1, 768, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_187, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_191 = 
const()[name = tensor("op_191"), val = tensor([1, 1])]; + tensor var_193 = const()[name = tensor("op_193"), val = tensor([1, 1])]; + tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("custom")]; + tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83901696)))]; + tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85081408)))]; + tensor obj_7_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_193, groups = var_103, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_191, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor var_203 = const()[name = tensor("op_203"), val = tensor([1])]; + tensor channels_mean_3_cast_fp16 = reduce_mean(axes = var_203, keep_dims = var_104, x = inputs_3_cast_fp16)[name = tensor("channels_mean_3_cast_fp16")]; + tensor zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor("zero_mean_3_cast_fp16")]; + tensor zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor("zero_mean_sq_3_cast_fp16")]; + tensor var_207 = const()[name = tensor("op_207"), val = tensor([1])]; + tensor var_208_cast_fp16 = reduce_mean(axes = var_207, keep_dims = var_104, x = zero_mean_sq_3_cast_fp16)[name = tensor("op_208_cast_fp16")]; + tensor var_209_to_fp16 = const()[name = tensor("op_209_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_210_cast_fp16 = add(x = var_208_cast_fp16, y = var_209_to_fp16)[name = tensor("op_210_cast_fp16")]; + tensor denom_3_epsilon_0_to_fp16 = const()[name = tensor("denom_3_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_210_cast_fp16)[name = tensor("denom_3_cast_fp16")]; + tensor out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85083008)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85084608)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_225 = const()[name = tensor("op_225"), val = tensor([1, 1])]; + tensor var_227 = const()[name = tensor("op_227"), val = tensor([1, 1])]; + tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("custom")]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
layers_0_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85086208)))]; + tensor layers_0_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86265920)))]; + tensor query_3_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_bias_to_fp16, dilations = var_227, groups = var_103, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_225, weight = layers_0_encoder_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_231 = const()[name = tensor("op_231"), val = tensor([1, 1])]; + tensor var_233 = const()[name = tensor("op_233"), val = tensor([1, 1])]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("custom")]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86267520)))]; + tensor key_3_cast_fp16 = conv(dilations = var_233, groups = var_103, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_231, weight = layers_0_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_3_cast_fp16")]; + tensor var_238 = const()[name = tensor("op_238"), val = tensor([1, 1])]; + tensor var_240 = const()[name = tensor("op_240"), val = tensor([1, 1])]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("custom")]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87447232)))]; + tensor layers_0_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88626944)))]; + tensor value_3_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_bias_to_fp16, dilations = var_240, groups = var_103, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_238, weight = layers_0_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_3_cast_fp16")]; + tensor var_244 = const()[name = tensor("op_244"), val = tensor([1, 12, 64, -1])]; + tensor var_245_cast_fp16 = reshape(shape = var_244, x = query_3_cast_fp16)[name = tensor("op_245_cast_fp16")]; + tensor var_246_to_fp16 = const()[name = tensor("op_246_to_fp16"), val = tensor(0x1p-3)]; + tensor var_247_cast_fp16 = mul(x = var_245_cast_fp16, y = var_246_to_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_248 = const()[name = tensor("op_248"), val = tensor([1, 12, 64, -1])]; + tensor var_249_cast_fp16 = reshape(shape = var_248, x = key_3_cast_fp16)[name = tensor("op_249_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = 
mh_w_5_transpose_y_0, x = var_247_cast_fp16, y = var_249_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor obj_13_cast_fp16 = softmax(axis = var_96, x = mh_w_5_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_253 = const()[name = tensor("op_253"), val = tensor([1, 12, 64, -1])]; + tensor var_254_cast_fp16 = reshape(shape = var_253, x = value_3_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_254_cast_fp16, y = obj_13_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_257 = const()[name = tensor("op_257"), val = tensor([1, 768, 1, -1])]; + tensor input_3_cast_fp16 = reshape(shape = var_257, x = attn_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_261 = const()[name = tensor("op_261"), val = tensor([1, 1])]; + tensor var_263 = const()[name = tensor("op_263"), val = tensor([1, 1])]; + tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("custom")]; + tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88628544)))]; + tensor layers_0_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89808256)))]; + tensor obj_11_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_bias_to_fp16, dilations = var_263, groups = var_103, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_261, weight = layers_0_encoder_attn_o_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_269 = const()[name = tensor("op_269"), val = tensor([1])]; + tensor channels_mean_5_cast_fp16 = reduce_mean(axes = var_269, keep_dims = var_104, x = inputs_5_cast_fp16)[name = tensor("channels_mean_5_cast_fp16")]; + tensor zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor("zero_mean_5_cast_fp16")]; + tensor zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor("zero_mean_sq_5_cast_fp16")]; + tensor var_273 = const()[name = tensor("op_273"), val = tensor([1])]; + tensor var_274_cast_fp16 = reduce_mean(axes = var_273, keep_dims = var_104, x = zero_mean_sq_5_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_275_to_fp16 = const()[name = tensor("op_275_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_276_cast_fp16 = add(x = var_274_cast_fp16, y = var_275_to_fp16)[name = tensor("op_276_cast_fp16")]; + tensor denom_5_epsilon_0_to_fp16 = const()[name = tensor("denom_5_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_276_cast_fp16)[name = tensor("denom_5_cast_fp16")]; + tensor out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor input_5_gamma_0_to_fp16 = const()[name = 
tensor("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89809856)))]; + tensor input_5_beta_0_to_fp16 = const()[name = tensor("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89811456)))]; + tensor input_5_epsilon_0_to_fp16 = const()[name = tensor("input_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_287 = const()[name = tensor("op_287"), val = tensor([1, 1])]; + tensor var_289 = const()[name = tensor("op_289"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89813056)))]; + tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94531712)))]; + tensor input_7_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_289, groups = var_103, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_287, weight = layers_0_fc1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor input_9_mode_0 = const()[name = tensor("input_9_mode_0"), val = tensor("EXACT")]; + tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_295 = const()[name = tensor("op_295"), val = tensor([1, 1])]; + tensor var_297 = const()[name = tensor("op_297"), val = tensor([1, 1])]; + tensor hidden_states_3_pad_type_0 = const()[name = tensor("hidden_states_3_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_3_pad_0 = const()[name = tensor("hidden_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94537920)))]; + tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99256576)))]; + tensor hidden_states_3_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_297, groups = var_103, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = var_295, weight = layers_0_fc2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_310 = const()[name = tensor("op_310"), val = tensor(3)]; + tensor var_317 = const()[name = tensor("op_317"), val = tensor(1)]; + tensor var_318 = const()[name = tensor("op_318"), val = tensor(true)]; + tensor var_330 = const()[name = tensor("op_330"), val = tensor([1])]; + tensor channels_mean_7_cast_fp16 = reduce_mean(axes = var_330, keep_dims = var_318, x = inputs_7_cast_fp16)[name = tensor("channels_mean_7_cast_fp16")]; + 
tensor zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor("zero_mean_7_cast_fp16")]; + tensor zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor("zero_mean_sq_7_cast_fp16")]; + tensor var_334 = const()[name = tensor("op_334"), val = tensor([1])]; + tensor var_335_cast_fp16 = reduce_mean(axes = var_334, keep_dims = var_318, x = zero_mean_sq_7_cast_fp16)[name = tensor("op_335_cast_fp16")]; + tensor var_336_to_fp16 = const()[name = tensor("op_336_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_337_cast_fp16 = add(x = var_335_cast_fp16, y = var_336_to_fp16)[name = tensor("op_337_cast_fp16")]; + tensor denom_7_epsilon_0_to_fp16 = const()[name = tensor("denom_7_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_337_cast_fp16)[name = tensor("denom_7_cast_fp16")]; + tensor out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor obj_15_gamma_0_to_fp16 = const()[name = tensor("obj_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99258176)))]; + tensor obj_15_beta_0_to_fp16 = const()[name = tensor("obj_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99259776)))]; + tensor obj_15_epsilon_0_to_fp16 = const()[name = tensor("obj_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor var_352 = const()[name = tensor("op_352"), val = tensor([1, 1])]; + tensor var_354 = const()[name = tensor("op_354"), val = tensor([1, 1])]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("custom")]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99261376)))]; + tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100441088)))]; + tensor query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_354, groups = var_317, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_352, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_358 = const()[name = tensor("op_358"), val = tensor([1, 1])]; + tensor var_360 = const()[name = tensor("op_360"), val = tensor([1, 1])]; + tensor current_key_3_pad_type_0 = const()[name = tensor("current_key_3_pad_type_0"), val = tensor("custom")]; + tensor current_key_3_pad_0 = const()[name = tensor("current_key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100442688)))]; + tensor current_key_3_cast_fp16 = conv(dilations = var_360, groups = var_317, pad = 
current_key_3_pad_0, pad_type = current_key_3_pad_type_0, strides = var_358, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("current_key_3_cast_fp16")]; + tensor var_365 = const()[name = tensor("op_365"), val = tensor([1, 1])]; + tensor var_367 = const()[name = tensor("op_367"), val = tensor([1, 1])]; + tensor current_value_3_pad_type_0 = const()[name = tensor("current_value_3_pad_type_0"), val = tensor("custom")]; + tensor current_value_3_pad_0 = const()[name = tensor("current_value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101622400)))]; + tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102802112)))]; + tensor current_value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_367, groups = var_317, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = var_365, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("current_value_3_cast_fp16")]; + tensor var_374_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_374_cast_fp16")]; + tensor var_376_cast_fp16 = mul(x = var_63_cast_fp16_1, y = var_161_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_374_cast_fp16, y = var_376_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_378_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_378_cast_fp16")]; + tensor var_380_cast_fp16 = mul(x = var_78_cast_fp16_1, y = var_161_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_378_cast_fp16, y = var_380_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_383 = const()[name = tensor("op_383"), val = tensor([1, 12, 64, -1])]; + tensor var_384_cast_fp16 = reshape(shape = var_383, x = query_5_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_385_to_fp16 = const()[name = tensor("op_385_to_fp16"), val = tensor(0x1p-3)]; + tensor var_386_cast_fp16 = mul(x = var_384_cast_fp16, y = var_385_to_fp16)[name = tensor("op_386_cast_fp16")]; + tensor var_387 = const()[name = tensor("op_387"), val = tensor([1, 12, 64, -1])]; + tensor var_388_cast_fp16 = reshape(shape = var_387, x = key_5_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_386_cast_fp16, y = var_388_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_396_cast_fp16 = softmax(axis = var_310, x = mh_w_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_397 = const()[name = tensor("op_397"), val = tensor([1, 12, 64, -1])]; + tensor var_398_cast_fp16 = reshape(shape = var_397, x = value_5_cast_fp16)[name = tensor("op_398_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + 
tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_398_cast_fp16, y = var_396_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_401 = const()[name = tensor("op_401"), val = tensor([1, 768, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_401, x = attn_5_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_405 = const()[name = tensor("op_405"), val = tensor([1, 1])]; + tensor var_407 = const()[name = tensor("op_407"), val = tensor([1, 1])]; + tensor obj_21_pad_type_0 = const()[name = tensor("obj_21_pad_type_0"), val = tensor("custom")]; + tensor obj_21_pad_0 = const()[name = tensor("obj_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102803712)))]; + tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103983424)))]; + tensor obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_407, groups = var_317, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = var_405, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_417 = const()[name = tensor("op_417"), val = tensor([1])]; + tensor channels_mean_9_cast_fp16 = reduce_mean(axes = var_417, keep_dims = var_318, x = inputs_9_cast_fp16)[name = tensor("channels_mean_9_cast_fp16")]; + tensor zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor("zero_mean_9_cast_fp16")]; + tensor zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor("zero_mean_sq_9_cast_fp16")]; + tensor var_421 = const()[name = tensor("op_421"), val = tensor([1])]; + tensor var_422_cast_fp16 = reduce_mean(axes = var_421, keep_dims = var_318, x = zero_mean_sq_9_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor var_423_to_fp16 = const()[name = tensor("op_423_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_424_cast_fp16 = add(x = var_422_cast_fp16, y = var_423_to_fp16)[name = tensor("op_424_cast_fp16")]; + tensor denom_9_epsilon_0_to_fp16 = const()[name = tensor("denom_9_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_424_cast_fp16)[name = tensor("denom_9_cast_fp16")]; + tensor out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_23_gamma_0_to_fp16 = const()[name = tensor("obj_23_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103985024)))]; + tensor obj_23_beta_0_to_fp16 = const()[name = tensor("obj_23_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103986624)))]; + tensor obj_23_epsilon_0_to_fp16 = const()[name = tensor("obj_23_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma 
= obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor var_439 = const()[name = tensor("op_439"), val = tensor([1, 1])]; + tensor var_441 = const()[name = tensor("op_441"), val = tensor([1, 1])]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("custom")]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103988224)))]; + tensor layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105167936)))]; + tensor query_7_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_441, groups = var_317, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_439, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_445 = const()[name = tensor("op_445"), val = tensor([1, 1])]; + tensor var_447 = const()[name = tensor("op_447"), val = tensor([1, 1])]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("custom")]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105169536)))]; + tensor key_7_cast_fp16 = conv(dilations = var_447, groups = var_317, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_445, weight = layers_1_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_7_cast_fp16")]; + tensor var_452 = const()[name = tensor("op_452"), val = tensor([1, 1])]; + tensor var_454 = const()[name = tensor("op_454"), val = tensor([1, 1])]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("custom")]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106349248)))]; + tensor layers_1_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107528960)))]; + tensor value_7_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_bias_to_fp16, dilations = var_454, groups = var_317, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_452, weight = layers_1_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_7_cast_fp16")]; + tensor var_458 = const()[name = tensor("op_458"), val = tensor([1, 12, 64, -1])]; + tensor var_459_cast_fp16 = reshape(shape = var_458, x = query_7_cast_fp16)[name = tensor("op_459_cast_fp16")]; + tensor var_460_to_fp16 = const()[name = tensor("op_460_to_fp16"), val = tensor(0x1p-3)]; + tensor var_461_cast_fp16 = mul(x = var_459_cast_fp16, y = var_460_to_fp16)[name = 
tensor("op_461_cast_fp16")]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor([1, 12, 64, -1])]; + tensor var_463_cast_fp16 = reshape(shape = var_462, x = key_7_cast_fp16)[name = tensor("op_463_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_461_cast_fp16, y = var_463_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor obj_27_cast_fp16 = softmax(axis = var_310, x = mh_w_11_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor var_467 = const()[name = tensor("op_467"), val = tensor([1, 12, 64, -1])]; + tensor var_468_cast_fp16 = reshape(shape = var_467, x = value_7_cast_fp16)[name = tensor("op_468_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_468_cast_fp16, y = obj_27_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1, 768, 1, -1])]; + tensor input_13_cast_fp16 = reshape(shape = var_471, x = attn_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_475 = const()[name = tensor("op_475"), val = tensor([1, 1])]; + tensor var_477 = const()[name = tensor("op_477"), val = tensor([1, 1])]; + tensor obj_25_pad_type_0 = const()[name = tensor("obj_25_pad_type_0"), val = tensor("custom")]; + tensor obj_25_pad_0 = const()[name = tensor("obj_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107530560)))]; + tensor layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108710272)))]; + tensor obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_477, groups = var_317, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = var_475, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor var_483 = const()[name = tensor("op_483"), val = tensor([1])]; + tensor channels_mean_11_cast_fp16 = reduce_mean(axes = var_483, keep_dims = var_318, x = inputs_11_cast_fp16)[name = tensor("channels_mean_11_cast_fp16")]; + tensor zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor("zero_mean_11_cast_fp16")]; + tensor zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor("zero_mean_sq_11_cast_fp16")]; + tensor var_487 = const()[name = tensor("op_487"), val = tensor([1])]; + tensor var_488_cast_fp16 = reduce_mean(axes = var_487, keep_dims = var_318, x = zero_mean_sq_11_cast_fp16)[name = tensor("op_488_cast_fp16")]; + tensor var_489_to_fp16 = const()[name = tensor("op_489_to_fp16"), val = tensor(0x1.5p-17)]; + 
tensor var_490_cast_fp16 = add(x = var_488_cast_fp16, y = var_489_to_fp16)[name = tensor("op_490_cast_fp16")]; + tensor denom_11_epsilon_0_to_fp16 = const()[name = tensor("denom_11_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_490_cast_fp16)[name = tensor("denom_11_cast_fp16")]; + tensor out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_15_gamma_0_to_fp16 = const()[name = tensor("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108711872)))]; + tensor input_15_beta_0_to_fp16 = const()[name = tensor("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108713472)))]; + tensor input_15_epsilon_0_to_fp16 = const()[name = tensor("input_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_501 = const()[name = tensor("op_501"), val = tensor([1, 1])]; + tensor var_503 = const()[name = tensor("op_503"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108715072)))]; + tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113433728)))]; + tensor input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_503, groups = var_317, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_501, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor input_19_mode_0 = const()[name = tensor("input_19_mode_0"), val = tensor("EXACT")]; + tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_509 = const()[name = tensor("op_509"), val = tensor([1, 1])]; + tensor var_511 = const()[name = tensor("op_511"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113439936)))]; + tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118158592)))]; + tensor hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_511, groups = var_317, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_509, weight = layers_1_fc2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor 
inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_524 = const()[name = tensor("op_524"), val = tensor(3)]; + tensor var_531 = const()[name = tensor("op_531"), val = tensor(1)]; + tensor var_532 = const()[name = tensor("op_532"), val = tensor(true)]; + tensor var_544 = const()[name = tensor("op_544"), val = tensor([1])]; + tensor channels_mean_13_cast_fp16 = reduce_mean(axes = var_544, keep_dims = var_532, x = inputs_13_cast_fp16)[name = tensor("channels_mean_13_cast_fp16")]; + tensor zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor("zero_mean_13_cast_fp16")]; + tensor zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor("zero_mean_sq_13_cast_fp16")]; + tensor var_548 = const()[name = tensor("op_548"), val = tensor([1])]; + tensor var_549_cast_fp16 = reduce_mean(axes = var_548, keep_dims = var_532, x = zero_mean_sq_13_cast_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_550_to_fp16 = const()[name = tensor("op_550_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_551_cast_fp16 = add(x = var_549_cast_fp16, y = var_550_to_fp16)[name = tensor("op_551_cast_fp16")]; + tensor denom_13_epsilon_0_to_fp16 = const()[name = tensor("denom_13_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_551_cast_fp16)[name = tensor("denom_13_cast_fp16")]; + tensor out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118160192)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118161792)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_566 = const()[name = tensor("op_566"), val = tensor([1, 1])]; + tensor var_568 = const()[name = tensor("op_568"), val = tensor([1, 1])]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("custom")]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118163392)))]; + tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119343104)))]; + tensor query_9_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_568, groups = var_531, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_566, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_572 = const()[name = tensor("op_572"), val = tensor([1, 1])]; + tensor var_574 
= const()[name = tensor("op_574"), val = tensor([1, 1])]; + tensor current_key_5_pad_type_0 = const()[name = tensor("current_key_5_pad_type_0"), val = tensor("custom")]; + tensor current_key_5_pad_0 = const()[name = tensor("current_key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119344704)))]; + tensor current_key_5_cast_fp16 = conv(dilations = var_574, groups = var_531, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = var_572, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("current_key_5_cast_fp16")]; + tensor var_579 = const()[name = tensor("op_579"), val = tensor([1, 1])]; + tensor var_581 = const()[name = tensor("op_581"), val = tensor([1, 1])]; + tensor current_value_5_pad_type_0 = const()[name = tensor("current_value_5_pad_type_0"), val = tensor("custom")]; + tensor current_value_5_pad_0 = const()[name = tensor("current_value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120524416)))]; + tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121704128)))]; + tensor current_value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_581, groups = var_531, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = var_579, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("current_value_5_cast_fp16")]; + tensor var_588_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_588_cast_fp16")]; + tensor var_590_cast_fp16 = mul(x = var_63_cast_fp16_2, y = var_161_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_588_cast_fp16, y = var_590_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_592_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_592_cast_fp16")]; + tensor var_594_cast_fp16 = mul(x = var_78_cast_fp16_2, y = var_161_cast_fp16)[name = tensor("op_594_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_592_cast_fp16, y = var_594_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_597 = const()[name = tensor("op_597"), val = tensor([1, 12, 64, -1])]; + tensor var_598_cast_fp16 = reshape(shape = var_597, x = query_9_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1p-3)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601 = const()[name = tensor("op_601"), val = tensor([1, 12, 64, -1])]; + tensor var_602_cast_fp16 = reshape(shape = var_601, x = key_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = 
var_600_cast_fp16, y = var_602_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_524, x = mh_w_15_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611 = const()[name = tensor("op_611"), val = tensor([1, 12, 64, -1])]; + tensor var_612_cast_fp16 = reshape(shape = var_611, x = value_9_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_612_cast_fp16, y = var_610_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_615 = const()[name = tensor("op_615"), val = tensor([1, 768, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_615, x = attn_9_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_619 = const()[name = tensor("op_619"), val = tensor([1, 1])]; + tensor var_621 = const()[name = tensor("op_621"), val = tensor([1, 1])]; + tensor obj_35_pad_type_0 = const()[name = tensor("obj_35_pad_type_0"), val = tensor("custom")]; + tensor obj_35_pad_0 = const()[name = tensor("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121705728)))]; + tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122885440)))]; + tensor obj_35_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_621, groups = var_531, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = var_619, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor var_631 = const()[name = tensor("op_631"), val = tensor([1])]; + tensor channels_mean_15_cast_fp16 = reduce_mean(axes = var_631, keep_dims = var_532, x = inputs_15_cast_fp16)[name = tensor("channels_mean_15_cast_fp16")]; + tensor zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor("zero_mean_15_cast_fp16")]; + tensor zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor("zero_mean_sq_15_cast_fp16")]; + tensor var_635 = const()[name = tensor("op_635"), val = tensor([1])]; + tensor var_636_cast_fp16 = reduce_mean(axes = var_635, keep_dims = var_532, x = zero_mean_sq_15_cast_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_637_to_fp16 = const()[name = tensor("op_637_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_638_cast_fp16 = add(x = var_636_cast_fp16, y = var_637_to_fp16)[name = tensor("op_638_cast_fp16")]; + tensor denom_15_epsilon_0_to_fp16 = const()[name = tensor("denom_15_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_638_cast_fp16)[name = tensor("denom_15_cast_fp16")]; + tensor out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = 
denom_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122887040)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122888640)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_653 = const()[name = tensor("op_653"), val = tensor([1, 1])]; + tensor var_655 = const()[name = tensor("op_655"), val = tensor([1, 1])]; + tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("custom")]; + tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122890240)))]; + tensor layers_2_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124069952)))]; + tensor query_11_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_bias_to_fp16, dilations = var_655, groups = var_531, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_653, weight = layers_2_encoder_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_659 = const()[name = tensor("op_659"), val = tensor([1, 1])]; + tensor var_661 = const()[name = tensor("op_661"), val = tensor([1, 1])]; + tensor key_11_pad_type_0 = const()[name = tensor("key_11_pad_type_0"), val = tensor("custom")]; + tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124071552)))]; + tensor key_11_cast_fp16 = conv(dilations = var_661, groups = var_531, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_659, weight = layers_2_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_11_cast_fp16")]; + tensor var_666 = const()[name = tensor("op_666"), val = tensor([1, 1])]; + tensor var_668 = const()[name = tensor("op_668"), val = tensor([1, 1])]; + tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("custom")]; + tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125251264)))]; + tensor layers_2_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126430976)))]; + tensor 
value_11_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_bias_to_fp16, dilations = var_668, groups = var_531, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_666, weight = layers_2_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_11_cast_fp16")]; + tensor var_672 = const()[name = tensor("op_672"), val = tensor([1, 12, 64, -1])]; + tensor var_673_cast_fp16 = reshape(shape = var_672, x = query_11_cast_fp16)[name = tensor("op_673_cast_fp16")]; + tensor var_674_to_fp16 = const()[name = tensor("op_674_to_fp16"), val = tensor(0x1p-3)]; + tensor var_675_cast_fp16 = mul(x = var_673_cast_fp16, y = var_674_to_fp16)[name = tensor("op_675_cast_fp16")]; + tensor var_676 = const()[name = tensor("op_676"), val = tensor([1, 12, 64, -1])]; + tensor var_677_cast_fp16 = reshape(shape = var_676, x = key_11_cast_fp16)[name = tensor("op_677_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_675_cast_fp16, y = var_677_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor obj_41_cast_fp16 = softmax(axis = var_524, x = mh_w_17_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_681 = const()[name = tensor("op_681"), val = tensor([1, 12, 64, -1])]; + tensor var_682_cast_fp16 = reshape(shape = var_681, x = value_11_cast_fp16)[name = tensor("op_682_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_682_cast_fp16, y = obj_41_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_685 = const()[name = tensor("op_685"), val = tensor([1, 768, 1, -1])]; + tensor input_23_cast_fp16 = reshape(shape = var_685, x = attn_11_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_689 = const()[name = tensor("op_689"), val = tensor([1, 1])]; + tensor var_691 = const()[name = tensor("op_691"), val = tensor([1, 1])]; + tensor obj_39_pad_type_0 = const()[name = tensor("obj_39_pad_type_0"), val = tensor("custom")]; + tensor obj_39_pad_0 = const()[name = tensor("obj_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126432576)))]; + tensor layers_2_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127612288)))]; + tensor obj_39_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_bias_to_fp16, dilations = var_691, groups = var_531, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = var_689, weight = layers_2_encoder_attn_o_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_697 = const()[name = tensor("op_697"), val = tensor([1])]; + tensor channels_mean_17_cast_fp16 = reduce_mean(axes = 
var_697, keep_dims = var_532, x = inputs_17_cast_fp16)[name = tensor("channels_mean_17_cast_fp16")]; + tensor zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor("zero_mean_17_cast_fp16")]; + tensor zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor("zero_mean_sq_17_cast_fp16")]; + tensor var_701 = const()[name = tensor("op_701"), val = tensor([1])]; + tensor var_702_cast_fp16 = reduce_mean(axes = var_701, keep_dims = var_532, x = zero_mean_sq_17_cast_fp16)[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_704_cast_fp16 = add(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor denom_17_epsilon_0_to_fp16 = const()[name = tensor("denom_17_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_704_cast_fp16)[name = tensor("denom_17_cast_fp16")]; + tensor out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor input_25_gamma_0_to_fp16 = const()[name = tensor("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127613888)))]; + tensor input_25_beta_0_to_fp16 = const()[name = tensor("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127615488)))]; + tensor input_25_epsilon_0_to_fp16 = const()[name = tensor("input_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_715 = const()[name = tensor("op_715"), val = tensor([1, 1])]; + tensor var_717 = const()[name = tensor("op_717"), val = tensor([1, 1])]; + tensor input_27_pad_type_0 = const()[name = tensor("input_27_pad_type_0"), val = tensor("custom")]; + tensor input_27_pad_0 = const()[name = tensor("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127617088)))]; + tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132335744)))]; + tensor input_27_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_717, groups = var_531, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = var_715, weight = layers_2_fc1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor input_29_mode_0 = const()[name = tensor("input_29_mode_0"), val = tensor("EXACT")]; + tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor var_723 = const()[name = tensor("op_723"), val = tensor([1, 1])]; + tensor var_725 = const()[name = tensor("op_725"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132341952)))]; + tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137060608)))]; + tensor hidden_states_7_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_725, groups = var_531, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_723, weight = layers_2_fc2_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_738 = const()[name = tensor("op_738"), val = tensor(3)]; + tensor var_745 = const()[name = tensor("op_745"), val = tensor(1)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(true)]; + tensor var_758 = const()[name = tensor("op_758"), val = tensor([1])]; + tensor channels_mean_19_cast_fp16 = reduce_mean(axes = var_758, keep_dims = var_746, x = inputs_19_cast_fp16)[name = tensor("channels_mean_19_cast_fp16")]; + tensor zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor("zero_mean_19_cast_fp16")]; + tensor zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor("zero_mean_sq_19_cast_fp16")]; + tensor var_762 = const()[name = tensor("op_762"), val = tensor([1])]; + tensor var_763_cast_fp16 = reduce_mean(axes = var_762, keep_dims = var_746, x = zero_mean_sq_19_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_to_fp16 = const()[name = tensor("op_764_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_765_cast_fp16 = add(x = var_763_cast_fp16, y = var_764_to_fp16)[name = tensor("op_765_cast_fp16")]; + tensor denom_19_epsilon_0_to_fp16 = const()[name = tensor("denom_19_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_765_cast_fp16)[name = tensor("denom_19_cast_fp16")]; + tensor out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor obj_43_gamma_0_to_fp16 = const()[name = tensor("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137062208)))]; + tensor obj_43_beta_0_to_fp16 = const()[name = tensor("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137063808)))]; + tensor obj_43_epsilon_0_to_fp16 = const()[name = tensor("obj_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor var_780 = const()[name = tensor("op_780"), val = tensor([1, 1])]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor([1, 1])]; + tensor query_13_pad_type_0 = const()[name = tensor("query_13_pad_type_0"), val = tensor("custom")]; + tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137065408)))]; + tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138245120)))]; + tensor query_13_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_782, groups = var_745, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_780, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_786 = const()[name = tensor("op_786"), val = tensor([1, 1])]; + tensor var_788 = const()[name = tensor("op_788"), val = tensor([1, 1])]; + tensor current_key_7_pad_type_0 = const()[name = tensor("current_key_7_pad_type_0"), val = tensor("custom")]; + tensor current_key_7_pad_0 = const()[name = tensor("current_key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138246720)))]; + tensor current_key_7_cast_fp16 = conv(dilations = var_788, groups = var_745, pad = current_key_7_pad_0, pad_type = current_key_7_pad_type_0, strides = var_786, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("current_key_7_cast_fp16")]; + tensor var_793 = const()[name = tensor("op_793"), val = tensor([1, 1])]; + tensor var_795 = const()[name = tensor("op_795"), val = tensor([1, 1])]; + tensor current_value_7_pad_type_0 = const()[name = tensor("current_value_7_pad_type_0"), val = tensor("custom")]; + tensor current_value_7_pad_0 = const()[name = tensor("current_value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139426432)))]; + tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140606144)))]; + tensor current_value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_795, groups = var_745, pad = current_value_7_pad_0, pad_type = current_value_7_pad_type_0, strides = var_793, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("current_value_7_cast_fp16")]; + tensor var_802_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_802_cast_fp16")]; + tensor var_804_cast_fp16 = mul(x = var_63_cast_fp16_3, y = var_161_cast_fp16)[name = tensor("op_804_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_802_cast_fp16, y = var_804_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_806_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_806_cast_fp16")]; + tensor var_808_cast_fp16 = mul(x = var_78_cast_fp16_3, y = var_161_cast_fp16)[name = tensor("op_808_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_806_cast_fp16, y = var_808_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_811 = const()[name = tensor("op_811"), val = tensor([1, 12, 64, -1])]; + tensor var_812_cast_fp16 = reshape(shape = var_811, x = query_13_cast_fp16)[name = tensor("op_812_cast_fp16")]; + tensor 
var_813_to_fp16 = const()[name = tensor("op_813_to_fp16"), val = tensor(0x1p-3)]; + tensor var_814_cast_fp16 = mul(x = var_812_cast_fp16, y = var_813_to_fp16)[name = tensor("op_814_cast_fp16")]; + tensor var_815 = const()[name = tensor("op_815"), val = tensor([1, 12, 64, -1])]; + tensor var_816_cast_fp16 = reshape(shape = var_815, x = key_13_cast_fp16)[name = tensor("op_816_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_814_cast_fp16, y = var_816_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_824_cast_fp16 = softmax(axis = var_738, x = mh_w_21_cast_fp16)[name = tensor("op_824_cast_fp16")]; + tensor var_825 = const()[name = tensor("op_825"), val = tensor([1, 12, 64, -1])]; + tensor var_826_cast_fp16 = reshape(shape = var_825, x = value_13_cast_fp16)[name = tensor("op_826_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_826_cast_fp16, y = var_824_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 768, 1, -1])]; + tensor input_31_cast_fp16 = reshape(shape = var_829, x = attn_13_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_833 = const()[name = tensor("op_833"), val = tensor([1, 1])]; + tensor var_835 = const()[name = tensor("op_835"), val = tensor([1, 1])]; + tensor obj_49_pad_type_0 = const()[name = tensor("obj_49_pad_type_0"), val = tensor("custom")]; + tensor obj_49_pad_0 = const()[name = tensor("obj_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140607744)))]; + tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141787456)))]; + tensor obj_49_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_835, groups = var_745, pad = obj_49_pad_0, pad_type = obj_49_pad_type_0, strides = var_833, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_845 = const()[name = tensor("op_845"), val = tensor([1])]; + tensor channels_mean_21_cast_fp16 = reduce_mean(axes = var_845, keep_dims = var_746, x = inputs_21_cast_fp16)[name = tensor("channels_mean_21_cast_fp16")]; + tensor zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = tensor("zero_mean_21_cast_fp16")]; + tensor zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor("zero_mean_sq_21_cast_fp16")]; + tensor var_849 = const()[name = 
tensor("op_849"), val = tensor([1])]; + tensor var_850_cast_fp16 = reduce_mean(axes = var_849, keep_dims = var_746, x = zero_mean_sq_21_cast_fp16)[name = tensor("op_850_cast_fp16")]; + tensor var_851_to_fp16 = const()[name = tensor("op_851_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_852_cast_fp16 = add(x = var_850_cast_fp16, y = var_851_to_fp16)[name = tensor("op_852_cast_fp16")]; + tensor denom_21_epsilon_0_to_fp16 = const()[name = tensor("denom_21_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_852_cast_fp16)[name = tensor("denom_21_cast_fp16")]; + tensor out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_51_gamma_0_to_fp16 = const()[name = tensor("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141789056)))]; + tensor obj_51_beta_0_to_fp16 = const()[name = tensor("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141790656)))]; + tensor obj_51_epsilon_0_to_fp16 = const()[name = tensor("obj_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor var_867 = const()[name = tensor("op_867"), val = tensor([1, 1])]; + tensor var_869 = const()[name = tensor("op_869"), val = tensor([1, 1])]; + tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("custom")]; + tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141792256)))]; + tensor layers_3_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142971968)))]; + tensor query_15_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_bias_to_fp16, dilations = var_869, groups = var_745, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = var_867, weight = layers_3_encoder_attn_q_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_873 = const()[name = tensor("op_873"), val = tensor([1, 1])]; + tensor var_875 = const()[name = tensor("op_875"), val = tensor([1, 1])]; + tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("custom")]; + tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142973568)))]; + tensor key_15_cast_fp16 = conv(dilations = var_875, groups = var_745, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = var_873, weight = layers_3_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_15_cast_fp16")]; + tensor var_880 = const()[name = tensor("op_880"), val = tensor([1, 1])]; + tensor var_882 = const()[name = 
tensor("op_882"), val = tensor([1, 1])]; + tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("custom")]; + tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144153280)))]; + tensor layers_3_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145332992)))]; + tensor value_15_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_bias_to_fp16, dilations = var_882, groups = var_745, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = var_880, weight = layers_3_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_15_cast_fp16")]; + tensor var_886 = const()[name = tensor("op_886"), val = tensor([1, 12, 64, -1])]; + tensor var_887_cast_fp16 = reshape(shape = var_886, x = query_15_cast_fp16)[name = tensor("op_887_cast_fp16")]; + tensor var_888_to_fp16 = const()[name = tensor("op_888_to_fp16"), val = tensor(0x1p-3)]; + tensor var_889_cast_fp16 = mul(x = var_887_cast_fp16, y = var_888_to_fp16)[name = tensor("op_889_cast_fp16")]; + tensor var_890 = const()[name = tensor("op_890"), val = tensor([1, 12, 64, -1])]; + tensor var_891_cast_fp16 = reshape(shape = var_890, x = key_15_cast_fp16)[name = tensor("op_891_cast_fp16")]; + tensor mh_w_23_transpose_x_0 = const()[name = tensor("mh_w_23_transpose_x_0"), val = tensor(true)]; + tensor mh_w_23_transpose_y_0 = const()[name = tensor("mh_w_23_transpose_y_0"), val = tensor(false)]; + tensor mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_889_cast_fp16, y = var_891_cast_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor obj_55_cast_fp16 = softmax(axis = var_738, x = mh_w_23_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor var_895 = const()[name = tensor("op_895"), val = tensor([1, 12, 64, -1])]; + tensor var_896_cast_fp16 = reshape(shape = var_895, x = value_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_896_cast_fp16, y = obj_55_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_899 = const()[name = tensor("op_899"), val = tensor([1, 768, 1, -1])]; + tensor input_33_cast_fp16 = reshape(shape = var_899, x = attn_15_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_903 = const()[name = tensor("op_903"), val = tensor([1, 1])]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, 1])]; + tensor obj_53_pad_type_0 = const()[name = tensor("obj_53_pad_type_0"), val = tensor("custom")]; + tensor obj_53_pad_0 = const()[name = tensor("obj_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145334592)))]; + tensor layers_3_encoder_attn_o_proj_bias_to_fp16 = const()[name = 
tensor("layers_3_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146514304)))]; + tensor obj_53_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_bias_to_fp16, dilations = var_905, groups = var_745, pad = obj_53_pad_0, pad_type = obj_53_pad_type_0, strides = var_903, weight = layers_3_encoder_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor var_911 = const()[name = tensor("op_911"), val = tensor([1])]; + tensor channels_mean_23_cast_fp16 = reduce_mean(axes = var_911, keep_dims = var_746, x = inputs_23_cast_fp16)[name = tensor("channels_mean_23_cast_fp16")]; + tensor zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor("zero_mean_23_cast_fp16")]; + tensor zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = tensor("zero_mean_sq_23_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1])]; + tensor var_916_cast_fp16 = reduce_mean(axes = var_915, keep_dims = var_746, x = zero_mean_sq_23_cast_fp16)[name = tensor("op_916_cast_fp16")]; + tensor var_917_to_fp16 = const()[name = tensor("op_917_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_918_cast_fp16 = add(x = var_916_cast_fp16, y = var_917_to_fp16)[name = tensor("op_918_cast_fp16")]; + tensor denom_23_epsilon_0_to_fp16 = const()[name = tensor("denom_23_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = var_918_cast_fp16)[name = tensor("denom_23_cast_fp16")]; + tensor out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146515904)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146517504)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_929 = const()[name = tensor("op_929"), val = tensor([1, 1])]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor([1, 1])]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("custom")]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146519104)))]; + tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151237760)))]; + tensor input_37_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_931, groups = var_745, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = 
var_929, weight = layers_3_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_937 = const()[name = tensor("op_937"), val = tensor([1, 1])]; + tensor var_939 = const()[name = tensor("op_939"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151243968)))]; + tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155962624)))]; + tensor hidden_states_9_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_939, groups = var_745, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_937, weight = layers_3_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_952 = const()[name = tensor("op_952"), val = tensor(3)]; + tensor var_959 = const()[name = tensor("op_959"), val = tensor(1)]; + tensor var_960 = const()[name = tensor("op_960"), val = tensor(true)]; + tensor var_972 = const()[name = tensor("op_972"), val = tensor([1])]; + tensor channels_mean_25_cast_fp16 = reduce_mean(axes = var_972, keep_dims = var_960, x = inputs_25_cast_fp16)[name = tensor("channels_mean_25_cast_fp16")]; + tensor zero_mean_25_cast_fp16 = sub(x = inputs_25_cast_fp16, y = channels_mean_25_cast_fp16)[name = tensor("zero_mean_25_cast_fp16")]; + tensor zero_mean_sq_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = zero_mean_25_cast_fp16)[name = tensor("zero_mean_sq_25_cast_fp16")]; + tensor var_976 = const()[name = tensor("op_976"), val = tensor([1])]; + tensor var_977_cast_fp16 = reduce_mean(axes = var_976, keep_dims = var_960, x = zero_mean_sq_25_cast_fp16)[name = tensor("op_977_cast_fp16")]; + tensor var_978_to_fp16 = const()[name = tensor("op_978_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_979_cast_fp16 = add(x = var_977_cast_fp16, y = var_978_to_fp16)[name = tensor("op_979_cast_fp16")]; + tensor denom_25_epsilon_0_to_fp16 = const()[name = tensor("denom_25_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0_to_fp16, x = var_979_cast_fp16)[name = tensor("denom_25_cast_fp16")]; + tensor out_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = denom_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155964224)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155965824)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = 
batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor var_994 = const()[name = tensor("op_994"), val = tensor([1, 1])]; + tensor var_996 = const()[name = tensor("op_996"), val = tensor([1, 1])]; + tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("custom")]; + tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155967424)))]; + tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157147136)))]; + tensor query_17_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = var_996, groups = var_959, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = var_994, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_1000 = const()[name = tensor("op_1000"), val = tensor([1, 1])]; + tensor var_1002 = const()[name = tensor("op_1002"), val = tensor([1, 1])]; + tensor current_key_9_pad_type_0 = const()[name = tensor("current_key_9_pad_type_0"), val = tensor("custom")]; + tensor current_key_9_pad_0 = const()[name = tensor("current_key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157148736)))]; + tensor current_key_9_cast_fp16 = conv(dilations = var_1002, groups = var_959, pad = current_key_9_pad_0, pad_type = current_key_9_pad_type_0, strides = var_1000, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("current_key_9_cast_fp16")]; + tensor var_1007 = const()[name = tensor("op_1007"), val = tensor([1, 1])]; + tensor var_1009 = const()[name = tensor("op_1009"), val = tensor([1, 1])]; + tensor current_value_9_pad_type_0 = const()[name = tensor("current_value_9_pad_type_0"), val = tensor("custom")]; + tensor current_value_9_pad_0 = const()[name = tensor("current_value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158328448)))]; + tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159508160)))]; + tensor current_value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = var_1009, groups = var_959, pad = current_value_9_pad_0, pad_type = current_value_9_pad_type_0, strides = var_1007, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("current_value_9_cast_fp16")]; + tensor var_1016_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1016_cast_fp16")]; + tensor var_1018_cast_fp16 = mul(x = var_63_cast_fp16_4, y = 
var_161_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1016_cast_fp16, y = var_1018_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1020_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1020_cast_fp16")]; + tensor var_1022_cast_fp16 = mul(x = var_78_cast_fp16_4, y = var_161_cast_fp16)[name = tensor("op_1022_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1022_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 12, 64, -1])]; + tensor var_1026_cast_fp16 = reshape(shape = var_1025, x = query_17_cast_fp16)[name = tensor("op_1026_cast_fp16")]; + tensor var_1027_to_fp16 = const()[name = tensor("op_1027_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1028_cast_fp16 = mul(x = var_1026_cast_fp16, y = var_1027_to_fp16)[name = tensor("op_1028_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 12, 64, -1])]; + tensor var_1030_cast_fp16 = reshape(shape = var_1029, x = key_17_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1028_cast_fp16, y = var_1030_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor var_1038_cast_fp16 = softmax(axis = var_952, x = mh_w_27_cast_fp16)[name = tensor("op_1038_cast_fp16")]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 12, 64, -1])]; + tensor var_1040_cast_fp16 = reshape(shape = var_1039, x = value_17_cast_fp16)[name = tensor("op_1040_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1040_cast_fp16, y = var_1038_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1043 = const()[name = tensor("op_1043"), val = tensor([1, 768, 1, -1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1043, x = attn_17_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049 = const()[name = tensor("op_1049"), val = tensor([1, 1])]; + tensor obj_63_pad_type_0 = const()[name = tensor("obj_63_pad_type_0"), val = tensor("custom")]; + tensor obj_63_pad_0 = const()[name = tensor("obj_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159509760)))]; + tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160689472)))]; + tensor obj_63_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = var_1049, groups = var_959, pad = obj_63_pad_0, pad_type = obj_63_pad_type_0, strides = 
var_1047, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor var_1059 = const()[name = tensor("op_1059"), val = tensor([1])]; + tensor channels_mean_27_cast_fp16 = reduce_mean(axes = var_1059, keep_dims = var_960, x = inputs_27_cast_fp16)[name = tensor("channels_mean_27_cast_fp16")]; + tensor zero_mean_27_cast_fp16 = sub(x = inputs_27_cast_fp16, y = channels_mean_27_cast_fp16)[name = tensor("zero_mean_27_cast_fp16")]; + tensor zero_mean_sq_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = zero_mean_27_cast_fp16)[name = tensor("zero_mean_sq_27_cast_fp16")]; + tensor var_1063 = const()[name = tensor("op_1063"), val = tensor([1])]; + tensor var_1064_cast_fp16 = reduce_mean(axes = var_1063, keep_dims = var_960, x = zero_mean_sq_27_cast_fp16)[name = tensor("op_1064_cast_fp16")]; + tensor var_1065_to_fp16 = const()[name = tensor("op_1065_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1066_cast_fp16 = add(x = var_1064_cast_fp16, y = var_1065_to_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor denom_27_epsilon_0_to_fp16 = const()[name = tensor("denom_27_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0_to_fp16, x = var_1066_cast_fp16)[name = tensor("denom_27_cast_fp16")]; + tensor out_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = denom_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160691072)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160692672)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor var_1081 = const()[name = tensor("op_1081"), val = tensor([1, 1])]; + tensor var_1083 = const()[name = tensor("op_1083"), val = tensor([1, 1])]; + tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = tensor("custom")]; + tensor query_19_pad_0 = const()[name = tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160694272)))]; + tensor layers_4_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161873984)))]; + tensor query_19_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_bias_to_fp16, dilations = var_1083, groups = var_959, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = var_1081, weight = layers_4_encoder_attn_q_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1087 = const()[name = tensor("op_1087"), val = tensor([1, 1])]; + tensor var_1089 = const()[name = tensor("op_1089"), val = 
tensor([1, 1])]; + tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("custom")]; + tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161875584)))]; + tensor key_19_cast_fp16 = conv(dilations = var_1089, groups = var_959, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = var_1087, weight = layers_4_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_19_cast_fp16")]; + tensor var_1094 = const()[name = tensor("op_1094"), val = tensor([1, 1])]; + tensor var_1096 = const()[name = tensor("op_1096"), val = tensor([1, 1])]; + tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("custom")]; + tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163055296)))]; + tensor layers_4_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164235008)))]; + tensor value_19_cast_fp16 = conv(bias = layers_4_encoder_attn_v_proj_bias_to_fp16, dilations = var_1096, groups = var_959, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = var_1094, weight = layers_4_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_19_cast_fp16")]; + tensor var_1100 = const()[name = tensor("op_1100"), val = tensor([1, 12, 64, -1])]; + tensor var_1101_cast_fp16 = reshape(shape = var_1100, x = query_19_cast_fp16)[name = tensor("op_1101_cast_fp16")]; + tensor var_1102_to_fp16 = const()[name = tensor("op_1102_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1103_cast_fp16 = mul(x = var_1101_cast_fp16, y = var_1102_to_fp16)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104 = const()[name = tensor("op_1104"), val = tensor([1, 12, 64, -1])]; + tensor var_1105_cast_fp16 = reshape(shape = var_1104, x = key_19_cast_fp16)[name = tensor("op_1105_cast_fp16")]; + tensor mh_w_29_transpose_x_0 = const()[name = tensor("mh_w_29_transpose_x_0"), val = tensor(true)]; + tensor mh_w_29_transpose_y_0 = const()[name = tensor("mh_w_29_transpose_y_0"), val = tensor(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1103_cast_fp16, y = var_1105_cast_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor obj_69_cast_fp16 = softmax(axis = var_952, x = mh_w_29_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor var_1109 = const()[name = tensor("op_1109"), val = tensor([1, 12, 64, -1])]; + tensor var_1110_cast_fp16 = reshape(shape = var_1109, x = value_19_cast_fp16)[name = tensor("op_1110_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1110_cast_fp16, y = obj_69_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor 
var_1113 = const()[name = tensor("op_1113"), val = tensor([1, 768, 1, -1])]; + tensor input_43_cast_fp16 = reshape(shape = var_1113, x = attn_19_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 1])]; + tensor var_1119 = const()[name = tensor("op_1119"), val = tensor([1, 1])]; + tensor obj_67_pad_type_0 = const()[name = tensor("obj_67_pad_type_0"), val = tensor("custom")]; + tensor obj_67_pad_0 = const()[name = tensor("obj_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164236608)))]; + tensor layers_4_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165416320)))]; + tensor obj_67_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_bias_to_fp16, dilations = var_1119, groups = var_959, pad = obj_67_pad_0, pad_type = obj_67_pad_type_0, strides = var_1117, weight = layers_4_encoder_attn_o_proj_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_1125 = const()[name = tensor("op_1125"), val = tensor([1])]; + tensor channels_mean_29_cast_fp16 = reduce_mean(axes = var_1125, keep_dims = var_960, x = inputs_29_cast_fp16)[name = tensor("channels_mean_29_cast_fp16")]; + tensor zero_mean_29_cast_fp16 = sub(x = inputs_29_cast_fp16, y = channels_mean_29_cast_fp16)[name = tensor("zero_mean_29_cast_fp16")]; + tensor zero_mean_sq_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = zero_mean_29_cast_fp16)[name = tensor("zero_mean_sq_29_cast_fp16")]; + tensor var_1129 = const()[name = tensor("op_1129"), val = tensor([1])]; + tensor var_1130_cast_fp16 = reduce_mean(axes = var_1129, keep_dims = var_960, x = zero_mean_sq_29_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1131_to_fp16 = const()[name = tensor("op_1131_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1132_cast_fp16 = add(x = var_1130_cast_fp16, y = var_1131_to_fp16)[name = tensor("op_1132_cast_fp16")]; + tensor denom_29_epsilon_0_to_fp16 = const()[name = tensor("denom_29_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0_to_fp16, x = var_1132_cast_fp16)[name = tensor("denom_29_cast_fp16")]; + tensor out_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = denom_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor input_45_gamma_0_to_fp16 = const()[name = tensor("input_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165417920)))]; + tensor input_45_beta_0_to_fp16 = const()[name = tensor("input_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165419520)))]; + tensor input_45_epsilon_0_to_fp16 = const()[name = tensor("input_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor var_1143 = const()[name = tensor("op_1143"), 
val = tensor([1, 1])]; + tensor var_1145 = const()[name = tensor("op_1145"), val = tensor([1, 1])]; + tensor input_47_pad_type_0 = const()[name = tensor("input_47_pad_type_0"), val = tensor("custom")]; + tensor input_47_pad_0 = const()[name = tensor("input_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165421120)))]; + tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170139776)))]; + tensor input_47_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = var_1145, groups = var_959, pad = input_47_pad_0, pad_type = input_47_pad_type_0, strides = var_1143, weight = layers_4_fc1_weight_to_fp16, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor input_49_mode_0 = const()[name = tensor("input_49_mode_0"), val = tensor("EXACT")]; + tensor input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1151 = const()[name = tensor("op_1151"), val = tensor([1, 1])]; + tensor var_1153 = const()[name = tensor("op_1153"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170145984)))]; + tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174864640)))]; + tensor hidden_states_11_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = var_1153, groups = var_959, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = var_1151, weight = layers_4_fc2_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor var_1166 = const()[name = tensor("op_1166"), val = tensor(3)]; + tensor var_1173 = const()[name = tensor("op_1173"), val = tensor(1)]; + tensor var_1174 = const()[name = tensor("op_1174"), val = tensor(true)]; + tensor var_1186 = const()[name = tensor("op_1186"), val = tensor([1])]; + tensor channels_mean_31_cast_fp16 = reduce_mean(axes = var_1186, keep_dims = var_1174, x = inputs_31_cast_fp16)[name = tensor("channels_mean_31_cast_fp16")]; + tensor zero_mean_31_cast_fp16 = sub(x = inputs_31_cast_fp16, y = channels_mean_31_cast_fp16)[name = tensor("zero_mean_31_cast_fp16")]; + tensor zero_mean_sq_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = zero_mean_31_cast_fp16)[name = tensor("zero_mean_sq_31_cast_fp16")]; + tensor var_1190 = const()[name = tensor("op_1190"), val = tensor([1])]; + tensor var_1191_cast_fp16 = reduce_mean(axes = var_1190, keep_dims = var_1174, x = zero_mean_sq_31_cast_fp16)[name = tensor("op_1191_cast_fp16")]; + tensor var_1192_to_fp16 = const()[name = tensor("op_1192_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1193_cast_fp16 = add(x = var_1191_cast_fp16, y = var_1192_to_fp16)[name = 
tensor("op_1193_cast_fp16")]; + tensor denom_31_epsilon_0_to_fp16 = const()[name = tensor("denom_31_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0_to_fp16, x = var_1193_cast_fp16)[name = tensor("denom_31_cast_fp16")]; + tensor out_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = denom_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor obj_71_gamma_0_to_fp16 = const()[name = tensor("obj_71_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174866240)))]; + tensor obj_71_beta_0_to_fp16 = const()[name = tensor("obj_71_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174867840)))]; + tensor obj_71_epsilon_0_to_fp16 = const()[name = tensor("obj_71_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_71_cast_fp16 = batch_norm(beta = obj_71_beta_0_to_fp16, epsilon = obj_71_epsilon_0_to_fp16, gamma = obj_71_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor var_1208 = const()[name = tensor("op_1208"), val = tensor([1, 1])]; + tensor var_1210 = const()[name = tensor("op_1210"), val = tensor([1, 1])]; + tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("custom")]; + tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174869440)))]; + tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176049152)))]; + tensor query_21_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = var_1210, groups = var_1173, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = var_1208, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_1214 = const()[name = tensor("op_1214"), val = tensor([1, 1])]; + tensor var_1216 = const()[name = tensor("op_1216"), val = tensor([1, 1])]; + tensor current_key_11_pad_type_0 = const()[name = tensor("current_key_11_pad_type_0"), val = tensor("custom")]; + tensor current_key_11_pad_0 = const()[name = tensor("current_key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176050752)))]; + tensor current_key_11_cast_fp16 = conv(dilations = var_1216, groups = var_1173, pad = current_key_11_pad_0, pad_type = current_key_11_pad_type_0, strides = var_1214, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("current_key_11_cast_fp16")]; + tensor var_1221 = const()[name = tensor("op_1221"), val = tensor([1, 1])]; + tensor var_1223 = const()[name = tensor("op_1223"), val = tensor([1, 1])]; + tensor current_value_11_pad_type_0 = const()[name = tensor("current_value_11_pad_type_0"), val = tensor("custom")]; + tensor current_value_11_pad_0 = const()[name = tensor("current_value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177230464)))]; + tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178410176)))]; + tensor current_value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = var_1223, groups = var_1173, pad = current_value_11_pad_0, pad_type = current_value_11_pad_type_0, strides = var_1221, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("current_value_11_cast_fp16")]; + tensor var_1230_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_cast_fp16 = mul(x = var_63_cast_fp16_5, y = var_161_cast_fp16)[name = tensor("op_1232_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1230_cast_fp16, y = var_1232_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_1234_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_cast_fp16 = mul(x = var_78_cast_fp16_5, y = var_161_cast_fp16)[name = tensor("op_1236_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_1234_cast_fp16, y = var_1236_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_1239 = const()[name = tensor("op_1239"), val = tensor([1, 12, 64, -1])]; + tensor var_1240_cast_fp16 = reshape(shape = var_1239, x = query_21_cast_fp16)[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1243 = const()[name = tensor("op_1243"), val = tensor([1, 12, 64, -1])]; + tensor var_1244_cast_fp16 = reshape(shape = var_1243, x = key_21_cast_fp16)[name = tensor("op_1244_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_1242_cast_fp16, y = var_1244_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor mh_w_33_cast_fp16 = add(x = mh_w_31_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor var_1252_cast_fp16 = softmax(axis = var_1166, x = mh_w_33_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1253 = const()[name = tensor("op_1253"), val = tensor([1, 12, 64, -1])]; + tensor var_1254_cast_fp16 = reshape(shape = var_1253, x = value_21_cast_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1254_cast_fp16, y = var_1252_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_1257 = const()[name = tensor("op_1257"), val = tensor([1, 768, 1, -1])]; + tensor input_51_cast_fp16 = reshape(shape = var_1257, x = attn_21_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor 
var_1261 = const()[name = tensor("op_1261"), val = tensor([1, 1])]; + tensor var_1263 = const()[name = tensor("op_1263"), val = tensor([1, 1])]; + tensor obj_77_pad_type_0 = const()[name = tensor("obj_77_pad_type_0"), val = tensor("custom")]; + tensor obj_77_pad_0 = const()[name = tensor("obj_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178411776)))]; + tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179591488)))]; + tensor obj_77_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = var_1263, groups = var_1173, pad = obj_77_pad_0, pad_type = obj_77_pad_type_0, strides = var_1261, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_77_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_1273 = const()[name = tensor("op_1273"), val = tensor([1])]; + tensor channels_mean_33_cast_fp16 = reduce_mean(axes = var_1273, keep_dims = var_1174, x = inputs_33_cast_fp16)[name = tensor("channels_mean_33_cast_fp16")]; + tensor zero_mean_33_cast_fp16 = sub(x = inputs_33_cast_fp16, y = channels_mean_33_cast_fp16)[name = tensor("zero_mean_33_cast_fp16")]; + tensor zero_mean_sq_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = zero_mean_33_cast_fp16)[name = tensor("zero_mean_sq_33_cast_fp16")]; + tensor var_1277 = const()[name = tensor("op_1277"), val = tensor([1])]; + tensor var_1278_cast_fp16 = reduce_mean(axes = var_1277, keep_dims = var_1174, x = zero_mean_sq_33_cast_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1279_to_fp16 = const()[name = tensor("op_1279_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1280_cast_fp16 = add(x = var_1278_cast_fp16, y = var_1279_to_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor denom_33_epsilon_0_to_fp16 = const()[name = tensor("denom_33_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0_to_fp16, x = var_1280_cast_fp16)[name = tensor("denom_33_cast_fp16")]; + tensor out_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = denom_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_79_gamma_0_to_fp16 = const()[name = tensor("obj_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179593088)))]; + tensor obj_79_beta_0_to_fp16 = const()[name = tensor("obj_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179594688)))]; + tensor obj_79_epsilon_0_to_fp16 = const()[name = tensor("obj_79_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor var_1295 = const()[name = tensor("op_1295"), val = tensor([1, 1])]; + tensor var_1297 = const()[name = tensor("op_1297"), val = tensor([1, 1])]; + tensor query_23_pad_type_0 = const()[name = tensor("query_23_pad_type_0"), val = tensor("custom")]; + tensor query_23_pad_0 = const()[name = 
tensor("query_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179596288)))]; + tensor layers_5_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180776000)))]; + tensor query_23_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_bias_to_fp16, dilations = var_1297, groups = var_1173, pad = query_23_pad_0, pad_type = query_23_pad_type_0, strides = var_1295, weight = layers_5_encoder_attn_q_proj_weight_to_fp16, x = obj_79_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor var_1301 = const()[name = tensor("op_1301"), val = tensor([1, 1])]; + tensor var_1303 = const()[name = tensor("op_1303"), val = tensor([1, 1])]; + tensor key_23_pad_type_0 = const()[name = tensor("key_23_pad_type_0"), val = tensor("custom")]; + tensor key_23_pad_0 = const()[name = tensor("key_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180777600)))]; + tensor key_23_cast_fp16 = conv(dilations = var_1303, groups = var_1173, pad = key_23_pad_0, pad_type = key_23_pad_type_0, strides = var_1301, weight = layers_5_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_23_cast_fp16")]; + tensor var_1308 = const()[name = tensor("op_1308"), val = tensor([1, 1])]; + tensor var_1310 = const()[name = tensor("op_1310"), val = tensor([1, 1])]; + tensor value_23_pad_type_0 = const()[name = tensor("value_23_pad_type_0"), val = tensor("custom")]; + tensor value_23_pad_0 = const()[name = tensor("value_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181957312)))]; + tensor layers_5_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183137024)))]; + tensor value_23_cast_fp16 = conv(bias = layers_5_encoder_attn_v_proj_bias_to_fp16, dilations = var_1310, groups = var_1173, pad = value_23_pad_0, pad_type = value_23_pad_type_0, strides = var_1308, weight = layers_5_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_23_cast_fp16")]; + tensor var_1314 = const()[name = tensor("op_1314"), val = tensor([1, 12, 64, -1])]; + tensor var_1315_cast_fp16 = reshape(shape = var_1314, x = query_23_cast_fp16)[name = tensor("op_1315_cast_fp16")]; + tensor var_1316_to_fp16 = const()[name = tensor("op_1316_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1317_cast_fp16 = mul(x = var_1315_cast_fp16, y = var_1316_to_fp16)[name = tensor("op_1317_cast_fp16")]; + tensor var_1318 = const()[name = tensor("op_1318"), val = tensor([1, 12, 64, -1])]; + tensor var_1319_cast_fp16 = reshape(shape = var_1318, x = key_23_cast_fp16)[name = tensor("op_1319_cast_fp16")]; + tensor mh_w_35_transpose_x_0 = const()[name = tensor("mh_w_35_transpose_x_0"), val = tensor(true)]; + tensor mh_w_35_transpose_y_0 = const()[name = 
tensor("mh_w_35_transpose_y_0"), val = tensor(false)]; + tensor mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_1317_cast_fp16, y = var_1319_cast_fp16)[name = tensor("mh_w_35_cast_fp16")]; + tensor obj_83_cast_fp16 = softmax(axis = var_1166, x = mh_w_35_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor var_1323 = const()[name = tensor("op_1323"), val = tensor([1, 12, 64, -1])]; + tensor var_1324_cast_fp16 = reshape(shape = var_1323, x = value_23_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor attn_23_transpose_x_0 = const()[name = tensor("attn_23_transpose_x_0"), val = tensor(false)]; + tensor attn_23_transpose_y_0 = const()[name = tensor("attn_23_transpose_y_0"), val = tensor(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1324_cast_fp16, y = obj_83_cast_fp16)[name = tensor("attn_23_cast_fp16")]; + tensor var_1327 = const()[name = tensor("op_1327"), val = tensor([1, 768, 1, -1])]; + tensor input_53_cast_fp16 = reshape(shape = var_1327, x = attn_23_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor var_1331 = const()[name = tensor("op_1331"), val = tensor([1, 1])]; + tensor var_1333 = const()[name = tensor("op_1333"), val = tensor([1, 1])]; + tensor obj_81_pad_type_0 = const()[name = tensor("obj_81_pad_type_0"), val = tensor("custom")]; + tensor obj_81_pad_0 = const()[name = tensor("obj_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183138624)))]; + tensor layers_5_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184318336)))]; + tensor obj_81_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_bias_to_fp16, dilations = var_1333, groups = var_1173, pad = obj_81_pad_0, pad_type = obj_81_pad_type_0, strides = var_1331, weight = layers_5_encoder_attn_o_proj_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_81_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor var_1342 = const()[name = tensor("op_1342"), val = tensor([1])]; + tensor channels_mean_35_cast_fp16 = reduce_mean(axes = var_1342, keep_dims = var_1174, x = inputs_35_cast_fp16)[name = tensor("channels_mean_35_cast_fp16")]; + tensor zero_mean_35_cast_fp16 = sub(x = inputs_35_cast_fp16, y = channels_mean_35_cast_fp16)[name = tensor("zero_mean_35_cast_fp16")]; + tensor zero_mean_sq_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = zero_mean_35_cast_fp16)[name = tensor("zero_mean_sq_35_cast_fp16")]; + tensor var_1346 = const()[name = tensor("op_1346"), val = tensor([1])]; + tensor var_1347_cast_fp16 = reduce_mean(axes = var_1346, keep_dims = var_1174, x = zero_mean_sq_35_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_to_fp16 = const()[name = tensor("op_1348_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1349_cast_fp16 = add(x = var_1347_cast_fp16, y = var_1348_to_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor denom_35_epsilon_0_to_fp16 = const()[name = tensor("denom_35_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0_to_fp16, x = 
var_1349_cast_fp16)[name = tensor("denom_35_cast_fp16")]; + tensor out_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = denom_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_55_gamma_0_to_fp16 = const()[name = tensor("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184319936)))]; + tensor input_55_beta_0_to_fp16 = const()[name = tensor("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184321536)))]; + tensor input_55_epsilon_0_to_fp16 = const()[name = tensor("input_55_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1360 = const()[name = tensor("op_1360"), val = tensor([1, 1])]; + tensor var_1362 = const()[name = tensor("op_1362"), val = tensor([1, 1])]; + tensor input_57_pad_type_0 = const()[name = tensor("input_57_pad_type_0"), val = tensor("custom")]; + tensor input_57_pad_0 = const()[name = tensor("input_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184323136)))]; + tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189041792)))]; + tensor input_57_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = var_1362, groups = var_1173, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = var_1360, weight = layers_5_fc1_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor input_59_mode_0 = const()[name = tensor("input_59_mode_0"), val = tensor("EXACT")]; + tensor input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_1368 = const()[name = tensor("op_1368"), val = tensor([1, 1])]; + tensor var_1370 = const()[name = tensor("op_1370"), val = tensor([1, 1])]; + tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189048000)))]; + tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193766656)))]; + tensor hidden_states_13_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = var_1370, groups = var_1173, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = var_1368, weight = layers_5_fc2_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1384 = const()[name = tensor("op_1384"), val = tensor(3)]; + tensor var_1391 = const()[name = tensor("op_1391"), val = tensor(1)]; + tensor 
var_1392 = const()[name = tensor("op_1392"), val = tensor(true)]; + tensor var_1404 = const()[name = tensor("op_1404"), val = tensor([1])]; + tensor channels_mean_37_cast_fp16 = reduce_mean(axes = var_1404, keep_dims = var_1392, x = inputs_37_cast_fp16)[name = tensor("channels_mean_37_cast_fp16")]; + tensor zero_mean_37_cast_fp16 = sub(x = inputs_37_cast_fp16, y = channels_mean_37_cast_fp16)[name = tensor("zero_mean_37_cast_fp16")]; + tensor zero_mean_sq_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = zero_mean_37_cast_fp16)[name = tensor("zero_mean_sq_37_cast_fp16")]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1])]; + tensor var_1409_cast_fp16 = reduce_mean(axes = var_1408, keep_dims = var_1392, x = zero_mean_sq_37_cast_fp16)[name = tensor("op_1409_cast_fp16")]; + tensor var_1410_to_fp16 = const()[name = tensor("op_1410_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1411_cast_fp16 = add(x = var_1409_cast_fp16, y = var_1410_to_fp16)[name = tensor("op_1411_cast_fp16")]; + tensor denom_37_epsilon_0_to_fp16 = const()[name = tensor("denom_37_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0_to_fp16, x = var_1411_cast_fp16)[name = tensor("denom_37_cast_fp16")]; + tensor out_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = denom_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193768256)))]; + tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193769856)))]; + tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor var_1426 = const()[name = tensor("op_1426"), val = tensor([1, 1])]; + tensor var_1428 = const()[name = tensor("op_1428"), val = tensor([1, 1])]; + tensor query_25_pad_type_0 = const()[name = tensor("query_25_pad_type_0"), val = tensor("custom")]; + tensor query_25_pad_0 = const()[name = tensor("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193771456)))]; + tensor layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194951168)))]; + tensor query_25_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = var_1428, groups = var_1391, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = var_1426, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("query_25_cast_fp16")]; + tensor var_1432 = const()[name = tensor("op_1432"), val = tensor([1, 1])]; + tensor var_1434 = const()[name = tensor("op_1434"), val = tensor([1, 1])]; + tensor current_key_13_pad_type_0 = const()[name = tensor("current_key_13_pad_type_0"), val = tensor("custom")]; + tensor current_key_13_pad_0 = const()[name = 
tensor("current_key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194952768)))]; + tensor current_key_13_cast_fp16 = conv(dilations = var_1434, groups = var_1391, pad = current_key_13_pad_0, pad_type = current_key_13_pad_type_0, strides = var_1432, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("current_key_13_cast_fp16")]; + tensor var_1439 = const()[name = tensor("op_1439"), val = tensor([1, 1])]; + tensor var_1441 = const()[name = tensor("op_1441"), val = tensor([1, 1])]; + tensor current_value_13_pad_type_0 = const()[name = tensor("current_value_13_pad_type_0"), val = tensor("custom")]; + tensor current_value_13_pad_0 = const()[name = tensor("current_value_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196132480)))]; + tensor layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197312192)))]; + tensor current_value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = var_1441, groups = var_1391, pad = current_value_13_pad_0, pad_type = current_value_13_pad_type_0, strides = var_1439, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("current_value_13_cast_fp16")]; + tensor var_1448_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1448_cast_fp16")]; + tensor var_1450_cast_fp16 = mul(x = var_63_cast_fp16_6, y = var_161_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor key_25_cast_fp16 = add(x = var_1448_cast_fp16, y = var_1450_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor var_1452_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1452_cast_fp16")]; + tensor var_1454_cast_fp16 = mul(x = var_78_cast_fp16_6, y = var_161_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor value_25_cast_fp16 = add(x = var_1452_cast_fp16, y = var_1454_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_1457 = const()[name = tensor("op_1457"), val = tensor([1, 12, 64, -1])]; + tensor var_1458_cast_fp16 = reshape(shape = var_1457, x = query_25_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1459_to_fp16 = const()[name = tensor("op_1459_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1460_cast_fp16 = mul(x = var_1458_cast_fp16, y = var_1459_to_fp16)[name = tensor("op_1460_cast_fp16")]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 12, 64, -1])]; + tensor var_1462_cast_fp16 = reshape(shape = var_1461, x = key_25_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor mh_w_37_transpose_x_0 = const()[name = tensor("mh_w_37_transpose_x_0"), val = tensor(true)]; + tensor mh_w_37_transpose_y_0 = const()[name = tensor("mh_w_37_transpose_y_0"), val = tensor(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_1460_cast_fp16, y = var_1462_cast_fp16)[name = tensor("mh_w_37_cast_fp16")]; + tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_179_cast_fp16)[name = 
tensor("mh_w_39_cast_fp16")]; + tensor var_1470_cast_fp16 = softmax(axis = var_1384, x = mh_w_39_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1471 = const()[name = tensor("op_1471"), val = tensor([1, 12, 64, -1])]; + tensor var_1472_cast_fp16 = reshape(shape = var_1471, x = value_25_cast_fp16)[name = tensor("op_1472_cast_fp16")]; + tensor attn_25_transpose_x_0 = const()[name = tensor("attn_25_transpose_x_0"), val = tensor(false)]; + tensor attn_25_transpose_y_0 = const()[name = tensor("attn_25_transpose_y_0"), val = tensor(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1472_cast_fp16, y = var_1470_cast_fp16)[name = tensor("attn_25_cast_fp16")]; + tensor var_1475 = const()[name = tensor("op_1475"), val = tensor([1, 768, 1, -1])]; + tensor input_61_cast_fp16 = reshape(shape = var_1475, x = attn_25_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor var_1479 = const()[name = tensor("op_1479"), val = tensor([1, 1])]; + tensor var_1481 = const()[name = tensor("op_1481"), val = tensor([1, 1])]; + tensor obj_91_pad_type_0 = const()[name = tensor("obj_91_pad_type_0"), val = tensor("custom")]; + tensor obj_91_pad_0 = const()[name = tensor("obj_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197313792)))]; + tensor layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198493504)))]; + tensor obj_91_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = var_1481, groups = var_1391, pad = obj_91_pad_0, pad_type = obj_91_pad_type_0, strides = var_1479, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_61_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor var_1491 = const()[name = tensor("op_1491"), val = tensor([1])]; + tensor channels_mean_39_cast_fp16 = reduce_mean(axes = var_1491, keep_dims = var_1392, x = inputs_39_cast_fp16)[name = tensor("channels_mean_39_cast_fp16")]; + tensor zero_mean_39_cast_fp16 = sub(x = inputs_39_cast_fp16, y = channels_mean_39_cast_fp16)[name = tensor("zero_mean_39_cast_fp16")]; + tensor zero_mean_sq_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = zero_mean_39_cast_fp16)[name = tensor("zero_mean_sq_39_cast_fp16")]; + tensor var_1495 = const()[name = tensor("op_1495"), val = tensor([1])]; + tensor var_1496_cast_fp16 = reduce_mean(axes = var_1495, keep_dims = var_1392, x = zero_mean_sq_39_cast_fp16)[name = tensor("op_1496_cast_fp16")]; + tensor var_1497_to_fp16 = const()[name = tensor("op_1497_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1498_cast_fp16 = add(x = var_1496_cast_fp16, y = var_1497_to_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor denom_39_epsilon_0_to_fp16 = const()[name = tensor("denom_39_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0_to_fp16, x = var_1498_cast_fp16)[name = tensor("denom_39_cast_fp16")]; + tensor out_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = denom_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor obj_93_gamma_0_to_fp16 = const()[name = 
tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198495104)))]; + tensor obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198496704)))]; + tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor var_1513 = const()[name = tensor("op_1513"), val = tensor([1, 1])]; + tensor var_1515 = const()[name = tensor("op_1515"), val = tensor([1, 1])]; + tensor query_27_pad_type_0 = const()[name = tensor("query_27_pad_type_0"), val = tensor("custom")]; + tensor query_27_pad_0 = const()[name = tensor("query_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198498304)))]; + tensor layers_6_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199678016)))]; + tensor query_27_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_bias_to_fp16, dilations = var_1515, groups = var_1391, pad = query_27_pad_0, pad_type = query_27_pad_type_0, strides = var_1513, weight = layers_6_encoder_attn_q_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor var_1519 = const()[name = tensor("op_1519"), val = tensor([1, 1])]; + tensor var_1521 = const()[name = tensor("op_1521"), val = tensor([1, 1])]; + tensor key_27_pad_type_0 = const()[name = tensor("key_27_pad_type_0"), val = tensor("custom")]; + tensor key_27_pad_0 = const()[name = tensor("key_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199679616)))]; + tensor key_27_cast_fp16 = conv(dilations = var_1521, groups = var_1391, pad = key_27_pad_0, pad_type = key_27_pad_type_0, strides = var_1519, weight = layers_6_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_27_cast_fp16")]; + tensor var_1526 = const()[name = tensor("op_1526"), val = tensor([1, 1])]; + tensor var_1528 = const()[name = tensor("op_1528"), val = tensor([1, 1])]; + tensor value_27_pad_type_0 = const()[name = tensor("value_27_pad_type_0"), val = tensor("custom")]; + tensor value_27_pad_0 = const()[name = tensor("value_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200859328)))]; + tensor layers_6_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202039040)))]; + tensor value_27_cast_fp16 = conv(bias = layers_6_encoder_attn_v_proj_bias_to_fp16, dilations = var_1528, 
groups = var_1391, pad = value_27_pad_0, pad_type = value_27_pad_type_0, strides = var_1526, weight = layers_6_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_27_cast_fp16")]; + tensor var_1532 = const()[name = tensor("op_1532"), val = tensor([1, 12, 64, -1])]; + tensor var_1533_cast_fp16 = reshape(shape = var_1532, x = query_27_cast_fp16)[name = tensor("op_1533_cast_fp16")]; + tensor var_1534_to_fp16 = const()[name = tensor("op_1534_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1535_cast_fp16 = mul(x = var_1533_cast_fp16, y = var_1534_to_fp16)[name = tensor("op_1535_cast_fp16")]; + tensor var_1536 = const()[name = tensor("op_1536"), val = tensor([1, 12, 64, -1])]; + tensor var_1537_cast_fp16 = reshape(shape = var_1536, x = key_27_cast_fp16)[name = tensor("op_1537_cast_fp16")]; + tensor mh_w_41_transpose_x_0 = const()[name = tensor("mh_w_41_transpose_x_0"), val = tensor(true)]; + tensor mh_w_41_transpose_y_0 = const()[name = tensor("mh_w_41_transpose_y_0"), val = tensor(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_1535_cast_fp16, y = var_1537_cast_fp16)[name = tensor("mh_w_41_cast_fp16")]; + tensor obj_97_cast_fp16 = softmax(axis = var_1384, x = mh_w_41_cast_fp16)[name = tensor("obj_97_cast_fp16")]; + tensor var_1541 = const()[name = tensor("op_1541"), val = tensor([1, 12, 64, -1])]; + tensor var_1542_cast_fp16 = reshape(shape = var_1541, x = value_27_cast_fp16)[name = tensor("op_1542_cast_fp16")]; + tensor attn_27_transpose_x_0 = const()[name = tensor("attn_27_transpose_x_0"), val = tensor(false)]; + tensor attn_27_transpose_y_0 = const()[name = tensor("attn_27_transpose_y_0"), val = tensor(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1542_cast_fp16, y = obj_97_cast_fp16)[name = tensor("attn_27_cast_fp16")]; + tensor var_1545 = const()[name = tensor("op_1545"), val = tensor([1, 768, 1, -1])]; + tensor input_63_cast_fp16 = reshape(shape = var_1545, x = attn_27_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_1549 = const()[name = tensor("op_1549"), val = tensor([1, 1])]; + tensor var_1551 = const()[name = tensor("op_1551"), val = tensor([1, 1])]; + tensor obj_95_pad_type_0 = const()[name = tensor("obj_95_pad_type_0"), val = tensor("custom")]; + tensor obj_95_pad_0 = const()[name = tensor("obj_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202040640)))]; + tensor layers_6_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203220352)))]; + tensor obj_95_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_bias_to_fp16, dilations = var_1551, groups = var_1391, pad = obj_95_pad_0, pad_type = obj_95_pad_type_0, strides = var_1549, weight = layers_6_encoder_attn_o_proj_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("obj_95_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_95_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_1557 = const()[name = tensor("op_1557"), val = tensor([1])]; + tensor channels_mean_41_cast_fp16 = reduce_mean(axes = var_1557, keep_dims = var_1392, x = 
inputs_41_cast_fp16)[name = tensor("channels_mean_41_cast_fp16")]; + tensor zero_mean_41_cast_fp16 = sub(x = inputs_41_cast_fp16, y = channels_mean_41_cast_fp16)[name = tensor("zero_mean_41_cast_fp16")]; + tensor zero_mean_sq_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = zero_mean_41_cast_fp16)[name = tensor("zero_mean_sq_41_cast_fp16")]; + tensor var_1561 = const()[name = tensor("op_1561"), val = tensor([1])]; + tensor var_1562_cast_fp16 = reduce_mean(axes = var_1561, keep_dims = var_1392, x = zero_mean_sq_41_cast_fp16)[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1564_cast_fp16 = add(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor denom_41_epsilon_0_to_fp16 = const()[name = tensor("denom_41_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0_to_fp16, x = var_1564_cast_fp16)[name = tensor("denom_41_cast_fp16")]; + tensor out_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = denom_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor input_65_gamma_0_to_fp16 = const()[name = tensor("input_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203221952)))]; + tensor input_65_beta_0_to_fp16 = const()[name = tensor("input_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203223552)))]; + tensor input_65_epsilon_0_to_fp16 = const()[name = tensor("input_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor var_1575 = const()[name = tensor("op_1575"), val = tensor([1, 1])]; + tensor var_1577 = const()[name = tensor("op_1577"), val = tensor([1, 1])]; + tensor input_67_pad_type_0 = const()[name = tensor("input_67_pad_type_0"), val = tensor("custom")]; + tensor input_67_pad_0 = const()[name = tensor("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_fc1_weight_to_fp16 = const()[name = tensor("layers_6_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203225152)))]; + tensor layers_6_fc1_bias_to_fp16 = const()[name = tensor("layers_6_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207943808)))]; + tensor input_67_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = var_1577, groups = var_1391, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = var_1575, weight = layers_6_fc1_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor input_69_mode_0 = const()[name = tensor("input_69_mode_0"), val = tensor("EXACT")]; + tensor input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor var_1583 = const()[name = tensor("op_1583"), val = tensor([1, 1])]; + tensor var_1585 = const()[name = tensor("op_1585"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_type_0 = const()[name = tensor("hidden_states_15_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_15_pad_0 = const()[name = tensor("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
layers_6_fc2_weight_to_fp16 = const()[name = tensor("layers_6_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207950016)))]; + tensor layers_6_fc2_bias_to_fp16 = const()[name = tensor("layers_6_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212668672)))]; + tensor hidden_states_15_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = var_1585, groups = var_1391, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = var_1583, weight = layers_6_fc2_weight_to_fp16, x = input_69_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor var_1598 = const()[name = tensor("op_1598"), val = tensor(3)]; + tensor var_1605 = const()[name = tensor("op_1605"), val = tensor(1)]; + tensor var_1606 = const()[name = tensor("op_1606"), val = tensor(true)]; + tensor var_1618 = const()[name = tensor("op_1618"), val = tensor([1])]; + tensor channels_mean_43_cast_fp16 = reduce_mean(axes = var_1618, keep_dims = var_1606, x = inputs_43_cast_fp16)[name = tensor("channels_mean_43_cast_fp16")]; + tensor zero_mean_43_cast_fp16 = sub(x = inputs_43_cast_fp16, y = channels_mean_43_cast_fp16)[name = tensor("zero_mean_43_cast_fp16")]; + tensor zero_mean_sq_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = zero_mean_43_cast_fp16)[name = tensor("zero_mean_sq_43_cast_fp16")]; + tensor var_1622 = const()[name = tensor("op_1622"), val = tensor([1])]; + tensor var_1623_cast_fp16 = reduce_mean(axes = var_1622, keep_dims = var_1606, x = zero_mean_sq_43_cast_fp16)[name = tensor("op_1623_cast_fp16")]; + tensor var_1624_to_fp16 = const()[name = tensor("op_1624_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1625_cast_fp16 = add(x = var_1623_cast_fp16, y = var_1624_to_fp16)[name = tensor("op_1625_cast_fp16")]; + tensor denom_43_epsilon_0_to_fp16 = const()[name = tensor("denom_43_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0_to_fp16, x = var_1625_cast_fp16)[name = tensor("denom_43_cast_fp16")]; + tensor out_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = denom_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor obj_99_gamma_0_to_fp16 = const()[name = tensor("obj_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212670272)))]; + tensor obj_99_beta_0_to_fp16 = const()[name = tensor("obj_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212671872)))]; + tensor obj_99_epsilon_0_to_fp16 = const()[name = tensor("obj_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_99_cast_fp16 = batch_norm(beta = obj_99_beta_0_to_fp16, epsilon = obj_99_epsilon_0_to_fp16, gamma = obj_99_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("obj_99_cast_fp16")]; + tensor var_1640 = const()[name = tensor("op_1640"), val = tensor([1, 1])]; + tensor var_1642 = const()[name = tensor("op_1642"), val = tensor([1, 1])]; + tensor query_29_pad_type_0 = const()[name = tensor("query_29_pad_type_0"), val = tensor("custom")]; + tensor query_29_pad_0 = const()[name = tensor("query_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = 
tensor("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212673472)))]; + tensor layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213853184)))]; + tensor query_29_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = var_1642, groups = var_1605, pad = query_29_pad_0, pad_type = query_29_pad_type_0, strides = var_1640, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor var_1646 = const()[name = tensor("op_1646"), val = tensor([1, 1])]; + tensor var_1648 = const()[name = tensor("op_1648"), val = tensor([1, 1])]; + tensor current_key_15_pad_type_0 = const()[name = tensor("current_key_15_pad_type_0"), val = tensor("custom")]; + tensor current_key_15_pad_0 = const()[name = tensor("current_key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213854784)))]; + tensor current_key_15_cast_fp16 = conv(dilations = var_1648, groups = var_1605, pad = current_key_15_pad_0, pad_type = current_key_15_pad_type_0, strides = var_1646, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor("current_key_15_cast_fp16")]; + tensor var_1653 = const()[name = tensor("op_1653"), val = tensor([1, 1])]; + tensor var_1655 = const()[name = tensor("op_1655"), val = tensor([1, 1])]; + tensor current_value_15_pad_type_0 = const()[name = tensor("current_value_15_pad_type_0"), val = tensor("custom")]; + tensor current_value_15_pad_0 = const()[name = tensor("current_value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215034496)))]; + tensor layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216214208)))]; + tensor current_value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = var_1655, groups = var_1605, pad = current_value_15_pad_0, pad_type = current_value_15_pad_type_0, strides = var_1653, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor("current_value_15_cast_fp16")]; + tensor var_1662_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1662_cast_fp16")]; + tensor var_1664_cast_fp16 = mul(x = var_63_cast_fp16_7, y = var_161_cast_fp16)[name = tensor("op_1664_cast_fp16")]; + tensor key_29_cast_fp16 = add(x = var_1662_cast_fp16, y = var_1664_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor var_1666_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1668_cast_fp16 = mul(x = var_78_cast_fp16_7, y = var_161_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor value_29_cast_fp16 = add(x = var_1666_cast_fp16, y = var_1668_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_1671 = const()[name = tensor("op_1671"), val = tensor([1, 12, 64, -1])]; + tensor 
var_1672_cast_fp16 = reshape(shape = var_1671, x = query_29_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_to_fp16 = const()[name = tensor("op_1673_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1674_cast_fp16 = mul(x = var_1672_cast_fp16, y = var_1673_to_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675 = const()[name = tensor("op_1675"), val = tensor([1, 12, 64, -1])]; + tensor var_1676_cast_fp16 = reshape(shape = var_1675, x = key_29_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor mh_w_43_transpose_x_0 = const()[name = tensor("mh_w_43_transpose_x_0"), val = tensor(true)]; + tensor mh_w_43_transpose_y_0 = const()[name = tensor("mh_w_43_transpose_y_0"), val = tensor(false)]; + tensor mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_1674_cast_fp16, y = var_1676_cast_fp16)[name = tensor("mh_w_43_cast_fp16")]; + tensor mh_w_45_cast_fp16 = add(x = mh_w_43_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_45_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_1598, x = mh_w_45_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685 = const()[name = tensor("op_1685"), val = tensor([1, 12, 64, -1])]; + tensor var_1686_cast_fp16 = reshape(shape = var_1685, x = value_29_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor attn_29_transpose_x_0 = const()[name = tensor("attn_29_transpose_x_0"), val = tensor(false)]; + tensor attn_29_transpose_y_0 = const()[name = tensor("attn_29_transpose_y_0"), val = tensor(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_1686_cast_fp16, y = var_1684_cast_fp16)[name = tensor("attn_29_cast_fp16")]; + tensor var_1689 = const()[name = tensor("op_1689"), val = tensor([1, 768, 1, -1])]; + tensor input_71_cast_fp16 = reshape(shape = var_1689, x = attn_29_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_1693 = const()[name = tensor("op_1693"), val = tensor([1, 1])]; + tensor var_1695 = const()[name = tensor("op_1695"), val = tensor([1, 1])]; + tensor obj_105_pad_type_0 = const()[name = tensor("obj_105_pad_type_0"), val = tensor("custom")]; + tensor obj_105_pad_0 = const()[name = tensor("obj_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216215808)))]; + tensor layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217395520)))]; + tensor obj_105_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = var_1695, groups = var_1605, pad = obj_105_pad_0, pad_type = obj_105_pad_type_0, strides = var_1693, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("obj_105_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_105_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor var_1705 = const()[name = tensor("op_1705"), val = tensor([1])]; + tensor channels_mean_45_cast_fp16 = reduce_mean(axes = var_1705, keep_dims = var_1606, x = inputs_45_cast_fp16)[name = tensor("channels_mean_45_cast_fp16")]; + tensor zero_mean_45_cast_fp16 = sub(x = inputs_45_cast_fp16, y = channels_mean_45_cast_fp16)[name = tensor("zero_mean_45_cast_fp16")]; + tensor 
zero_mean_sq_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = zero_mean_45_cast_fp16)[name = tensor("zero_mean_sq_45_cast_fp16")]; + tensor var_1709 = const()[name = tensor("op_1709"), val = tensor([1])]; + tensor var_1710_cast_fp16 = reduce_mean(axes = var_1709, keep_dims = var_1606, x = zero_mean_sq_45_cast_fp16)[name = tensor("op_1710_cast_fp16")]; + tensor var_1711_to_fp16 = const()[name = tensor("op_1711_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1712_cast_fp16 = add(x = var_1710_cast_fp16, y = var_1711_to_fp16)[name = tensor("op_1712_cast_fp16")]; + tensor denom_45_epsilon_0_to_fp16 = const()[name = tensor("denom_45_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0_to_fp16, x = var_1712_cast_fp16)[name = tensor("denom_45_cast_fp16")]; + tensor out_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = denom_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_107_gamma_0_to_fp16 = const()[name = tensor("obj_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217397120)))]; + tensor obj_107_beta_0_to_fp16 = const()[name = tensor("obj_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217398720)))]; + tensor obj_107_epsilon_0_to_fp16 = const()[name = tensor("obj_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_107_cast_fp16 = batch_norm(beta = obj_107_beta_0_to_fp16, epsilon = obj_107_epsilon_0_to_fp16, gamma = obj_107_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_107_cast_fp16")]; + tensor var_1727 = const()[name = tensor("op_1727"), val = tensor([1, 1])]; + tensor var_1729 = const()[name = tensor("op_1729"), val = tensor([1, 1])]; + tensor query_31_pad_type_0 = const()[name = tensor("query_31_pad_type_0"), val = tensor("custom")]; + tensor query_31_pad_0 = const()[name = tensor("query_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217400320)))]; + tensor layers_7_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218580032)))]; + tensor query_31_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_bias_to_fp16, dilations = var_1729, groups = var_1605, pad = query_31_pad_0, pad_type = query_31_pad_type_0, strides = var_1727, weight = layers_7_encoder_attn_q_proj_weight_to_fp16, x = obj_107_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor var_1733 = const()[name = tensor("op_1733"), val = tensor([1, 1])]; + tensor var_1735 = const()[name = tensor("op_1735"), val = tensor([1, 1])]; + tensor key_31_pad_type_0 = const()[name = tensor("key_31_pad_type_0"), val = tensor("custom")]; + tensor key_31_pad_0 = const()[name = tensor("key_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218581632)))]; + tensor key_31_cast_fp16 = conv(dilations = var_1735, groups = var_1605, pad = key_31_pad_0, pad_type = key_31_pad_type_0, strides = var_1733, weight = 
layers_7_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_31_cast_fp16")]; + tensor var_1740 = const()[name = tensor("op_1740"), val = tensor([1, 1])]; + tensor var_1742 = const()[name = tensor("op_1742"), val = tensor([1, 1])]; + tensor value_31_pad_type_0 = const()[name = tensor("value_31_pad_type_0"), val = tensor("custom")]; + tensor value_31_pad_0 = const()[name = tensor("value_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219761344)))]; + tensor layers_7_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220941056)))]; + tensor value_31_cast_fp16 = conv(bias = layers_7_encoder_attn_v_proj_bias_to_fp16, dilations = var_1742, groups = var_1605, pad = value_31_pad_0, pad_type = value_31_pad_type_0, strides = var_1740, weight = layers_7_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_31_cast_fp16")]; + tensor var_1746 = const()[name = tensor("op_1746"), val = tensor([1, 12, 64, -1])]; + tensor var_1747_cast_fp16 = reshape(shape = var_1746, x = query_31_cast_fp16)[name = tensor("op_1747_cast_fp16")]; + tensor var_1748_to_fp16 = const()[name = tensor("op_1748_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1749_cast_fp16 = mul(x = var_1747_cast_fp16, y = var_1748_to_fp16)[name = tensor("op_1749_cast_fp16")]; + tensor var_1750 = const()[name = tensor("op_1750"), val = tensor([1, 12, 64, -1])]; + tensor var_1751_cast_fp16 = reshape(shape = var_1750, x = key_31_cast_fp16)[name = tensor("op_1751_cast_fp16")]; + tensor mh_w_47_transpose_x_0 = const()[name = tensor("mh_w_47_transpose_x_0"), val = tensor(true)]; + tensor mh_w_47_transpose_y_0 = const()[name = tensor("mh_w_47_transpose_y_0"), val = tensor(false)]; + tensor mh_w_47_cast_fp16 = matmul(transpose_x = mh_w_47_transpose_x_0, transpose_y = mh_w_47_transpose_y_0, x = var_1749_cast_fp16, y = var_1751_cast_fp16)[name = tensor("mh_w_47_cast_fp16")]; + tensor obj_111_cast_fp16 = softmax(axis = var_1598, x = mh_w_47_cast_fp16)[name = tensor("obj_111_cast_fp16")]; + tensor var_1755 = const()[name = tensor("op_1755"), val = tensor([1, 12, 64, -1])]; + tensor var_1756_cast_fp16 = reshape(shape = var_1755, x = value_31_cast_fp16)[name = tensor("op_1756_cast_fp16")]; + tensor attn_31_transpose_x_0 = const()[name = tensor("attn_31_transpose_x_0"), val = tensor(false)]; + tensor attn_31_transpose_y_0 = const()[name = tensor("attn_31_transpose_y_0"), val = tensor(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_1756_cast_fp16, y = obj_111_cast_fp16)[name = tensor("attn_31_cast_fp16")]; + tensor var_1759 = const()[name = tensor("op_1759"), val = tensor([1, 768, 1, -1])]; + tensor input_73_cast_fp16 = reshape(shape = var_1759, x = attn_31_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_1763 = const()[name = tensor("op_1763"), val = tensor([1, 1])]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 1])]; + tensor obj_109_pad_type_0 = const()[name = tensor("obj_109_pad_type_0"), val = tensor("custom")]; + tensor obj_109_pad_0 = const()[name = tensor("obj_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
layers_7_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220942656)))]; + tensor layers_7_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222122368)))]; + tensor obj_109_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_bias_to_fp16, dilations = var_1765, groups = var_1605, pad = obj_109_pad_0, pad_type = obj_109_pad_type_0, strides = var_1763, weight = layers_7_encoder_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("obj_109_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_109_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1])]; + tensor channels_mean_47_cast_fp16 = reduce_mean(axes = var_1771, keep_dims = var_1606, x = inputs_47_cast_fp16)[name = tensor("channels_mean_47_cast_fp16")]; + tensor zero_mean_47_cast_fp16 = sub(x = inputs_47_cast_fp16, y = channels_mean_47_cast_fp16)[name = tensor("zero_mean_47_cast_fp16")]; + tensor zero_mean_sq_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = zero_mean_47_cast_fp16)[name = tensor("zero_mean_sq_47_cast_fp16")]; + tensor var_1775 = const()[name = tensor("op_1775"), val = tensor([1])]; + tensor var_1776_cast_fp16 = reduce_mean(axes = var_1775, keep_dims = var_1606, x = zero_mean_sq_47_cast_fp16)[name = tensor("op_1776_cast_fp16")]; + tensor var_1777_to_fp16 = const()[name = tensor("op_1777_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1778_cast_fp16 = add(x = var_1776_cast_fp16, y = var_1777_to_fp16)[name = tensor("op_1778_cast_fp16")]; + tensor denom_47_epsilon_0_to_fp16 = const()[name = tensor("denom_47_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0_to_fp16, x = var_1778_cast_fp16)[name = tensor("denom_47_cast_fp16")]; + tensor out_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = denom_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222123968)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222125568)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_1789 = const()[name = tensor("op_1789"), val = tensor([1, 1])]; + tensor var_1791 = const()[name = tensor("op_1791"), val = tensor([1, 1])]; + tensor input_77_pad_type_0 = const()[name = tensor("input_77_pad_type_0"), val = tensor("custom")]; + tensor input_77_pad_0 = const()[name = tensor("input_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_fc1_weight_to_fp16 = const()[name = tensor("layers_7_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222127168)))]; + tensor layers_7_fc1_bias_to_fp16 = 
const()[name = tensor("layers_7_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226845824)))]; + tensor input_77_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = var_1791, groups = var_1605, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = var_1789, weight = layers_7_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_1797 = const()[name = tensor("op_1797"), val = tensor([1, 1])]; + tensor var_1799 = const()[name = tensor("op_1799"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_type_0 = const()[name = tensor("hidden_states_17_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_17_pad_0 = const()[name = tensor("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_fc2_weight_to_fp16 = const()[name = tensor("layers_7_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226852032)))]; + tensor layers_7_fc2_bias_to_fp16 = const()[name = tensor("layers_7_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231570688)))]; + tensor hidden_states_17_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = var_1799, groups = var_1605, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = var_1797, weight = layers_7_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor(3)]; + tensor var_1819 = const()[name = tensor("op_1819"), val = tensor(1)]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor(true)]; + tensor var_1832 = const()[name = tensor("op_1832"), val = tensor([1])]; + tensor channels_mean_49_cast_fp16 = reduce_mean(axes = var_1832, keep_dims = var_1820, x = inputs_49_cast_fp16)[name = tensor("channels_mean_49_cast_fp16")]; + tensor zero_mean_49_cast_fp16 = sub(x = inputs_49_cast_fp16, y = channels_mean_49_cast_fp16)[name = tensor("zero_mean_49_cast_fp16")]; + tensor zero_mean_sq_49_cast_fp16 = mul(x = zero_mean_49_cast_fp16, y = zero_mean_49_cast_fp16)[name = tensor("zero_mean_sq_49_cast_fp16")]; + tensor var_1836 = const()[name = tensor("op_1836"), val = tensor([1])]; + tensor var_1837_cast_fp16 = reduce_mean(axes = var_1836, keep_dims = var_1820, x = zero_mean_sq_49_cast_fp16)[name = tensor("op_1837_cast_fp16")]; + tensor var_1838_to_fp16 = const()[name = tensor("op_1838_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1839_cast_fp16 = add(x = var_1837_cast_fp16, y = var_1838_to_fp16)[name = tensor("op_1839_cast_fp16")]; + tensor denom_49_epsilon_0_to_fp16 = const()[name = tensor("denom_49_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0_to_fp16, x = var_1839_cast_fp16)[name = tensor("denom_49_cast_fp16")]; + tensor out_49_cast_fp16 = mul(x = zero_mean_49_cast_fp16, y = denom_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; + tensor obj_113_gamma_0_to_fp16 = const()[name = tensor("obj_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(231572288)))]; + tensor obj_113_beta_0_to_fp16 = const()[name = tensor("obj_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231573888)))]; + tensor obj_113_epsilon_0_to_fp16 = const()[name = tensor("obj_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_113_cast_fp16")]; + tensor var_1854 = const()[name = tensor("op_1854"), val = tensor([1, 1])]; + tensor var_1856 = const()[name = tensor("op_1856"), val = tensor([1, 1])]; + tensor query_33_pad_type_0 = const()[name = tensor("query_33_pad_type_0"), val = tensor("custom")]; + tensor query_33_pad_0 = const()[name = tensor("query_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231575488)))]; + tensor layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232755200)))]; + tensor query_33_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = var_1856, groups = var_1819, pad = query_33_pad_0, pad_type = query_33_pad_type_0, strides = var_1854, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor var_1860 = const()[name = tensor("op_1860"), val = tensor([1, 1])]; + tensor var_1862 = const()[name = tensor("op_1862"), val = tensor([1, 1])]; + tensor current_key_17_pad_type_0 = const()[name = tensor("current_key_17_pad_type_0"), val = tensor("custom")]; + tensor current_key_17_pad_0 = const()[name = tensor("current_key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232756800)))]; + tensor current_key_17_cast_fp16 = conv(dilations = var_1862, groups = var_1819, pad = current_key_17_pad_0, pad_type = current_key_17_pad_type_0, strides = var_1860, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("current_key_17_cast_fp16")]; + tensor var_1867 = const()[name = tensor("op_1867"), val = tensor([1, 1])]; + tensor var_1869 = const()[name = tensor("op_1869"), val = tensor([1, 1])]; + tensor current_value_17_pad_type_0 = const()[name = tensor("current_value_17_pad_type_0"), val = tensor("custom")]; + tensor current_value_17_pad_0 = const()[name = tensor("current_value_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233936512)))]; + tensor layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235116224)))]; + tensor current_value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = var_1869, 
groups = var_1819, pad = current_value_17_pad_0, pad_type = current_value_17_pad_type_0, strides = var_1867, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("current_value_17_cast_fp16")]; + tensor var_1876_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1876_cast_fp16")]; + tensor var_1878_cast_fp16 = mul(x = var_63_cast_fp16_8, y = var_161_cast_fp16)[name = tensor("op_1878_cast_fp16")]; + tensor key_33_cast_fp16 = add(x = var_1876_cast_fp16, y = var_1878_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor var_1880_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_1880_cast_fp16")]; + tensor var_1882_cast_fp16 = mul(x = var_78_cast_fp16_8, y = var_161_cast_fp16)[name = tensor("op_1882_cast_fp16")]; + tensor value_33_cast_fp16 = add(x = var_1880_cast_fp16, y = var_1882_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_1885 = const()[name = tensor("op_1885"), val = tensor([1, 12, 64, -1])]; + tensor var_1886_cast_fp16 = reshape(shape = var_1885, x = query_33_cast_fp16)[name = tensor("op_1886_cast_fp16")]; + tensor var_1887_to_fp16 = const()[name = tensor("op_1887_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1888_cast_fp16 = mul(x = var_1886_cast_fp16, y = var_1887_to_fp16)[name = tensor("op_1888_cast_fp16")]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor([1, 12, 64, -1])]; + tensor var_1890_cast_fp16 = reshape(shape = var_1889, x = key_33_cast_fp16)[name = tensor("op_1890_cast_fp16")]; + tensor mh_w_49_transpose_x_0 = const()[name = tensor("mh_w_49_transpose_x_0"), val = tensor(true)]; + tensor mh_w_49_transpose_y_0 = const()[name = tensor("mh_w_49_transpose_y_0"), val = tensor(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_1888_cast_fp16, y = var_1890_cast_fp16)[name = tensor("mh_w_49_cast_fp16")]; + tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_51_cast_fp16")]; + tensor var_1898_cast_fp16 = softmax(axis = var_1812, x = mh_w_51_cast_fp16)[name = tensor("op_1898_cast_fp16")]; + tensor var_1899 = const()[name = tensor("op_1899"), val = tensor([1, 12, 64, -1])]; + tensor var_1900_cast_fp16 = reshape(shape = var_1899, x = value_33_cast_fp16)[name = tensor("op_1900_cast_fp16")]; + tensor attn_33_transpose_x_0 = const()[name = tensor("attn_33_transpose_x_0"), val = tensor(false)]; + tensor attn_33_transpose_y_0 = const()[name = tensor("attn_33_transpose_y_0"), val = tensor(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_1900_cast_fp16, y = var_1898_cast_fp16)[name = tensor("attn_33_cast_fp16")]; + tensor var_1903 = const()[name = tensor("op_1903"), val = tensor([1, 768, 1, -1])]; + tensor input_81_cast_fp16 = reshape(shape = var_1903, x = attn_33_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_1907 = const()[name = tensor("op_1907"), val = tensor([1, 1])]; + tensor var_1909 = const()[name = tensor("op_1909"), val = tensor([1, 1])]; + tensor obj_119_pad_type_0 = const()[name = tensor("obj_119_pad_type_0"), val = tensor("custom")]; + tensor obj_119_pad_0 = const()[name = tensor("obj_119_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(235117824)))]; + tensor layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236297536)))]; + tensor obj_119_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = var_1909, groups = var_1819, pad = obj_119_pad_0, pad_type = obj_119_pad_type_0, strides = var_1907, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("obj_119_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_119_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor var_1919 = const()[name = tensor("op_1919"), val = tensor([1])]; + tensor channels_mean_51_cast_fp16 = reduce_mean(axes = var_1919, keep_dims = var_1820, x = inputs_51_cast_fp16)[name = tensor("channels_mean_51_cast_fp16")]; + tensor zero_mean_51_cast_fp16 = sub(x = inputs_51_cast_fp16, y = channels_mean_51_cast_fp16)[name = tensor("zero_mean_51_cast_fp16")]; + tensor zero_mean_sq_51_cast_fp16 = mul(x = zero_mean_51_cast_fp16, y = zero_mean_51_cast_fp16)[name = tensor("zero_mean_sq_51_cast_fp16")]; + tensor var_1923 = const()[name = tensor("op_1923"), val = tensor([1])]; + tensor var_1924_cast_fp16 = reduce_mean(axes = var_1923, keep_dims = var_1820, x = zero_mean_sq_51_cast_fp16)[name = tensor("op_1924_cast_fp16")]; + tensor var_1925_to_fp16 = const()[name = tensor("op_1925_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1926_cast_fp16 = add(x = var_1924_cast_fp16, y = var_1925_to_fp16)[name = tensor("op_1926_cast_fp16")]; + tensor denom_51_epsilon_0_to_fp16 = const()[name = tensor("denom_51_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_51_cast_fp16 = rsqrt(epsilon = denom_51_epsilon_0_to_fp16, x = var_1926_cast_fp16)[name = tensor("denom_51_cast_fp16")]; + tensor out_51_cast_fp16 = mul(x = zero_mean_51_cast_fp16, y = denom_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; + tensor obj_121_gamma_0_to_fp16 = const()[name = tensor("obj_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236299136)))]; + tensor obj_121_beta_0_to_fp16 = const()[name = tensor("obj_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236300736)))]; + tensor obj_121_epsilon_0_to_fp16 = const()[name = tensor("obj_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor("obj_121_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor query_35_pad_type_0 = const()[name = tensor("query_35_pad_type_0"), val = tensor("custom")]; + tensor query_35_pad_0 = const()[name = tensor("query_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236302336)))]; + tensor layers_8_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237482048)))]; + tensor 
query_35_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_bias_to_fp16, dilations = var_1943, groups = var_1819, pad = query_35_pad_0, pad_type = query_35_pad_type_0, strides = var_1941, weight = layers_8_encoder_attn_q_proj_weight_to_fp16, x = obj_121_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor var_1947 = const()[name = tensor("op_1947"), val = tensor([1, 1])]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor key_35_pad_type_0 = const()[name = tensor("key_35_pad_type_0"), val = tensor("custom")]; + tensor key_35_pad_0 = const()[name = tensor("key_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237483648)))]; + tensor key_35_cast_fp16 = conv(dilations = var_1949, groups = var_1819, pad = key_35_pad_0, pad_type = key_35_pad_type_0, strides = var_1947, weight = layers_8_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_35_cast_fp16")]; + tensor var_1954 = const()[name = tensor("op_1954"), val = tensor([1, 1])]; + tensor var_1956 = const()[name = tensor("op_1956"), val = tensor([1, 1])]; + tensor value_35_pad_type_0 = const()[name = tensor("value_35_pad_type_0"), val = tensor("custom")]; + tensor value_35_pad_0 = const()[name = tensor("value_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238663360)))]; + tensor layers_8_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239843072)))]; + tensor value_35_cast_fp16 = conv(bias = layers_8_encoder_attn_v_proj_bias_to_fp16, dilations = var_1956, groups = var_1819, pad = value_35_pad_0, pad_type = value_35_pad_type_0, strides = var_1954, weight = layers_8_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_35_cast_fp16")]; + tensor var_1960 = const()[name = tensor("op_1960"), val = tensor([1, 12, 64, -1])]; + tensor var_1961_cast_fp16 = reshape(shape = var_1960, x = query_35_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor var_1962_to_fp16 = const()[name = tensor("op_1962_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1963_cast_fp16 = mul(x = var_1961_cast_fp16, y = var_1962_to_fp16)[name = tensor("op_1963_cast_fp16")]; + tensor var_1964 = const()[name = tensor("op_1964"), val = tensor([1, 12, 64, -1])]; + tensor var_1965_cast_fp16 = reshape(shape = var_1964, x = key_35_cast_fp16)[name = tensor("op_1965_cast_fp16")]; + tensor mh_w_53_transpose_x_0 = const()[name = tensor("mh_w_53_transpose_x_0"), val = tensor(true)]; + tensor mh_w_53_transpose_y_0 = const()[name = tensor("mh_w_53_transpose_y_0"), val = tensor(false)]; + tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_1963_cast_fp16, y = var_1965_cast_fp16)[name = tensor("mh_w_53_cast_fp16")]; + tensor obj_125_cast_fp16 = softmax(axis = var_1812, x = mh_w_53_cast_fp16)[name = tensor("obj_125_cast_fp16")]; + tensor var_1969 = const()[name = tensor("op_1969"), val = tensor([1, 12, 64, -1])]; + tensor var_1970_cast_fp16 = reshape(shape = var_1969, x = 
value_35_cast_fp16)[name = tensor("op_1970_cast_fp16")]; + tensor attn_35_transpose_x_0 = const()[name = tensor("attn_35_transpose_x_0"), val = tensor(false)]; + tensor attn_35_transpose_y_0 = const()[name = tensor("attn_35_transpose_y_0"), val = tensor(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_1970_cast_fp16, y = obj_125_cast_fp16)[name = tensor("attn_35_cast_fp16")]; + tensor var_1973 = const()[name = tensor("op_1973"), val = tensor([1, 768, 1, -1])]; + tensor input_83_cast_fp16 = reshape(shape = var_1973, x = attn_35_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_1977 = const()[name = tensor("op_1977"), val = tensor([1, 1])]; + tensor var_1979 = const()[name = tensor("op_1979"), val = tensor([1, 1])]; + tensor obj_123_pad_type_0 = const()[name = tensor("obj_123_pad_type_0"), val = tensor("custom")]; + tensor obj_123_pad_0 = const()[name = tensor("obj_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239844672)))]; + tensor layers_8_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241024384)))]; + tensor obj_123_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_bias_to_fp16, dilations = var_1979, groups = var_1819, pad = obj_123_pad_0, pad_type = obj_123_pad_type_0, strides = var_1977, weight = layers_8_encoder_attn_o_proj_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("obj_123_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_123_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor var_1988 = const()[name = tensor("op_1988"), val = tensor([1])]; + tensor channels_mean_53_cast_fp16 = reduce_mean(axes = var_1988, keep_dims = var_1820, x = inputs_53_cast_fp16)[name = tensor("channels_mean_53_cast_fp16")]; + tensor zero_mean_53_cast_fp16 = sub(x = inputs_53_cast_fp16, y = channels_mean_53_cast_fp16)[name = tensor("zero_mean_53_cast_fp16")]; + tensor zero_mean_sq_53_cast_fp16 = mul(x = zero_mean_53_cast_fp16, y = zero_mean_53_cast_fp16)[name = tensor("zero_mean_sq_53_cast_fp16")]; + tensor var_1992 = const()[name = tensor("op_1992"), val = tensor([1])]; + tensor var_1993_cast_fp16 = reduce_mean(axes = var_1992, keep_dims = var_1820, x = zero_mean_sq_53_cast_fp16)[name = tensor("op_1993_cast_fp16")]; + tensor var_1994_to_fp16 = const()[name = tensor("op_1994_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1995_cast_fp16 = add(x = var_1993_cast_fp16, y = var_1994_to_fp16)[name = tensor("op_1995_cast_fp16")]; + tensor denom_53_epsilon_0_to_fp16 = const()[name = tensor("denom_53_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_53_cast_fp16 = rsqrt(epsilon = denom_53_epsilon_0_to_fp16, x = var_1995_cast_fp16)[name = tensor("denom_53_cast_fp16")]; + tensor out_53_cast_fp16 = mul(x = zero_mean_53_cast_fp16, y = denom_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; + tensor input_85_gamma_0_to_fp16 = const()[name = tensor("input_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241025984)))]; + tensor input_85_beta_0_to_fp16 = const()[name = tensor("input_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(241027584)))]; + tensor input_85_epsilon_0_to_fp16 = const()[name = tensor("input_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor var_2006 = const()[name = tensor("op_2006"), val = tensor([1, 1])]; + tensor var_2008 = const()[name = tensor("op_2008"), val = tensor([1, 1])]; + tensor input_87_pad_type_0 = const()[name = tensor("input_87_pad_type_0"), val = tensor("custom")]; + tensor input_87_pad_0 = const()[name = tensor("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_fc1_weight_to_fp16 = const()[name = tensor("layers_8_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241029184)))]; + tensor layers_8_fc1_bias_to_fp16 = const()[name = tensor("layers_8_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245747840)))]; + tensor input_87_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = var_2008, groups = var_1819, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = var_2006, weight = layers_8_fc1_weight_to_fp16, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor input_89_mode_0 = const()[name = tensor("input_89_mode_0"), val = tensor("EXACT")]; + tensor input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_2014 = const()[name = tensor("op_2014"), val = tensor([1, 1])]; + tensor var_2016 = const()[name = tensor("op_2016"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_type_0 = const()[name = tensor("hidden_states_19_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_19_pad_0 = const()[name = tensor("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_fc2_weight_to_fp16 = const()[name = tensor("layers_8_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245754048)))]; + tensor layers_8_fc2_bias_to_fp16 = const()[name = tensor("layers_8_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250472704)))]; + tensor hidden_states_19_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = var_2016, groups = var_1819, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = var_2014, weight = layers_8_fc2_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor var_2030 = const()[name = tensor("op_2030"), val = tensor(3)]; + tensor var_2037 = const()[name = tensor("op_2037"), val = tensor(1)]; + tensor var_2038 = const()[name = tensor("op_2038"), val = tensor(true)]; + tensor var_2050 = const()[name = tensor("op_2050"), val = tensor([1])]; + tensor channels_mean_55_cast_fp16 = reduce_mean(axes = var_2050, keep_dims = var_2038, x = inputs_55_cast_fp16)[name = tensor("channels_mean_55_cast_fp16")]; + tensor zero_mean_55_cast_fp16 = sub(x = inputs_55_cast_fp16, y = channels_mean_55_cast_fp16)[name = tensor("zero_mean_55_cast_fp16")]; + tensor zero_mean_sq_55_cast_fp16 = mul(x = 
zero_mean_55_cast_fp16, y = zero_mean_55_cast_fp16)[name = tensor("zero_mean_sq_55_cast_fp16")]; + tensor var_2054 = const()[name = tensor("op_2054"), val = tensor([1])]; + tensor var_2055_cast_fp16 = reduce_mean(axes = var_2054, keep_dims = var_2038, x = zero_mean_sq_55_cast_fp16)[name = tensor("op_2055_cast_fp16")]; + tensor var_2056_to_fp16 = const()[name = tensor("op_2056_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2057_cast_fp16 = add(x = var_2055_cast_fp16, y = var_2056_to_fp16)[name = tensor("op_2057_cast_fp16")]; + tensor denom_55_epsilon_0_to_fp16 = const()[name = tensor("denom_55_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_55_cast_fp16 = rsqrt(epsilon = denom_55_epsilon_0_to_fp16, x = var_2057_cast_fp16)[name = tensor("denom_55_cast_fp16")]; + tensor out_55_cast_fp16 = mul(x = zero_mean_55_cast_fp16, y = denom_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; + tensor obj_127_gamma_0_to_fp16 = const()[name = tensor("obj_127_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250474304)))]; + tensor obj_127_beta_0_to_fp16 = const()[name = tensor("obj_127_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250475904)))]; + tensor obj_127_epsilon_0_to_fp16 = const()[name = tensor("obj_127_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_127_cast_fp16 = batch_norm(beta = obj_127_beta_0_to_fp16, epsilon = obj_127_epsilon_0_to_fp16, gamma = obj_127_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("obj_127_cast_fp16")]; + tensor var_2072 = const()[name = tensor("op_2072"), val = tensor([1, 1])]; + tensor var_2074 = const()[name = tensor("op_2074"), val = tensor([1, 1])]; + tensor query_37_pad_type_0 = const()[name = tensor("query_37_pad_type_0"), val = tensor("custom")]; + tensor query_37_pad_0 = const()[name = tensor("query_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250477504)))]; + tensor layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251657216)))]; + tensor query_37_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = var_2074, groups = var_2037, pad = query_37_pad_0, pad_type = query_37_pad_type_0, strides = var_2072, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor var_2078 = const()[name = tensor("op_2078"), val = tensor([1, 1])]; + tensor var_2080 = const()[name = tensor("op_2080"), val = tensor([1, 1])]; + tensor current_key_19_pad_type_0 = const()[name = tensor("current_key_19_pad_type_0"), val = tensor("custom")]; + tensor current_key_19_pad_0 = const()[name = tensor("current_key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251658816)))]; + tensor current_key_19_cast_fp16 = conv(dilations = var_2080, groups = var_2037, pad = current_key_19_pad_0, pad_type = current_key_19_pad_type_0, strides = var_2078, weight = 
layers_9_self_attn_k_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor("current_key_19_cast_fp16")]; + tensor var_2085 = const()[name = tensor("op_2085"), val = tensor([1, 1])]; + tensor var_2087 = const()[name = tensor("op_2087"), val = tensor([1, 1])]; + tensor current_value_19_pad_type_0 = const()[name = tensor("current_value_19_pad_type_0"), val = tensor("custom")]; + tensor current_value_19_pad_0 = const()[name = tensor("current_value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(252838528)))]; + tensor layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254018240)))]; + tensor current_value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = var_2087, groups = var_2037, pad = current_value_19_pad_0, pad_type = current_value_19_pad_type_0, strides = var_2085, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor("current_value_19_cast_fp16")]; + tensor var_2094_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2096_cast_fp16 = mul(x = var_63_cast_fp16_9, y = var_161_cast_fp16)[name = tensor("op_2096_cast_fp16")]; + tensor key_37_cast_fp16 = add(x = var_2094_cast_fp16, y = var_2096_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor var_2098_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2100_cast_fp16 = mul(x = var_78_cast_fp16_9, y = var_161_cast_fp16)[name = tensor("op_2100_cast_fp16")]; + tensor value_37_cast_fp16 = add(x = var_2098_cast_fp16, y = var_2100_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_2103 = const()[name = tensor("op_2103"), val = tensor([1, 12, 64, -1])]; + tensor var_2104_cast_fp16 = reshape(shape = var_2103, x = query_37_cast_fp16)[name = tensor("op_2104_cast_fp16")]; + tensor var_2105_to_fp16 = const()[name = tensor("op_2105_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2106_cast_fp16 = mul(x = var_2104_cast_fp16, y = var_2105_to_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2107 = const()[name = tensor("op_2107"), val = tensor([1, 12, 64, -1])]; + tensor var_2108_cast_fp16 = reshape(shape = var_2107, x = key_37_cast_fp16)[name = tensor("op_2108_cast_fp16")]; + tensor mh_w_55_transpose_x_0 = const()[name = tensor("mh_w_55_transpose_x_0"), val = tensor(true)]; + tensor mh_w_55_transpose_y_0 = const()[name = tensor("mh_w_55_transpose_y_0"), val = tensor(false)]; + tensor mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_2106_cast_fp16, y = var_2108_cast_fp16)[name = tensor("mh_w_55_cast_fp16")]; + tensor mh_w_57_cast_fp16 = add(x = mh_w_55_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_57_cast_fp16")]; + tensor var_2116_cast_fp16 = softmax(axis = var_2030, x = mh_w_57_cast_fp16)[name = tensor("op_2116_cast_fp16")]; + tensor var_2117 = const()[name = tensor("op_2117"), val = tensor([1, 12, 64, -1])]; + tensor var_2118_cast_fp16 = reshape(shape = var_2117, x = value_37_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor attn_37_transpose_x_0 = const()[name = tensor("attn_37_transpose_x_0"), val = tensor(false)]; + tensor 
attn_37_transpose_y_0 = const()[name = tensor("attn_37_transpose_y_0"), val = tensor(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2118_cast_fp16, y = var_2116_cast_fp16)[name = tensor("attn_37_cast_fp16")]; + tensor var_2121 = const()[name = tensor("op_2121"), val = tensor([1, 768, 1, -1])]; + tensor input_91_cast_fp16 = reshape(shape = var_2121, x = attn_37_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_2125 = const()[name = tensor("op_2125"), val = tensor([1, 1])]; + tensor var_2127 = const()[name = tensor("op_2127"), val = tensor([1, 1])]; + tensor obj_133_pad_type_0 = const()[name = tensor("obj_133_pad_type_0"), val = tensor("custom")]; + tensor obj_133_pad_0 = const()[name = tensor("obj_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254019840)))]; + tensor layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255199552)))]; + tensor obj_133_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = var_2127, groups = var_2037, pad = obj_133_pad_0, pad_type = obj_133_pad_type_0, strides = var_2125, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("obj_133_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_133_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor var_2137 = const()[name = tensor("op_2137"), val = tensor([1])]; + tensor channels_mean_57_cast_fp16 = reduce_mean(axes = var_2137, keep_dims = var_2038, x = inputs_57_cast_fp16)[name = tensor("channels_mean_57_cast_fp16")]; + tensor zero_mean_57_cast_fp16 = sub(x = inputs_57_cast_fp16, y = channels_mean_57_cast_fp16)[name = tensor("zero_mean_57_cast_fp16")]; + tensor zero_mean_sq_57_cast_fp16 = mul(x = zero_mean_57_cast_fp16, y = zero_mean_57_cast_fp16)[name = tensor("zero_mean_sq_57_cast_fp16")]; + tensor var_2141 = const()[name = tensor("op_2141"), val = tensor([1])]; + tensor var_2142_cast_fp16 = reduce_mean(axes = var_2141, keep_dims = var_2038, x = zero_mean_sq_57_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2143_to_fp16 = const()[name = tensor("op_2143_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2144_cast_fp16 = add(x = var_2142_cast_fp16, y = var_2143_to_fp16)[name = tensor("op_2144_cast_fp16")]; + tensor denom_57_epsilon_0_to_fp16 = const()[name = tensor("denom_57_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_57_cast_fp16 = rsqrt(epsilon = denom_57_epsilon_0_to_fp16, x = var_2144_cast_fp16)[name = tensor("denom_57_cast_fp16")]; + tensor out_57_cast_fp16 = mul(x = zero_mean_57_cast_fp16, y = denom_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; + tensor obj_135_gamma_0_to_fp16 = const()[name = tensor("obj_135_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255201152)))]; + tensor obj_135_beta_0_to_fp16 = const()[name = tensor("obj_135_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255202752)))]; + tensor obj_135_epsilon_0_to_fp16 = const()[name = tensor("obj_135_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_135_cast_fp16 = 
batch_norm(beta = obj_135_beta_0_to_fp16, epsilon = obj_135_epsilon_0_to_fp16, gamma = obj_135_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_135_cast_fp16")]; + tensor var_2159 = const()[name = tensor("op_2159"), val = tensor([1, 1])]; + tensor var_2161 = const()[name = tensor("op_2161"), val = tensor([1, 1])]; + tensor query_39_pad_type_0 = const()[name = tensor("query_39_pad_type_0"), val = tensor("custom")]; + tensor query_39_pad_0 = const()[name = tensor("query_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255204352)))]; + tensor layers_9_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256384064)))]; + tensor query_39_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_bias_to_fp16, dilations = var_2161, groups = var_2037, pad = query_39_pad_0, pad_type = query_39_pad_type_0, strides = var_2159, weight = layers_9_encoder_attn_q_proj_weight_to_fp16, x = obj_135_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor var_2165 = const()[name = tensor("op_2165"), val = tensor([1, 1])]; + tensor var_2167 = const()[name = tensor("op_2167"), val = tensor([1, 1])]; + tensor key_39_pad_type_0 = const()[name = tensor("key_39_pad_type_0"), val = tensor("custom")]; + tensor key_39_pad_0 = const()[name = tensor("key_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256385664)))]; + tensor key_39_cast_fp16 = conv(dilations = var_2167, groups = var_2037, pad = key_39_pad_0, pad_type = key_39_pad_type_0, strides = var_2165, weight = layers_9_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_39_cast_fp16")]; + tensor var_2172 = const()[name = tensor("op_2172"), val = tensor([1, 1])]; + tensor var_2174 = const()[name = tensor("op_2174"), val = tensor([1, 1])]; + tensor value_39_pad_type_0 = const()[name = tensor("value_39_pad_type_0"), val = tensor("custom")]; + tensor value_39_pad_0 = const()[name = tensor("value_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257565376)))]; + tensor layers_9_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258745088)))]; + tensor value_39_cast_fp16 = conv(bias = layers_9_encoder_attn_v_proj_bias_to_fp16, dilations = var_2174, groups = var_2037, pad = value_39_pad_0, pad_type = value_39_pad_type_0, strides = var_2172, weight = layers_9_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_39_cast_fp16")]; + tensor var_2178 = const()[name = tensor("op_2178"), val = tensor([1, 12, 64, -1])]; + tensor var_2179_cast_fp16 = reshape(shape = var_2178, x = query_39_cast_fp16)[name = tensor("op_2179_cast_fp16")]; + tensor var_2180_to_fp16 = const()[name = 
tensor("op_2180_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2181_cast_fp16 = mul(x = var_2179_cast_fp16, y = var_2180_to_fp16)[name = tensor("op_2181_cast_fp16")]; + tensor var_2182 = const()[name = tensor("op_2182"), val = tensor([1, 12, 64, -1])]; + tensor var_2183_cast_fp16 = reshape(shape = var_2182, x = key_39_cast_fp16)[name = tensor("op_2183_cast_fp16")]; + tensor mh_w_59_transpose_x_0 = const()[name = tensor("mh_w_59_transpose_x_0"), val = tensor(true)]; + tensor mh_w_59_transpose_y_0 = const()[name = tensor("mh_w_59_transpose_y_0"), val = tensor(false)]; + tensor mh_w_59_cast_fp16 = matmul(transpose_x = mh_w_59_transpose_x_0, transpose_y = mh_w_59_transpose_y_0, x = var_2181_cast_fp16, y = var_2183_cast_fp16)[name = tensor("mh_w_59_cast_fp16")]; + tensor obj_139_cast_fp16 = softmax(axis = var_2030, x = mh_w_59_cast_fp16)[name = tensor("obj_139_cast_fp16")]; + tensor var_2187 = const()[name = tensor("op_2187"), val = tensor([1, 12, 64, -1])]; + tensor var_2188_cast_fp16 = reshape(shape = var_2187, x = value_39_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor attn_39_transpose_x_0 = const()[name = tensor("attn_39_transpose_x_0"), val = tensor(false)]; + tensor attn_39_transpose_y_0 = const()[name = tensor("attn_39_transpose_y_0"), val = tensor(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2188_cast_fp16, y = obj_139_cast_fp16)[name = tensor("attn_39_cast_fp16")]; + tensor var_2191 = const()[name = tensor("op_2191"), val = tensor([1, 768, 1, -1])]; + tensor input_93_cast_fp16 = reshape(shape = var_2191, x = attn_39_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor var_2195 = const()[name = tensor("op_2195"), val = tensor([1, 1])]; + tensor var_2197 = const()[name = tensor("op_2197"), val = tensor([1, 1])]; + tensor obj_137_pad_type_0 = const()[name = tensor("obj_137_pad_type_0"), val = tensor("custom")]; + tensor obj_137_pad_0 = const()[name = tensor("obj_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258746688)))]; + tensor layers_9_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259926400)))]; + tensor obj_137_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_bias_to_fp16, dilations = var_2197, groups = var_2037, pad = obj_137_pad_0, pad_type = obj_137_pad_type_0, strides = var_2195, weight = layers_9_encoder_attn_o_proj_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("obj_137_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_137_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor var_2206 = const()[name = tensor("op_2206"), val = tensor([1])]; + tensor channels_mean_59_cast_fp16 = reduce_mean(axes = var_2206, keep_dims = var_2038, x = inputs_59_cast_fp16)[name = tensor("channels_mean_59_cast_fp16")]; + tensor zero_mean_59_cast_fp16 = sub(x = inputs_59_cast_fp16, y = channels_mean_59_cast_fp16)[name = tensor("zero_mean_59_cast_fp16")]; + tensor zero_mean_sq_59_cast_fp16 = mul(x = zero_mean_59_cast_fp16, y = zero_mean_59_cast_fp16)[name = tensor("zero_mean_sq_59_cast_fp16")]; + tensor var_2210 = const()[name = tensor("op_2210"), val = tensor([1])]; + tensor var_2211_cast_fp16 = reduce_mean(axes = 
var_2210, keep_dims = var_2038, x = zero_mean_sq_59_cast_fp16)[name = tensor("op_2211_cast_fp16")]; + tensor var_2212_to_fp16 = const()[name = tensor("op_2212_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2213_cast_fp16 = add(x = var_2211_cast_fp16, y = var_2212_to_fp16)[name = tensor("op_2213_cast_fp16")]; + tensor denom_59_epsilon_0_to_fp16 = const()[name = tensor("denom_59_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_59_cast_fp16 = rsqrt(epsilon = denom_59_epsilon_0_to_fp16, x = var_2213_cast_fp16)[name = tensor("denom_59_cast_fp16")]; + tensor out_59_cast_fp16 = mul(x = zero_mean_59_cast_fp16, y = denom_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; + tensor input_95_gamma_0_to_fp16 = const()[name = tensor("input_95_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259928000)))]; + tensor input_95_beta_0_to_fp16 = const()[name = tensor("input_95_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259929600)))]; + tensor input_95_epsilon_0_to_fp16 = const()[name = tensor("input_95_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor var_2224 = const()[name = tensor("op_2224"), val = tensor([1, 1])]; + tensor var_2226 = const()[name = tensor("op_2226"), val = tensor([1, 1])]; + tensor input_97_pad_type_0 = const()[name = tensor("input_97_pad_type_0"), val = tensor("custom")]; + tensor input_97_pad_0 = const()[name = tensor("input_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_fc1_weight_to_fp16 = const()[name = tensor("layers_9_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259931200)))]; + tensor layers_9_fc1_bias_to_fp16 = const()[name = tensor("layers_9_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264649856)))]; + tensor input_97_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = var_2226, groups = var_2037, pad = input_97_pad_0, pad_type = input_97_pad_type_0, strides = var_2224, weight = layers_9_fc1_weight_to_fp16, x = input_95_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor input_99_mode_0 = const()[name = tensor("input_99_mode_0"), val = tensor("EXACT")]; + tensor input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_2232 = const()[name = tensor("op_2232"), val = tensor([1, 1])]; + tensor var_2234 = const()[name = tensor("op_2234"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_type_0 = const()[name = tensor("hidden_states_21_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_21_pad_0 = const()[name = tensor("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_fc2_weight_to_fp16 = const()[name = tensor("layers_9_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264656064)))]; + tensor layers_9_fc2_bias_to_fp16 = const()[name = tensor("layers_9_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269374720)))]; + tensor hidden_states_21_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = var_2234, 
groups = var_2037, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = var_2232, weight = layers_9_fc2_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor var_2248 = const()[name = tensor("op_2248"), val = tensor(3)]; + tensor var_2255 = const()[name = tensor("op_2255"), val = tensor(1)]; + tensor var_2256 = const()[name = tensor("op_2256"), val = tensor(true)]; + tensor var_2268 = const()[name = tensor("op_2268"), val = tensor([1])]; + tensor channels_mean_61_cast_fp16 = reduce_mean(axes = var_2268, keep_dims = var_2256, x = inputs_61_cast_fp16)[name = tensor("channels_mean_61_cast_fp16")]; + tensor zero_mean_61_cast_fp16 = sub(x = inputs_61_cast_fp16, y = channels_mean_61_cast_fp16)[name = tensor("zero_mean_61_cast_fp16")]; + tensor zero_mean_sq_61_cast_fp16 = mul(x = zero_mean_61_cast_fp16, y = zero_mean_61_cast_fp16)[name = tensor("zero_mean_sq_61_cast_fp16")]; + tensor var_2272 = const()[name = tensor("op_2272"), val = tensor([1])]; + tensor var_2273_cast_fp16 = reduce_mean(axes = var_2272, keep_dims = var_2256, x = zero_mean_sq_61_cast_fp16)[name = tensor("op_2273_cast_fp16")]; + tensor var_2274_to_fp16 = const()[name = tensor("op_2274_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2275_cast_fp16 = add(x = var_2273_cast_fp16, y = var_2274_to_fp16)[name = tensor("op_2275_cast_fp16")]; + tensor denom_61_epsilon_0_to_fp16 = const()[name = tensor("denom_61_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_61_cast_fp16 = rsqrt(epsilon = denom_61_epsilon_0_to_fp16, x = var_2275_cast_fp16)[name = tensor("denom_61_cast_fp16")]; + tensor out_61_cast_fp16 = mul(x = zero_mean_61_cast_fp16, y = denom_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; + tensor obj_141_gamma_0_to_fp16 = const()[name = tensor("obj_141_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269376320)))]; + tensor obj_141_beta_0_to_fp16 = const()[name = tensor("obj_141_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269377920)))]; + tensor obj_141_epsilon_0_to_fp16 = const()[name = tensor("obj_141_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_141_cast_fp16")]; + tensor var_2290 = const()[name = tensor("op_2290"), val = tensor([1, 1])]; + tensor var_2292 = const()[name = tensor("op_2292"), val = tensor([1, 1])]; + tensor query_41_pad_type_0 = const()[name = tensor("query_41_pad_type_0"), val = tensor("custom")]; + tensor query_41_pad_0 = const()[name = tensor("query_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269379520)))]; + tensor layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270559232)))]; + tensor query_41_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = var_2292, groups = 
var_2255, pad = query_41_pad_0, pad_type = query_41_pad_type_0, strides = var_2290, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor var_2296 = const()[name = tensor("op_2296"), val = tensor([1, 1])]; + tensor var_2298 = const()[name = tensor("op_2298"), val = tensor([1, 1])]; + tensor current_key_21_pad_type_0 = const()[name = tensor("current_key_21_pad_type_0"), val = tensor("custom")]; + tensor current_key_21_pad_0 = const()[name = tensor("current_key_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270560832)))]; + tensor current_key_21_cast_fp16 = conv(dilations = var_2298, groups = var_2255, pad = current_key_21_pad_0, pad_type = current_key_21_pad_type_0, strides = var_2296, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor("current_key_21_cast_fp16")]; + tensor var_2303 = const()[name = tensor("op_2303"), val = tensor([1, 1])]; + tensor var_2305 = const()[name = tensor("op_2305"), val = tensor([1, 1])]; + tensor current_value_21_pad_type_0 = const()[name = tensor("current_value_21_pad_type_0"), val = tensor("custom")]; + tensor current_value_21_pad_0 = const()[name = tensor("current_value_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271740544)))]; + tensor layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272920256)))]; + tensor current_value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = var_2305, groups = var_2255, pad = current_value_21_pad_0, pad_type = current_value_21_pad_type_0, strides = var_2303, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor("current_value_21_cast_fp16")]; + tensor var_2312_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_2312_cast_fp16")]; + tensor var_2314_cast_fp16 = mul(x = var_63_cast_fp16_10, y = var_161_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor key_41_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2314_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor var_2316_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_2316_cast_fp16")]; + tensor var_2318_cast_fp16 = mul(x = var_78_cast_fp16_10, y = var_161_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor value_41_cast_fp16 = add(x = var_2316_cast_fp16, y = var_2318_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_2321 = const()[name = tensor("op_2321"), val = tensor([1, 12, 64, -1])]; + tensor var_2322_cast_fp16 = reshape(shape = var_2321, x = query_41_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2323_to_fp16 = const()[name = tensor("op_2323_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2324_cast_fp16 = mul(x = var_2322_cast_fp16, y = var_2323_to_fp16)[name = tensor("op_2324_cast_fp16")]; + tensor var_2325 = const()[name = tensor("op_2325"), val = tensor([1, 12, 64, -1])]; + tensor var_2326_cast_fp16 = reshape(shape = var_2325, x = 
key_41_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor mh_w_61_transpose_x_0 = const()[name = tensor("mh_w_61_transpose_x_0"), val = tensor(true)]; + tensor mh_w_61_transpose_y_0 = const()[name = tensor("mh_w_61_transpose_y_0"), val = tensor(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_2324_cast_fp16, y = var_2326_cast_fp16)[name = tensor("mh_w_61_cast_fp16")]; + tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_63_cast_fp16")]; + tensor var_2334_cast_fp16 = softmax(axis = var_2248, x = mh_w_63_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2335 = const()[name = tensor("op_2335"), val = tensor([1, 12, 64, -1])]; + tensor var_2336_cast_fp16 = reshape(shape = var_2335, x = value_41_cast_fp16)[name = tensor("op_2336_cast_fp16")]; + tensor attn_41_transpose_x_0 = const()[name = tensor("attn_41_transpose_x_0"), val = tensor(false)]; + tensor attn_41_transpose_y_0 = const()[name = tensor("attn_41_transpose_y_0"), val = tensor(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_2336_cast_fp16, y = var_2334_cast_fp16)[name = tensor("attn_41_cast_fp16")]; + tensor var_2339 = const()[name = tensor("op_2339"), val = tensor([1, 768, 1, -1])]; + tensor input_101_cast_fp16 = reshape(shape = var_2339, x = attn_41_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor var_2343 = const()[name = tensor("op_2343"), val = tensor([1, 1])]; + tensor var_2345 = const()[name = tensor("op_2345"), val = tensor([1, 1])]; + tensor obj_147_pad_type_0 = const()[name = tensor("obj_147_pad_type_0"), val = tensor("custom")]; + tensor obj_147_pad_0 = const()[name = tensor("obj_147_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272921856)))]; + tensor layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274101568)))]; + tensor obj_147_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = var_2345, groups = var_2255, pad = obj_147_pad_0, pad_type = obj_147_pad_type_0, strides = var_2343, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_101_cast_fp16)[name = tensor("obj_147_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_147_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor var_2355 = const()[name = tensor("op_2355"), val = tensor([1])]; + tensor channels_mean_63_cast_fp16 = reduce_mean(axes = var_2355, keep_dims = var_2256, x = inputs_63_cast_fp16)[name = tensor("channels_mean_63_cast_fp16")]; + tensor zero_mean_63_cast_fp16 = sub(x = inputs_63_cast_fp16, y = channels_mean_63_cast_fp16)[name = tensor("zero_mean_63_cast_fp16")]; + tensor zero_mean_sq_63_cast_fp16 = mul(x = zero_mean_63_cast_fp16, y = zero_mean_63_cast_fp16)[name = tensor("zero_mean_sq_63_cast_fp16")]; + tensor var_2359 = const()[name = tensor("op_2359"), val = tensor([1])]; + tensor var_2360_cast_fp16 = reduce_mean(axes = var_2359, keep_dims = var_2256, x = zero_mean_sq_63_cast_fp16)[name = tensor("op_2360_cast_fp16")]; + tensor var_2361_to_fp16 = const()[name = tensor("op_2361_to_fp16"), val = tensor(0x1.5p-17)]; + 
tensor var_2362_cast_fp16 = add(x = var_2360_cast_fp16, y = var_2361_to_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor denom_63_epsilon_0_to_fp16 = const()[name = tensor("denom_63_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_63_cast_fp16 = rsqrt(epsilon = denom_63_epsilon_0_to_fp16, x = var_2362_cast_fp16)[name = tensor("denom_63_cast_fp16")]; + tensor out_63_cast_fp16 = mul(x = zero_mean_63_cast_fp16, y = denom_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; + tensor obj_149_gamma_0_to_fp16 = const()[name = tensor("obj_149_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274103168)))]; + tensor obj_149_beta_0_to_fp16 = const()[name = tensor("obj_149_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274104768)))]; + tensor obj_149_epsilon_0_to_fp16 = const()[name = tensor("obj_149_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_149_cast_fp16 = batch_norm(beta = obj_149_beta_0_to_fp16, epsilon = obj_149_epsilon_0_to_fp16, gamma = obj_149_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("obj_149_cast_fp16")]; + tensor var_2377 = const()[name = tensor("op_2377"), val = tensor([1, 1])]; + tensor var_2379 = const()[name = tensor("op_2379"), val = tensor([1, 1])]; + tensor query_43_pad_type_0 = const()[name = tensor("query_43_pad_type_0"), val = tensor("custom")]; + tensor query_43_pad_0 = const()[name = tensor("query_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274106368)))]; + tensor layers_10_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275286080)))]; + tensor query_43_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_bias_to_fp16, dilations = var_2379, groups = var_2255, pad = query_43_pad_0, pad_type = query_43_pad_type_0, strides = var_2377, weight = layers_10_encoder_attn_q_proj_weight_to_fp16, x = obj_149_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor var_2383 = const()[name = tensor("op_2383"), val = tensor([1, 1])]; + tensor var_2385 = const()[name = tensor("op_2385"), val = tensor([1, 1])]; + tensor key_43_pad_type_0 = const()[name = tensor("key_43_pad_type_0"), val = tensor("custom")]; + tensor key_43_pad_0 = const()[name = tensor("key_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275287680)))]; + tensor key_43_cast_fp16 = conv(dilations = var_2385, groups = var_2255, pad = key_43_pad_0, pad_type = key_43_pad_type_0, strides = var_2383, weight = layers_10_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_43_cast_fp16")]; + tensor var_2390 = const()[name = tensor("op_2390"), val = tensor([1, 1])]; + tensor var_2392 = const()[name = tensor("op_2392"), val = tensor([1, 1])]; + tensor value_43_pad_type_0 = const()[name = tensor("value_43_pad_type_0"), val = tensor("custom")]; + tensor value_43_pad_0 = const()[name = tensor("value_43_pad_0"), val = tensor([0, 0, 0, 0])]; + 
tensor layers_10_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(276467392)))]; + tensor layers_10_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277647104)))]; + tensor value_43_cast_fp16 = conv(bias = layers_10_encoder_attn_v_proj_bias_to_fp16, dilations = var_2392, groups = var_2255, pad = value_43_pad_0, pad_type = value_43_pad_type_0, strides = var_2390, weight = layers_10_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_43_cast_fp16")]; + tensor var_2396 = const()[name = tensor("op_2396"), val = tensor([1, 12, 64, -1])]; + tensor var_2397_cast_fp16 = reshape(shape = var_2396, x = query_43_cast_fp16)[name = tensor("op_2397_cast_fp16")]; + tensor var_2398_to_fp16 = const()[name = tensor("op_2398_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2399_cast_fp16 = mul(x = var_2397_cast_fp16, y = var_2398_to_fp16)[name = tensor("op_2399_cast_fp16")]; + tensor var_2400 = const()[name = tensor("op_2400"), val = tensor([1, 12, 64, -1])]; + tensor var_2401_cast_fp16 = reshape(shape = var_2400, x = key_43_cast_fp16)[name = tensor("op_2401_cast_fp16")]; + tensor mh_w_65_transpose_x_0 = const()[name = tensor("mh_w_65_transpose_x_0"), val = tensor(true)]; + tensor mh_w_65_transpose_y_0 = const()[name = tensor("mh_w_65_transpose_y_0"), val = tensor(false)]; + tensor mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_2399_cast_fp16, y = var_2401_cast_fp16)[name = tensor("mh_w_65_cast_fp16")]; + tensor obj_153_cast_fp16 = softmax(axis = var_2248, x = mh_w_65_cast_fp16)[name = tensor("obj_153_cast_fp16")]; + tensor var_2405 = const()[name = tensor("op_2405"), val = tensor([1, 12, 64, -1])]; + tensor var_2406_cast_fp16 = reshape(shape = var_2405, x = value_43_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor attn_43_transpose_x_0 = const()[name = tensor("attn_43_transpose_x_0"), val = tensor(false)]; + tensor attn_43_transpose_y_0 = const()[name = tensor("attn_43_transpose_y_0"), val = tensor(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_2406_cast_fp16, y = obj_153_cast_fp16)[name = tensor("attn_43_cast_fp16")]; + tensor var_2409 = const()[name = tensor("op_2409"), val = tensor([1, 768, 1, -1])]; + tensor input_103_cast_fp16 = reshape(shape = var_2409, x = attn_43_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor var_2413 = const()[name = tensor("op_2413"), val = tensor([1, 1])]; + tensor var_2415 = const()[name = tensor("op_2415"), val = tensor([1, 1])]; + tensor obj_151_pad_type_0 = const()[name = tensor("obj_151_pad_type_0"), val = tensor("custom")]; + tensor obj_151_pad_0 = const()[name = tensor("obj_151_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277648704)))]; + tensor layers_10_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(278828416)))]; + tensor obj_151_cast_fp16 = conv(bias = 
layers_10_encoder_attn_o_proj_bias_to_fp16, dilations = var_2415, groups = var_2255, pad = obj_151_pad_0, pad_type = obj_151_pad_type_0, strides = var_2413, weight = layers_10_encoder_attn_o_proj_weight_to_fp16, x = input_103_cast_fp16)[name = tensor("obj_151_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_151_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor var_2424 = const()[name = tensor("op_2424"), val = tensor([1])]; + tensor channels_mean_65_cast_fp16 = reduce_mean(axes = var_2424, keep_dims = var_2256, x = inputs_65_cast_fp16)[name = tensor("channels_mean_65_cast_fp16")]; + tensor zero_mean_65_cast_fp16 = sub(x = inputs_65_cast_fp16, y = channels_mean_65_cast_fp16)[name = tensor("zero_mean_65_cast_fp16")]; + tensor zero_mean_sq_65_cast_fp16 = mul(x = zero_mean_65_cast_fp16, y = zero_mean_65_cast_fp16)[name = tensor("zero_mean_sq_65_cast_fp16")]; + tensor var_2428 = const()[name = tensor("op_2428"), val = tensor([1])]; + tensor var_2429_cast_fp16 = reduce_mean(axes = var_2428, keep_dims = var_2256, x = zero_mean_sq_65_cast_fp16)[name = tensor("op_2429_cast_fp16")]; + tensor var_2430_to_fp16 = const()[name = tensor("op_2430_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2431_cast_fp16 = add(x = var_2429_cast_fp16, y = var_2430_to_fp16)[name = tensor("op_2431_cast_fp16")]; + tensor denom_65_epsilon_0_to_fp16 = const()[name = tensor("denom_65_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_65_cast_fp16 = rsqrt(epsilon = denom_65_epsilon_0_to_fp16, x = var_2431_cast_fp16)[name = tensor("denom_65_cast_fp16")]; + tensor out_65_cast_fp16 = mul(x = zero_mean_65_cast_fp16, y = denom_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; + tensor input_105_gamma_0_to_fp16 = const()[name = tensor("input_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(278830016)))]; + tensor input_105_beta_0_to_fp16 = const()[name = tensor("input_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(278831616)))]; + tensor input_105_epsilon_0_to_fp16 = const()[name = tensor("input_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor var_2442 = const()[name = tensor("op_2442"), val = tensor([1, 1])]; + tensor var_2444 = const()[name = tensor("op_2444"), val = tensor([1, 1])]; + tensor input_107_pad_type_0 = const()[name = tensor("input_107_pad_type_0"), val = tensor("custom")]; + tensor input_107_pad_0 = const()[name = tensor("input_107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_fc1_weight_to_fp16 = const()[name = tensor("layers_10_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(278833216)))]; + tensor layers_10_fc1_bias_to_fp16 = const()[name = tensor("layers_10_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283551872)))]; + tensor input_107_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = var_2444, groups = var_2255, pad = input_107_pad_0, pad_type = input_107_pad_type_0, strides = var_2442, weight = layers_10_fc1_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor input_109_mode_0 
= const()[name = tensor("input_109_mode_0"), val = tensor("EXACT")]; + tensor input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor var_2450 = const()[name = tensor("op_2450"), val = tensor([1, 1])]; + tensor var_2452 = const()[name = tensor("op_2452"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_type_0 = const()[name = tensor("hidden_states_23_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_23_pad_0 = const()[name = tensor("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_fc2_weight_to_fp16 = const()[name = tensor("layers_10_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283558080)))]; + tensor layers_10_fc2_bias_to_fp16 = const()[name = tensor("layers_10_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288276736)))]; + tensor hidden_states_23_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = var_2452, groups = var_2255, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = var_2450, weight = layers_10_fc2_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor var_2466 = const()[name = tensor("op_2466"), val = tensor(3)]; + tensor var_2473 = const()[name = tensor("op_2473"), val = tensor(1)]; + tensor var_2474 = const()[name = tensor("op_2474"), val = tensor(true)]; + tensor var_2486 = const()[name = tensor("op_2486"), val = tensor([1])]; + tensor channels_mean_67_cast_fp16 = reduce_mean(axes = var_2486, keep_dims = var_2474, x = inputs_67_cast_fp16)[name = tensor("channels_mean_67_cast_fp16")]; + tensor zero_mean_67_cast_fp16 = sub(x = inputs_67_cast_fp16, y = channels_mean_67_cast_fp16)[name = tensor("zero_mean_67_cast_fp16")]; + tensor zero_mean_sq_67_cast_fp16 = mul(x = zero_mean_67_cast_fp16, y = zero_mean_67_cast_fp16)[name = tensor("zero_mean_sq_67_cast_fp16")]; + tensor var_2490 = const()[name = tensor("op_2490"), val = tensor([1])]; + tensor var_2491_cast_fp16 = reduce_mean(axes = var_2490, keep_dims = var_2474, x = zero_mean_sq_67_cast_fp16)[name = tensor("op_2491_cast_fp16")]; + tensor var_2492_to_fp16 = const()[name = tensor("op_2492_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2493_cast_fp16 = add(x = var_2491_cast_fp16, y = var_2492_to_fp16)[name = tensor("op_2493_cast_fp16")]; + tensor denom_67_epsilon_0_to_fp16 = const()[name = tensor("denom_67_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_67_cast_fp16 = rsqrt(epsilon = denom_67_epsilon_0_to_fp16, x = var_2493_cast_fp16)[name = tensor("denom_67_cast_fp16")]; + tensor out_67_cast_fp16 = mul(x = zero_mean_67_cast_fp16, y = denom_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; + tensor obj_155_gamma_0_to_fp16 = const()[name = tensor("obj_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288278336)))]; + tensor obj_155_beta_0_to_fp16 = const()[name = tensor("obj_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288279936)))]; + tensor obj_155_epsilon_0_to_fp16 = const()[name = tensor("obj_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_155_cast_fp16 = batch_norm(beta = obj_155_beta_0_to_fp16, epsilon = obj_155_epsilon_0_to_fp16, 
gamma = obj_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("obj_155_cast_fp16")]; + tensor var_2508 = const()[name = tensor("op_2508"), val = tensor([1, 1])]; + tensor var_2510 = const()[name = tensor("op_2510"), val = tensor([1, 1])]; + tensor query_45_pad_type_0 = const()[name = tensor("query_45_pad_type_0"), val = tensor("custom")]; + tensor query_45_pad_0 = const()[name = tensor("query_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288281536)))]; + tensor layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289461248)))]; + tensor query_45_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = var_2510, groups = var_2473, pad = query_45_pad_0, pad_type = query_45_pad_type_0, strides = var_2508, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor var_2514 = const()[name = tensor("op_2514"), val = tensor([1, 1])]; + tensor var_2516 = const()[name = tensor("op_2516"), val = tensor([1, 1])]; + tensor current_key_pad_type_0 = const()[name = tensor("current_key_pad_type_0"), val = tensor("custom")]; + tensor current_key_pad_0 = const()[name = tensor("current_key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289462848)))]; + tensor current_key_cast_fp16 = conv(dilations = var_2516, groups = var_2473, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_2514, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor("current_key_cast_fp16")]; + tensor var_2521 = const()[name = tensor("op_2521"), val = tensor([1, 1])]; + tensor var_2523 = const()[name = tensor("op_2523"), val = tensor([1, 1])]; + tensor current_value_pad_type_0 = const()[name = tensor("current_value_pad_type_0"), val = tensor("custom")]; + tensor current_value_pad_0 = const()[name = tensor("current_value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290642560)))]; + tensor layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291822272)))]; + tensor current_value_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = var_2523, groups = var_2473, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_2521, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor("current_value_cast_fp16")]; + tensor var_2530_cast_fp16 = mul(x = current_key_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_2530_cast_fp16")]; + tensor var_2532_cast_fp16 = mul(x = var_63_cast_fp16_11, y = var_161_cast_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor key_45_cast_fp16 = add(x = 
var_2530_cast_fp16, y = var_2532_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor var_2534_cast_fp16 = mul(x = current_value_cast_fp16, y = var_158_cast_fp16)[name = tensor("op_2534_cast_fp16")]; + tensor var_2536_cast_fp16 = mul(x = var_78_cast_fp16_11, y = var_161_cast_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor value_45_cast_fp16 = add(x = var_2534_cast_fp16, y = var_2536_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_2539 = const()[name = tensor("op_2539"), val = tensor([1, 12, 64, -1])]; + tensor var_2540_cast_fp16 = reshape(shape = var_2539, x = query_45_cast_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2541_to_fp16 = const()[name = tensor("op_2541_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2542_cast_fp16 = mul(x = var_2540_cast_fp16, y = var_2541_to_fp16)[name = tensor("op_2542_cast_fp16")]; + tensor var_2543 = const()[name = tensor("op_2543"), val = tensor([1, 12, 64, -1])]; + tensor var_2544_cast_fp16 = reshape(shape = var_2543, x = key_45_cast_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor mh_w_67_transpose_x_0 = const()[name = tensor("mh_w_67_transpose_x_0"), val = tensor(true)]; + tensor mh_w_67_transpose_y_0 = const()[name = tensor("mh_w_67_transpose_y_0"), val = tensor(false)]; + tensor mh_w_67_cast_fp16 = matmul(transpose_x = mh_w_67_transpose_x_0, transpose_y = mh_w_67_transpose_y_0, x = var_2542_cast_fp16, y = var_2544_cast_fp16)[name = tensor("mh_w_67_cast_fp16")]; + tensor mh_w_69_cast_fp16 = add(x = mh_w_67_cast_fp16, y = var_179_cast_fp16)[name = tensor("mh_w_69_cast_fp16")]; + tensor var_2552_cast_fp16 = softmax(axis = var_2466, x = mh_w_69_cast_fp16)[name = tensor("op_2552_cast_fp16")]; + tensor var_2553 = const()[name = tensor("op_2553"), val = tensor([1, 12, 64, -1])]; + tensor var_2554_cast_fp16 = reshape(shape = var_2553, x = value_45_cast_fp16)[name = tensor("op_2554_cast_fp16")]; + tensor attn_45_transpose_x_0 = const()[name = tensor("attn_45_transpose_x_0"), val = tensor(false)]; + tensor attn_45_transpose_y_0 = const()[name = tensor("attn_45_transpose_y_0"), val = tensor(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_2554_cast_fp16, y = var_2552_cast_fp16)[name = tensor("attn_45_cast_fp16")]; + tensor var_2557 = const()[name = tensor("op_2557"), val = tensor([1, 768, 1, -1])]; + tensor input_111_cast_fp16 = reshape(shape = var_2557, x = attn_45_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_2561 = const()[name = tensor("op_2561"), val = tensor([1, 1])]; + tensor var_2563 = const()[name = tensor("op_2563"), val = tensor([1, 1])]; + tensor obj_161_pad_type_0 = const()[name = tensor("obj_161_pad_type_0"), val = tensor("custom")]; + tensor obj_161_pad_0 = const()[name = tensor("obj_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291823872)))]; + tensor layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293003584)))]; + tensor obj_161_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = var_2563, groups = var_2473, pad = obj_161_pad_0, pad_type = obj_161_pad_type_0, strides = var_2561, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = 
input_111_cast_fp16)[name = tensor("obj_161_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_161_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor var_2573 = const()[name = tensor("op_2573"), val = tensor([1])]; + tensor channels_mean_69_cast_fp16 = reduce_mean(axes = var_2573, keep_dims = var_2474, x = inputs_69_cast_fp16)[name = tensor("channels_mean_69_cast_fp16")]; + tensor zero_mean_69_cast_fp16 = sub(x = inputs_69_cast_fp16, y = channels_mean_69_cast_fp16)[name = tensor("zero_mean_69_cast_fp16")]; + tensor zero_mean_sq_69_cast_fp16 = mul(x = zero_mean_69_cast_fp16, y = zero_mean_69_cast_fp16)[name = tensor("zero_mean_sq_69_cast_fp16")]; + tensor var_2577 = const()[name = tensor("op_2577"), val = tensor([1])]; + tensor var_2578_cast_fp16 = reduce_mean(axes = var_2577, keep_dims = var_2474, x = zero_mean_sq_69_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_to_fp16 = const()[name = tensor("op_2579_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2580_cast_fp16 = add(x = var_2578_cast_fp16, y = var_2579_to_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor denom_69_epsilon_0_to_fp16 = const()[name = tensor("denom_69_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_69_cast_fp16 = rsqrt(epsilon = denom_69_epsilon_0_to_fp16, x = var_2580_cast_fp16)[name = tensor("denom_69_cast_fp16")]; + tensor out_69_cast_fp16 = mul(x = zero_mean_69_cast_fp16, y = denom_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; + tensor obj_163_gamma_0_to_fp16 = const()[name = tensor("obj_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293005184)))]; + tensor obj_163_beta_0_to_fp16 = const()[name = tensor("obj_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293006784)))]; + tensor obj_163_epsilon_0_to_fp16 = const()[name = tensor("obj_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_163_cast_fp16 = batch_norm(beta = obj_163_beta_0_to_fp16, epsilon = obj_163_epsilon_0_to_fp16, gamma = obj_163_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_163_cast_fp16")]; + tensor var_2595 = const()[name = tensor("op_2595"), val = tensor([1, 1])]; + tensor var_2597 = const()[name = tensor("op_2597"), val = tensor([1, 1])]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("custom")]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293008384)))]; + tensor layers_11_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(294188096)))]; + tensor query_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_bias_to_fp16, dilations = var_2597, groups = var_2473, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_2595, weight = layers_11_encoder_attn_q_proj_weight_to_fp16, x = obj_163_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_2601 = const()[name = tensor("op_2601"), val = tensor([1, 1])]; + tensor var_2603 = const()[name = tensor("op_2603"), val = tensor([1, 1])]; + tensor key_pad_type_0 = const()[name = 
tensor("key_pad_type_0"), val = tensor("custom")]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(294189696)))]; + tensor key_cast_fp16 = conv(dilations = var_2603, groups = var_2473, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_2601, weight = layers_11_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("key_cast_fp16")]; + tensor var_2608 = const()[name = tensor("op_2608"), val = tensor([1, 1])]; + tensor var_2610 = const()[name = tensor("op_2610"), val = tensor([1, 1])]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("custom")]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295369408)))]; + tensor layers_11_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296549120)))]; + tensor value_cast_fp16 = conv(bias = layers_11_encoder_attn_v_proj_bias_to_fp16, dilations = var_2610, groups = var_2473, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_2608, weight = layers_11_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor("value_cast_fp16")]; + tensor var_2614 = const()[name = tensor("op_2614"), val = tensor([1, 12, 64, -1])]; + tensor var_2615_cast_fp16 = reshape(shape = var_2614, x = query_cast_fp16)[name = tensor("op_2615_cast_fp16")]; + tensor var_2616_to_fp16 = const()[name = tensor("op_2616_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2617_cast_fp16 = mul(x = var_2615_cast_fp16, y = var_2616_to_fp16)[name = tensor("op_2617_cast_fp16")]; + tensor var_2618 = const()[name = tensor("op_2618"), val = tensor([1, 12, 64, -1])]; + tensor var_2619_cast_fp16 = reshape(shape = var_2618, x = key_cast_fp16)[name = tensor("op_2619_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2617_cast_fp16, y = var_2619_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor obj_167_cast_fp16 = softmax(axis = var_2466, x = mh_w_cast_fp16)[name = tensor("obj_167_cast_fp16")]; + tensor var_2623 = const()[name = tensor("op_2623"), val = tensor([1, 12, 64, -1])]; + tensor var_2624_cast_fp16 = reshape(shape = var_2623, x = value_cast_fp16)[name = tensor("op_2624_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2624_cast_fp16, y = obj_167_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_2627 = const()[name = tensor("op_2627"), val = tensor([1, 768, 1, -1])]; + tensor input_113_cast_fp16 = reshape(shape = var_2627, x = 
attn_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor var_2631 = const()[name = tensor("op_2631"), val = tensor([1, 1])]; + tensor var_2633 = const()[name = tensor("op_2633"), val = tensor([1, 1])]; + tensor obj_165_pad_type_0 = const()[name = tensor("obj_165_pad_type_0"), val = tensor("custom")]; + tensor obj_165_pad_0 = const()[name = tensor("obj_165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_encoder_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296550720)))]; + tensor layers_11_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297730432)))]; + tensor obj_165_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_bias_to_fp16, dilations = var_2633, groups = var_2473, pad = obj_165_pad_0, pad_type = obj_165_pad_type_0, strides = var_2631, weight = layers_11_encoder_attn_o_proj_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("obj_165_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_165_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor var_2639 = const()[name = tensor("op_2639"), val = tensor([1])]; + tensor channels_mean_71_cast_fp16 = reduce_mean(axes = var_2639, keep_dims = var_2474, x = inputs_71_cast_fp16)[name = tensor("channels_mean_71_cast_fp16")]; + tensor zero_mean_71_cast_fp16 = sub(x = inputs_71_cast_fp16, y = channels_mean_71_cast_fp16)[name = tensor("zero_mean_71_cast_fp16")]; + tensor zero_mean_sq_71_cast_fp16 = mul(x = zero_mean_71_cast_fp16, y = zero_mean_71_cast_fp16)[name = tensor("zero_mean_sq_71_cast_fp16")]; + tensor var_2643 = const()[name = tensor("op_2643"), val = tensor([1])]; + tensor var_2644_cast_fp16 = reduce_mean(axes = var_2643, keep_dims = var_2474, x = zero_mean_sq_71_cast_fp16)[name = tensor("op_2644_cast_fp16")]; + tensor var_2645_to_fp16 = const()[name = tensor("op_2645_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2646_cast_fp16 = add(x = var_2644_cast_fp16, y = var_2645_to_fp16)[name = tensor("op_2646_cast_fp16")]; + tensor denom_71_epsilon_0_to_fp16 = const()[name = tensor("denom_71_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_71_cast_fp16 = rsqrt(epsilon = denom_71_epsilon_0_to_fp16, x = var_2646_cast_fp16)[name = tensor("denom_71_cast_fp16")]; + tensor out_71_cast_fp16 = mul(x = zero_mean_71_cast_fp16, y = denom_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297732032)))]; + tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297733632)))]; + tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_2657 = const()[name = tensor("op_2657"), val = tensor([1, 1])]; + tensor var_2659 = const()[name = tensor("op_2659"), val = tensor([1, 1])]; + tensor 
input_117_pad_type_0 = const()[name = tensor("input_117_pad_type_0"), val = tensor("custom")]; + tensor input_117_pad_0 = const()[name = tensor("input_117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_fc1_weight_to_fp16 = const()[name = tensor("layers_11_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297735232)))]; + tensor layers_11_fc1_bias_to_fp16 = const()[name = tensor("layers_11_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302453888)))]; + tensor input_117_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = var_2659, groups = var_2473, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = var_2657, weight = layers_11_fc1_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2665 = const()[name = tensor("op_2665"), val = tensor([1, 1])]; + tensor var_2667 = const()[name = tensor("op_2667"), val = tensor([1, 1])]; + tensor hidden_states_25_pad_type_0 = const()[name = tensor("hidden_states_25_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_25_pad_0 = const()[name = tensor("hidden_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_fc2_weight_to_fp16 = const()[name = tensor("layers_11_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302460096)))]; + tensor layers_11_fc2_bias_to_fp16 = const()[name = tensor("layers_11_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307178752)))]; + tensor hidden_states_25_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = var_2667, groups = var_2473, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = var_2665, weight = layers_11_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor(true)]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1])]; + tensor channels_mean_cast_fp16 = reduce_mean(axes = var_2681, keep_dims = var_2677, x = inputs_cast_fp16)[name = tensor("channels_mean_cast_fp16")]; + tensor zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor("zero_mean_cast_fp16")]; + tensor zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor("zero_mean_sq_cast_fp16")]; + tensor var_2685 = const()[name = tensor("op_2685"), val = tensor([1])]; + tensor var_2686_cast_fp16 = reduce_mean(axes = var_2685, keep_dims = var_2677, x = zero_mean_sq_cast_fp16)[name = tensor("op_2686_cast_fp16")]; + tensor var_2687_to_fp16 = const()[name = tensor("op_2687_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2688_cast_fp16 = add(x = var_2686_cast_fp16, y = var_2687_to_fp16)[name = tensor("op_2688_cast_fp16")]; + tensor denom_epsilon_0_to_fp16 = const()[name = tensor("denom_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_2688_cast_fp16)[name = tensor("denom_cast_fp16")]; + tensor out_cast_fp16 = mul(x = 
zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor hidden_states_gamma_0_to_fp16 = const()[name = tensor("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307180352)))]; + tensor hidden_states_beta_0_to_fp16 = const()[name = tensor("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307181952)))]; + tensor hidden_states_epsilon_0_to_fp16 = const()[name = tensor("hidden_states_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor var_2698_axes_0 = const()[name = tensor("op_2698_axes_0"), val = tensor([2])]; + tensor var_2698_cast_fp16 = squeeze(axes = var_2698_axes_0, x = hidden_states_cast_fp16)[name = tensor("op_2698_cast_fp16")]; + tensor var_2701_perm_0 = const()[name = tensor("op_2701_perm_0"), val = tensor([0, 2, 1])]; + tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307183552)))]; + tensor transpose_0 = transpose(perm = var_2701_perm_0, x = var_2698_cast_fp16)[name = tensor("transpose_0")]; + tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor("linear_0_cast_fp16")]; + tensor var_2705 = const()[name = tensor("op_2705"), val = tensor(1)]; + tensor obj_171_interleave_0 = const()[name = tensor("obj_171_interleave_0"), val = tensor(false)]; + tensor key_cache_updates = concat(axis = var_2705, interleave = obj_171_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = tensor("obj_171_cast_fp16")]; + tensor var_2708 = const()[name = tensor("op_2708"), val = tensor(1)]; + tensor obj_173_interleave_0 = const()[name = tensor("obj_173_interleave_0"), val = tensor(false)]; + tensor value_cache_updates = concat(axis = var_2708, interleave = obj_173_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = tensor("obj_173_cast_fp16")]; + tensor var_2719_begin_0 = const()[name = tensor("op_2719_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_2719_end_0 = const()[name = tensor("op_2719_end_0"), val = tensor([1, 4, 1, 1500])]; + tensor var_2719_end_mask_0 = const()[name = tensor("op_2719_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2719_cast_fp16 = slice_by_index(begin = var_2719_begin_0, end = var_2719_end_0, end_mask = var_2719_end_mask_0, x = obj_83_cast_fp16)[name = tensor("op_2719_cast_fp16")]; + tensor var_2722_begin_0 = const()[name = tensor("op_2722_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2722_end_0 = const()[name = 
tensor("op_2722_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2722_end_mask_0 = const()[name = tensor("op_2722_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2722_squeeze_mask_0 = const()[name = tensor("op_2722_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2722_cast_fp16 = slice_by_index(begin = var_2722_begin_0, end = var_2722_end_0, end_mask = var_2722_end_mask_0, squeeze_mask = var_2722_squeeze_mask_0, x = var_2719_cast_fp16)[name = tensor("op_2722_cast_fp16")]; + tensor var_2737_begin_0 = const()[name = tensor("op_2737_begin_0"), val = tensor([0, 9, 0, 0])]; + tensor var_2737_end_0 = const()[name = tensor("op_2737_end_0"), val = tensor([1, 10, 1, 1500])]; + tensor var_2737_end_mask_0 = const()[name = tensor("op_2737_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2737_cast_fp16 = slice_by_index(begin = var_2737_begin_0, end = var_2737_end_0, end_mask = var_2737_end_mask_0, x = obj_83_cast_fp16)[name = tensor("op_2737_cast_fp16")]; + tensor var_2740_begin_0 = const()[name = tensor("op_2740_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2740_end_0 = const()[name = tensor("op_2740_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2740_end_mask_0 = const()[name = tensor("op_2740_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2740_squeeze_mask_0 = const()[name = tensor("op_2740_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2740_cast_fp16 = slice_by_index(begin = var_2740_begin_0, end = var_2740_end_0, end_mask = var_2740_end_mask_0, squeeze_mask = var_2740_squeeze_mask_0, x = var_2737_cast_fp16)[name = tensor("op_2740_cast_fp16")]; + tensor var_2755_begin_0 = const()[name = tensor("op_2755_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2755_end_0 = const()[name = tensor("op_2755_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2755_end_mask_0 = const()[name = tensor("op_2755_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2755_cast_fp16 = slice_by_index(begin = var_2755_begin_0, end = var_2755_end_0, end_mask = var_2755_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_2755_cast_fp16")]; + tensor var_2758_begin_0 = const()[name = tensor("op_2758_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2758_end_0 = const()[name = tensor("op_2758_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2758_end_mask_0 = const()[name = tensor("op_2758_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2758_squeeze_mask_0 = const()[name = tensor("op_2758_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2758_cast_fp16 = slice_by_index(begin = var_2758_begin_0, end = var_2758_end_0, end_mask = var_2758_end_mask_0, squeeze_mask = var_2758_squeeze_mask_0, x = var_2755_cast_fp16)[name = tensor("op_2758_cast_fp16")]; + tensor var_2773_begin_0 = const()[name = tensor("op_2773_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_2773_end_0 = const()[name = tensor("op_2773_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_2773_end_mask_0 = const()[name = tensor("op_2773_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2773_cast_fp16 = slice_by_index(begin = var_2773_begin_0, end = var_2773_end_0, end_mask = var_2773_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_2773_cast_fp16")]; + tensor var_2776_begin_0 = const()[name = tensor("op_2776_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2776_end_0 = const()[name = tensor("op_2776_end_0"), val = tensor([1, 1, 1, 
1500])]; + tensor var_2776_end_mask_0 = const()[name = tensor("op_2776_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2776_squeeze_mask_0 = const()[name = tensor("op_2776_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2776_cast_fp16 = slice_by_index(begin = var_2776_begin_0, end = var_2776_end_0, end_mask = var_2776_end_mask_0, squeeze_mask = var_2776_squeeze_mask_0, x = var_2773_cast_fp16)[name = tensor("op_2776_cast_fp16")]; + tensor var_2791_begin_0 = const()[name = tensor("op_2791_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_2791_end_0 = const()[name = tensor("op_2791_end_0"), val = tensor([1, 8, 1, 1500])]; + tensor var_2791_end_mask_0 = const()[name = tensor("op_2791_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2791_cast_fp16 = slice_by_index(begin = var_2791_begin_0, end = var_2791_end_0, end_mask = var_2791_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_2791_cast_fp16")]; + tensor var_2794_begin_0 = const()[name = tensor("op_2794_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2794_end_0 = const()[name = tensor("op_2794_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2794_end_mask_0 = const()[name = tensor("op_2794_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2794_squeeze_mask_0 = const()[name = tensor("op_2794_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2794_cast_fp16 = slice_by_index(begin = var_2794_begin_0, end = var_2794_end_0, end_mask = var_2794_end_mask_0, squeeze_mask = var_2794_squeeze_mask_0, x = var_2791_cast_fp16)[name = tensor("op_2794_cast_fp16")]; + tensor var_2809_begin_0 = const()[name = tensor("op_2809_begin_0"), val = tensor([0, 8, 0, 0])]; + tensor var_2809_end_0 = const()[name = tensor("op_2809_end_0"), val = tensor([1, 9, 1, 1500])]; + tensor var_2809_end_mask_0 = const()[name = tensor("op_2809_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2809_cast_fp16 = slice_by_index(begin = var_2809_begin_0, end = var_2809_end_0, end_mask = var_2809_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_2809_cast_fp16")]; + tensor var_2812_begin_0 = const()[name = tensor("op_2812_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2812_end_0 = const()[name = tensor("op_2812_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2812_end_mask_0 = const()[name = tensor("op_2812_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2812_squeeze_mask_0 = const()[name = tensor("op_2812_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2812_cast_fp16 = slice_by_index(begin = var_2812_begin_0, end = var_2812_end_0, end_mask = var_2812_end_mask_0, squeeze_mask = var_2812_squeeze_mask_0, x = var_2809_cast_fp16)[name = tensor("op_2812_cast_fp16")]; + tensor var_2827_begin_0 = const()[name = tensor("op_2827_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2827_end_0 = const()[name = tensor("op_2827_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2827_end_mask_0 = const()[name = tensor("op_2827_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2827_cast_fp16 = slice_by_index(begin = var_2827_begin_0, end = var_2827_end_0, end_mask = var_2827_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_2827_cast_fp16")]; + tensor var_2830_begin_0 = const()[name = tensor("op_2830_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2830_end_0 = const()[name = tensor("op_2830_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2830_end_mask_0 = 
const()[name = tensor("op_2830_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2830_squeeze_mask_0 = const()[name = tensor("op_2830_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2830_cast_fp16 = slice_by_index(begin = var_2830_begin_0, end = var_2830_end_0, end_mask = var_2830_end_mask_0, squeeze_mask = var_2830_squeeze_mask_0, x = var_2827_cast_fp16)[name = tensor("op_2830_cast_fp16")]; + tensor var_2845_begin_0 = const()[name = tensor("op_2845_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_2845_end_0 = const()[name = tensor("op_2845_end_0"), val = tensor([1, 8, 1, 1500])]; + tensor var_2845_end_mask_0 = const()[name = tensor("op_2845_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2845_cast_fp16 = slice_by_index(begin = var_2845_begin_0, end = var_2845_end_0, end_mask = var_2845_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_2845_cast_fp16")]; + tensor var_2848_begin_0 = const()[name = tensor("op_2848_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2848_end_0 = const()[name = tensor("op_2848_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2848_end_mask_0 = const()[name = tensor("op_2848_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2848_squeeze_mask_0 = const()[name = tensor("op_2848_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2848_cast_fp16 = slice_by_index(begin = var_2848_begin_0, end = var_2848_end_0, end_mask = var_2848_end_mask_0, squeeze_mask = var_2848_squeeze_mask_0, x = var_2845_cast_fp16)[name = tensor("op_2848_cast_fp16")]; + tensor var_2863_begin_0 = const()[name = tensor("op_2863_begin_0"), val = tensor([0, 9, 0, 0])]; + tensor var_2863_end_0 = const()[name = tensor("op_2863_end_0"), val = tensor([1, 10, 1, 1500])]; + tensor var_2863_end_mask_0 = const()[name = tensor("op_2863_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2863_cast_fp16 = slice_by_index(begin = var_2863_begin_0, end = var_2863_end_0, end_mask = var_2863_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_2863_cast_fp16")]; + tensor var_2866_begin_0 = const()[name = tensor("op_2866_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2866_end_0 = const()[name = tensor("op_2866_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2866_end_mask_0 = const()[name = tensor("op_2866_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2866_squeeze_mask_0 = const()[name = tensor("op_2866_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2866_cast_fp16 = slice_by_index(begin = var_2866_begin_0, end = var_2866_end_0, end_mask = var_2866_end_mask_0, squeeze_mask = var_2866_squeeze_mask_0, x = var_2863_cast_fp16)[name = tensor("op_2866_cast_fp16")]; + tensor var_2881_begin_0 = const()[name = tensor("op_2881_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_2881_end_0 = const()[name = tensor("op_2881_end_0"), val = tensor([1, 6, 1, 1500])]; + tensor var_2881_end_mask_0 = const()[name = tensor("op_2881_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2881_cast_fp16 = slice_by_index(begin = var_2881_begin_0, end = var_2881_end_0, end_mask = var_2881_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_2881_cast_fp16")]; + tensor var_2884_begin_0 = const()[name = tensor("op_2884_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2884_end_0 = const()[name = tensor("op_2884_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_2884_end_mask_0 = const()[name = tensor("op_2884_end_mask_0"), 
val = tensor([true, true, false, true])]; + tensor var_2884_squeeze_mask_0 = const()[name = tensor("op_2884_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_2884_cast_fp16 = slice_by_index(begin = var_2884_begin_0, end = var_2884_end_0, end_mask = var_2884_end_mask_0, squeeze_mask = var_2884_squeeze_mask_0, x = var_2881_cast_fp16)[name = tensor("op_2884_cast_fp16")]; + tensor var_2891 = const()[name = tensor("op_2891"), val = tensor(1)]; + tensor var_2892_interleave_0 = const()[name = tensor("op_2892_interleave_0"), val = tensor(false)]; + tensor var_2892_cast_fp16 = concat(axis = var_2891, interleave = var_2892_interleave_0, values = (var_2722_cast_fp16, var_2740_cast_fp16, var_2758_cast_fp16, var_2776_cast_fp16, var_2794_cast_fp16, var_2812_cast_fp16, var_2830_cast_fp16, var_2848_cast_fp16, var_2866_cast_fp16, var_2884_cast_fp16))[name = tensor("op_2892_cast_fp16")]; + tensor var_2894 = const()[name = tensor("op_2894"), val = tensor([1])]; + tensor var_2895 = const()[name = tensor("op_2895"), val = tensor(false)]; + tensor alignment_heads_weights = reduce_mean(axes = var_2894, keep_dims = var_2895, x = var_2892_cast_fp16)[name = tensor("obj_cast_fp16")]; + } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); +} \ No newline at end of file diff --git a/openai_whisper-small/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-small/TextDecoder.mlmodelc/model.mlmodel new file mode 100644 index 0000000000000000000000000000000000000000..3200504bad9d1d06bf4223e5c695bea0257696e5 --- /dev/null +++ b/openai_whisper-small/TextDecoder.mlmodelc/model.mlmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea861c6dfdd866ed0f2e7fe0c3df7459daa44481cb25236e03698dd6d259391 +size 313629 diff --git a/openai_whisper-small/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-small/TextDecoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..f06c9ac384fec32001d96a53bd48156581906005 --- /dev/null +++ b/openai_whisper-small/TextDecoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfea8044a8f38e8d33f56585b1e75ce023d3845e2a945e20480bd7e16558016e +size 307287346 diff --git a/openai_whisper-small/config.json b/openai_whisper-small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9dee569cf0c20925208ec84fecbb95e873f8bf24 --- /dev/null +++ b/openai_whisper-small/config.json @@ -0,0 +1 @@ +{"_name_or_path": "openai/whisper-small", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 768, "decoder_attention_heads": 12, "decoder_ffn_dim": 3072, "decoder_layerdrop": 0.0, "decoder_layers": 12, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 12, "encoder_ffn_dim": 3072, "encoder_layerdrop": 0.0, "encoder_layers": 12, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 12, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 
3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865} \ No newline at end of file diff --git a/openai_whisper-small/generation_config.json b/openai_whisper-small/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cdd26273f9cd1ab8ecda49f5b8c033134c61cb4a --- /dev/null +++ b/openai_whisper-small/generation_config.json @@ -0,0 +1 @@ +{"alignment_heads": [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]], "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "decoder_start_token_id": 50258, "eos_token_id": 50257, "forced_decoder_ids": [[1, null], [2, 50359]], "is_multilingual": true, "lang_to_id": {"<|af|>": 50327, "<|am|>": 50334, "<|ar|>": 50272, "<|as|>": 50350, "<|az|>": 50304, "<|ba|>": 50355, "<|be|>": 50330, "<|bg|>": 50292, "<|bn|>": 50302, "<|bo|>": 50347, "<|br|>": 50309, "<|bs|>": 50315, "<|ca|>": 50270, "<|cs|>": 50283, "<|cy|>": 50297, "<|da|>": 50285, "<|de|>": 50261, "<|el|>": 50281, "<|en|>": 50259, "<|es|>": 50262, "<|et|>": 50307, "<|eu|>": 50310, "<|fa|>": 50300, "<|fi|>": 50277, "<|fo|>": 50338, "<|fr|>": 50265, "<|gl|>": 50319, "<|gu|>": 50333, "<|haw|>": 50352, "<|ha|>": 50354, "<|he|>": 50279, "<|hi|>": 50276, "<|hr|>": 50291, "<|ht|>": 50339, "<|hu|>": 50286, "<|hy|>": 50312, "<|id|>": 50275, "<|is|>": 50311, "<|it|>": 50274, "<|ja|>": 50266, "<|jw|>": 50356, "<|ka|>": 50329, "<|kk|>": 50316, "<|km|>": 50323, "<|kn|>": 50306, "<|ko|>": 50264, "<|la|>": 50294, "<|lb|>": 50345, "<|ln|>": 50353, "<|lo|>": 50336, "<|lt|>": 50293, "<|lv|>": 50301, "<|mg|>": 50349, "<|mi|>": 50295, "<|mk|>": 50308, "<|ml|>": 50296, "<|mn|>": 50314, "<|mr|>": 50320, "<|ms|>": 50282, "<|mt|>": 50343, "<|my|>": 50346, "<|ne|>": 50313, "<|nl|>": 50271, "<|nn|>": 50342, "<|no|>": 50288, "<|oc|>": 50328, "<|pa|>": 50321, "<|pl|>": 50269, "<|ps|>": 50340, "<|pt|>": 50267, "<|ro|>": 50284, "<|ru|>": 50263, "<|sa|>": 50344, "<|sd|>": 50332, "<|si|>": 50322, "<|sk|>": 50298, "<|sl|>": 50305, "<|sn|>": 50324, "<|so|>": 50326, "<|sq|>": 50317, "<|sr|>": 50303, "<|su|>": 50357, "<|sv|>": 50273, "<|sw|>": 50318, "<|ta|>": 50287, "<|te|>": 50299, "<|tg|>": 50331, "<|th|>": 50289, "<|tk|>": 50341, "<|tl|>": 50348, "<|tr|>": 50268, "<|tt|>": 50351, "<|uk|>": 50280, "<|ur|>": 50290, "<|uz|>": 50337, "<|vi|>": 50278, "<|yi|>": 50335, "<|yo|>": 50325, "<|zh|>": 50260}, "max_initial_timestamp_index": 50, "max_length": 448, "no_timestamps_token_id": 50363, "pad_token_id": 50257, "prev_sot_token_id": 50361, "return_timestamps": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "task_to_id": {"transcribe": 50359, "translate": 50358}, "transformers_version": "4.31.0.dev0"} \ No newline at end of file