diff --git "a/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil" "b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil" @@ -0,0 +1,2593 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] +{ + func main(tensor cache_length, tensor decoder_key_padding_mask, state> encoder_attn_key_cache, state> encoder_attn_key_padding_mask, state> encoder_attn_value_cache, tensor input_ids, tensor kv_cache_update_mask, state> self_attn_key_cache, state> self_attn_value_cache) { + int32 var_42_axis_0 = const()[name = string("op_42_axis_0"), val = int32(0)]; + int32 var_42_batch_dims_0 = const()[name = string("op_42_batch_dims_0"), val = int32(0)]; + bool var_42_validate_indices_0 = const()[name = string("op_42_validate_indices_0"), val = bool(false)]; + tensor embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_42_cast_fp16 = gather(axis = var_42_axis_0, batch_dims = var_42_batch_dims_0, indices = input_ids, validate_indices = var_42_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_42_cast_fp16")]; + int32 var_49_axis_0 = const()[name = string("op_49_axis_0"), val = int32(0)]; + int32 var_49_batch_dims_0 = const()[name = string("op_49_batch_dims_0"), val = int32(0)]; + bool var_49_validate_indices_0 = const()[name = string("op_49_validate_indices_0"), val = bool(false)]; + tensor embed_positions_inlier_module_weight_to_fp16 = const()[name = string("embed_positions_inlier_module_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79664768)))]; + string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")]; + tensor cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_0")]; + tensor var_49_cast_fp16_cast_uint16 = gather(axis = var_49_axis_0, batch_dims = var_49_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_49_validate_indices_0, x = embed_positions_inlier_module_weight_to_fp16)[name = string("op_49_cast_fp16_cast_uint16")]; + int32 var_51_axis_0 = const()[name = string("op_51_axis_0"), val = int32(0)]; + int32 var_51_batch_dims_0 = const()[name = string("op_51_batch_dims_0"), val = int32(0)]; + bool var_51_validate_indices_0 = const()[name = string("op_51_validate_indices_0"), val = bool(false)]; + tensor embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80361920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80352960))))[name = string("embed_positions_outlier_module_weight_to_fp16_sparsified")]; + tensor var_51_cast_fp16_cast_uint16 = gather(axis = var_51_axis_0, batch_dims = var_51_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_51_validate_indices_0, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = string("op_51_cast_fp16_cast_uint16")]; + tensor var_52_cast_fp16 = add(x = var_49_cast_fp16_cast_uint16, y = var_51_cast_fp16_cast_uint16)[name = string("op_52_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = add(x = var_42_cast_fp16, y = var_52_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; + tensor var_66_axes_0 = const()[name = string("op_66_axes_0"), val = tensor([2])]; + tensor var_66_cast_fp16 = expand_dims(axes = var_66_axes_0, x = hidden_states_1_cast_fp16)[name = string("op_66_cast_fp16")]; + tensor inputs_1_axes_0 = const()[name = string("inputs_1_axes_0"), val = tensor([3])]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_66_cast_fp16)[name = string("inputs_1_cast_fp16")]; + tensor read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")]; + tensor tile_0 = const()[name = string("tile_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80404992)))]; + int32 var_71_axis_0 = const()[name = string("op_71_axis_0"), val = int32(0)]; + tensor var_71_cast_fp16_0, tensor var_71_cast_fp16_1, tensor var_71_cast_fp16_2, tensor var_71_cast_fp16_3, tensor var_71_cast_fp16_4, tensor var_71_cast_fp16_5, tensor var_71_cast_fp16_6, tensor var_71_cast_fp16_7, tensor var_71_cast_fp16_8, tensor var_71_cast_fp16_9, tensor var_71_cast_fp16_10, tensor var_71_cast_fp16_11 = split(axis = var_71_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_71_cast_fp16")]; + tensor read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")]; + tensor tile_1 = const()[name = string("tile_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80405120)))]; + int32 var_86_axis_0 = const()[name = string("op_86_axis_0"), val = int32(0)]; + tensor var_86_cast_fp16_0, tensor var_86_cast_fp16_1, tensor var_86_cast_fp16_2, tensor var_86_cast_fp16_3, tensor var_86_cast_fp16_4, tensor var_86_cast_fp16_5, tensor var_86_cast_fp16_6, tensor var_86_cast_fp16_7, tensor var_86_cast_fp16_8, tensor var_86_cast_fp16_9, tensor var_86_cast_fp16_10, tensor var_86_cast_fp16_11 = split(axis = var_86_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_86_cast_fp16")]; + tensor read_state_2 = read_state(input = encoder_attn_key_cache)[name = string("read_state_2")]; + tensor obj_17_begin_0 = const()[name = string("obj_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_17_end_0 = const()[name = string("obj_17_end_0"), val = tensor([1, 768, 1, 1536])]; + tensor obj_17_end_mask_0 = const()[name = string("obj_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_17_cast_fp16 = slice_by_index(begin = obj_17_begin_0, end = obj_17_end_0, end_mask = obj_17_end_mask_0, x = read_state_2)[name = string("obj_17_cast_fp16")]; + tensor read_state_3 = read_state(input = encoder_attn_value_cache)[name = string("read_state_3")]; + tensor obj_19_begin_0 = const()[name = string("obj_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor obj_19_end_0 = const()[name = string("obj_19_end_0"), val = tensor([1, 768, 1, 1536])]; + tensor obj_19_end_mask_0 = const()[name = string("obj_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_19_cast_fp16 = slice_by_index(begin = obj_19_begin_0, end = obj_19_end_0, end_mask = obj_19_end_mask_0, x = read_state_3)[name = string("obj_19_cast_fp16")]; + int32 var_114 = const()[name = string("op_114"), val = int32(3)]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_139_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor obj_5_mean_0_to_fp16 = const()[name = string("obj_5_mean_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80405248)))]; + tensor obj_5_variance_0_to_fp16 = const()[name = string("obj_5_variance_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80406848)))]; + tensor obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80408448)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80410048)))]; + fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_5_cast_fp16")]; + string var_161_pad_type_0 = const()[name = string("op_161_pad_type_0"), val = string("valid")]; + tensor var_161_strides_0 = const()[name = string("op_161_strides_0"), val = tensor([1, 1])]; + tensor var_161_pad_0 = const()[name = string("op_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_dilations_0 = const()[name = string("op_161_dilations_0"), val = tensor([1, 1])]; + int32 var_161_groups_0 = const()[name = string("op_161_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80411648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80706624))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80706752)))]; + tensor var_161_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_161_dilations_0, groups = var_161_groups_0, pad = var_161_pad_0, pad_type = var_161_pad_type_0, strides = var_161_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_161_cast_fp16")]; + string var_167_pad_type_0 = const()[name = string("op_167_pad_type_0"), val = string("valid")]; + tensor var_167_strides_0 = const()[name = string("op_167_strides_0"), val = tensor([1, 1])]; + tensor var_167_pad_0 = const()[name = string("op_167_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_167_dilations_0 = const()[name = string("op_167_dilations_0"), val = tensor([1, 1])]; + int32 var_167_groups_0 = const()[name = string("op_167_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80719808))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80708352))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_167_cast_fp16 = conv(dilations = var_167_dilations_0, groups = var_167_groups_0, pad = var_167_pad_0, pad_type = var_167_pad_type_0, strides = var_167_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_167_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_161_cast_fp16, y = var_167_cast_fp16)[name = string("query_1_cast_fp16")]; + string var_176_pad_type_0 = const()[name = string("op_176_pad_type_0"), val = string("valid")]; + tensor var_176_strides_0 = const()[name = string("op_176_strides_0"), val = tensor([1, 1])]; + tensor var_176_pad_0 = const()[name = string("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_dilations_0 = const()[name = string("op_176_dilations_0"), val = tensor([1, 1])]; + int32 var_176_groups_0 = const()[name = string("op_176_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80793600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81088576))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_176_cast_fp16 = conv(dilations = var_176_dilations_0, groups = var_176_groups_0, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_176_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_176_cast_fp16")]; + string var_182_pad_type_0 = const()[name = string("op_182_pad_type_0"), val = string("valid")]; + tensor var_182_strides_0 = const()[name = string("op_182_strides_0"), val = tensor([1, 1])]; + tensor var_182_pad_0 = const()[name = string("op_182_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_182_dilations_0 = const()[name = string("op_182_dilations_0"), val = tensor([1, 1])]; + int32 var_182_groups_0 = const()[name = string("op_182_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81100800))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81088704))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_182_cast_fp16 = conv(dilations = var_182_dilations_0, groups = var_182_groups_0, pad = var_182_pad_0, pad_type = var_182_pad_type_0, strides = var_182_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_182_cast_fp16")]; + tensor current_key_1_cast_fp16 = add(x = var_176_cast_fp16, y = var_182_cast_fp16)[name = string("current_key_1_cast_fp16")]; + string var_192_pad_type_0 = const()[name = string("op_192_pad_type_0"), val = string("valid")]; + tensor var_192_strides_0 = const()[name = string("op_192_strides_0"), val = tensor([1, 1])]; + tensor var_192_pad_0 = const()[name = string("op_192_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_192_dilations_0 = const()[name = string("op_192_dilations_0"), val = tensor([1, 1])]; + int32 var_192_groups_0 = const()[name = string("op_192_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81174592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81469568))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81469696)))]; + tensor var_192_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_192_dilations_0, groups = var_192_groups_0, pad = var_192_pad_0, pad_type = var_192_pad_type_0, strides = var_192_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_192_cast_fp16")]; + string var_198_pad_type_0 = const()[name = string("op_198_pad_type_0"), val = string("valid")]; + tensor var_198_strides_0 = const()[name = string("op_198_strides_0"), val = tensor([1, 1])]; + tensor var_198_pad_0 = const()[name = string("op_198_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_198_dilations_0 = const()[name = string("op_198_dilations_0"), val = tensor([1, 1])]; + int32 var_198_groups_0 = const()[name = string("op_198_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81478080))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81471296))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_198_cast_fp16 = conv(dilations = var_198_dilations_0, groups = var_198_groups_0, pad = var_198_pad_0, pad_type = var_198_pad_type_0, strides = var_198_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_198_cast_fp16")]; + tensor current_value_1_cast_fp16 = add(x = var_192_cast_fp16, y = var_198_cast_fp16)[name = string("current_value_1_cast_fp16")]; + tensor var_201_axes_0 = const()[name = string("op_201_axes_0"), val = tensor([1])]; + tensor var_201_cast_fp16 = expand_dims(axes = var_201_axes_0, x = kv_cache_update_mask)[name = string("op_201_cast_fp16")]; + tensor var_202_axes_0 = const()[name = string("op_202_axes_0"), val = tensor([2])]; + tensor var_202_cast_fp16 = expand_dims(axes = var_202_axes_0, x = var_201_cast_fp16)[name = string("op_202_cast_fp16")]; + tensor var_204_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_204_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_71_cast_fp16_0, y = var_204_cast_fp16)[name = string("key_1_cast_fp16")]; + tensor var_206_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_206_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_86_cast_fp16_0, y = var_206_cast_fp16)[name = string("value_1_cast_fp16")]; + tensor var_209 = const()[name = string("op_209"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_209, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; + fp16 var_211_to_fp16 = const()[name = string("op_211_to_fp16"), val = fp16(0x1p-3)]; + tensor var_212_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_211_to_fp16)[name = string("op_212_cast_fp16")]; + tensor var_213 = const()[name = string("op_213"), val = tensor([1, 12, 64, -1])]; + tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_1_cast_fp16)[name = string("op_214_cast_fp16")]; + bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; + bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_212_cast_fp16, y = var_214_cast_fp16)[name = string("mh_w_1_cast_fp16")]; + tensor var_218_axes_0 = const()[name = string("op_218_axes_0"), val = tensor([1])]; + tensor var_218_cast_fp16 = expand_dims(axes = var_218_axes_0, x = decoder_key_padding_mask)[name = string("op_218_cast_fp16")]; + tensor var_219_axes_0 = const()[name = string("op_219_axes_0"), val = tensor([2])]; + tensor var_219_cast_fp16 = expand_dims(axes = var_219_axes_0, x = var_218_cast_fp16)[name = string("op_219_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_3_cast_fp16")]; + tensor var_222_cast_fp16 = softmax(axis = var_114, x = mh_w_3_cast_fp16)[name = string("op_222_cast_fp16")]; + tensor var_223 = const()[name = string("op_223"), val = tensor([1, 12, 64, -1])]; + tensor var_224_cast_fp16 = reshape(shape = var_223, x = value_1_cast_fp16)[name = string("op_224_cast_fp16")]; + bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; + bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_224_cast_fp16, y = var_222_cast_fp16)[name = string("attn_1_cast_fp16")]; + tensor var_227 = const()[name = string("op_227"), val = tensor([1, 768, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_227, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_237_pad_type_0 = const()[name = string("op_237_pad_type_0"), val = string("valid")]; + tensor var_237_strides_0 = const()[name = string("op_237_strides_0"), val = tensor([1, 1])]; + tensor var_237_pad_0 = const()[name = string("op_237_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_237_dilations_0 = const()[name = string("op_237_dilations_0"), val = tensor([1, 1])]; + int32 var_237_groups_0 = const()[name = string("op_237_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81551872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81846848))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81846976)))]; + tensor var_237_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_237_dilations_0, groups = var_237_groups_0, pad = var_237_pad_0, pad_type = var_237_pad_type_0, strides = var_237_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_237_cast_fp16")]; + string var_243_pad_type_0 = const()[name = string("op_243_pad_type_0"), val = string("valid")]; + tensor var_243_strides_0 = const()[name = string("op_243_strides_0"), val = tensor([1, 1])]; + tensor var_243_pad_0 = const()[name = string("op_243_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_243_dilations_0 = const()[name = string("op_243_dilations_0"), val = tensor([1, 1])]; + int32 var_243_groups_0 = const()[name = string("op_243_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81857344))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81848576))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_243_cast_fp16 = conv(dilations = var_243_dilations_0, groups = var_243_groups_0, pad = var_243_pad_0, pad_type = var_243_pad_type_0, strides = var_243_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_243_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_237_cast_fp16, y = var_243_cast_fp16)[name = string("obj_11_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor([1])]; + fp16 var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_258_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81931136)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81932736)))]; + fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("obj_13_cast_fp16")]; + string var_278_pad_type_0 = const()[name = string("op_278_pad_type_0"), val = string("valid")]; + tensor var_278_strides_0 = const()[name = string("op_278_strides_0"), val = tensor([1, 1])]; + tensor var_278_pad_0 = const()[name = string("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_dilations_0 = const()[name = string("op_278_dilations_0"), val = tensor([1, 1])]; + int32 var_278_groups_0 = const()[name = string("op_278_groups_0"), val = int32(1)]; + tensor layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81934336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82229312))))[name = string("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82229440)))]; + tensor var_278_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_278_dilations_0, groups = var_278_groups_0, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_278_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_278_cast_fp16")]; + string var_284_pad_type_0 = const()[name = string("op_284_pad_type_0"), val = string("valid")]; + tensor var_284_strides_0 = const()[name = string("op_284_strides_0"), val = tensor([1, 1])]; + tensor var_284_pad_0 = const()[name = string("op_284_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_284_dilations_0 = const()[name = string("op_284_dilations_0"), val = tensor([1, 1])]; + int32 var_284_groups_0 = const()[name = string("op_284_groups_0"), val = int32(1)]; + tensor layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82243008))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82231040))))[name = string("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_284_cast_fp16 = conv(dilations = var_284_dilations_0, groups = var_284_groups_0, pad = var_284_pad_0, pad_type = var_284_pad_type_0, strides = var_284_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_284_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_278_cast_fp16, y = var_284_cast_fp16)[name = string("query_3_cast_fp16")]; + tensor var_287 = const()[name = string("op_287"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_287, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")]; + fp16 var_289_to_fp16 = const()[name = string("op_289_to_fp16"), val = fp16(0x1p-3)]; + tensor var_290_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_289_to_fp16)[name = string("op_290_cast_fp16")]; + tensor var_291 = const()[name = string("op_291"), val = tensor([1, 12, 64, -1])]; + tensor var_292_cast_fp16 = reshape(shape = var_291, x = obj_17_cast_fp16)[name = string("op_292_cast_fp16")]; + bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; + bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_290_cast_fp16, y = var_292_cast_fp16)[name = string("mh_w_5_cast_fp16")]; + tensor read_state_4 = read_state(input = encoder_attn_key_padding_mask)[name = string("read_state_4")]; + tensor var_296_axes_0 = const()[name = string("op_296_axes_0"), val = tensor([1])]; + tensor var_296_cast_fp16 = expand_dims(axes = var_296_axes_0, x = read_state_4)[name = string("op_296_cast_fp16")]; + tensor var_297_axes_0 = const()[name = string("op_297_axes_0"), val = tensor([2])]; + tensor var_297_cast_fp16 = expand_dims(axes = var_297_axes_0, x = var_296_cast_fp16)[name = string("op_297_cast_fp16")]; + tensor mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_7_cast_fp16")]; + tensor obj_23_cast_fp16 = softmax(axis = var_114, x = mh_w_7_cast_fp16)[name = string("obj_23_cast_fp16")]; + tensor var_301 = const()[name = string("op_301"), val = tensor([1, 12, 64, -1])]; + tensor var_302_cast_fp16 = reshape(shape = var_301, x = obj_19_cast_fp16)[name = string("op_302_cast_fp16")]; + bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; + bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_302_cast_fp16, y = obj_23_cast_fp16)[name = string("attn_3_cast_fp16")]; + tensor var_305 = const()[name = string("op_305"), val = tensor([1, 768, 1, -1])]; + tensor input_3_cast_fp16 = reshape(shape = var_305, x = attn_3_cast_fp16)[name = string("input_3_cast_fp16")]; + string var_315_pad_type_0 = const()[name = string("op_315_pad_type_0"), val = string("valid")]; + tensor var_315_strides_0 = const()[name = string("op_315_strides_0"), val = tensor([1, 1])]; + tensor var_315_pad_0 = const()[name = string("op_315_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_315_dilations_0 = const()[name = string("op_315_dilations_0"), val = tensor([1, 1])]; + int32 var_315_groups_0 = const()[name = string("op_315_groups_0"), val = int32(1)]; + tensor layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82316800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82611776))))[name = string("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82611904)))]; + tensor var_315_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_315_dilations_0, groups = var_315_groups_0, pad = var_315_pad_0, pad_type = var_315_pad_type_0, strides = var_315_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_315_cast_fp16")]; + string var_321_pad_type_0 = const()[name = string("op_321_pad_type_0"), val = string("valid")]; + tensor var_321_strides_0 = const()[name = string("op_321_strides_0"), val = tensor([1, 1])]; + tensor var_321_pad_0 = const()[name = string("op_321_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_321_dilations_0 = const()[name = string("op_321_dilations_0"), val = tensor([1, 1])]; + int32 var_321_groups_0 = const()[name = string("op_321_groups_0"), val = int32(1)]; + tensor layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82619520))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82613504))))[name = string("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_321_cast_fp16 = conv(dilations = var_321_dilations_0, groups = var_321_groups_0, pad = var_321_pad_0, pad_type = var_321_pad_type_0, strides = var_321_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_321_cast_fp16")]; + tensor obj_21_cast_fp16 = add(x = var_315_cast_fp16, y = var_321_cast_fp16)[name = string("obj_21_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_21_cast_fp16)[name = string("inputs_5_cast_fp16")]; + tensor out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor([1])]; + fp16 var_332_to_fp16 = const()[name = string("op_332_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_332_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")]; + tensor input_5_gamma_0_to_fp16 = const()[name = string("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82693312)))]; + tensor input_5_beta_0_to_fp16 = const()[name = string("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82694912)))]; + fp16 input_5_epsilon_0_to_fp16 = const()[name = string("input_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("input_5_cast_fp16")]; + string var_350_pad_type_0 = const()[name = string("op_350_pad_type_0"), val = string("valid")]; + tensor var_350_strides_0 = const()[name = string("op_350_strides_0"), val = tensor([1, 1])]; + tensor var_350_pad_0 = const()[name = string("op_350_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_350_dilations_0 = const()[name = string("op_350_dilations_0"), val = tensor([1, 1])]; + int32 var_350_groups_0 = const()[name = string("op_350_groups_0"), val = int32(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82696512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83876224))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83876352)))]; + tensor var_350_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_350_dilations_0, groups = var_350_groups_0, pad = var_350_pad_0, pad_type = var_350_pad_type_0, strides = var_350_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("op_350_cast_fp16")]; + string var_356_pad_type_0 = const()[name = string("op_356_pad_type_0"), val = string("valid")]; + tensor var_356_strides_0 = const()[name = string("op_356_strides_0"), val = tensor([1, 1])]; + tensor var_356_pad_0 = const()[name = string("op_356_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_356_dilations_0 = const()[name = string("op_356_dilations_0"), val = tensor([1, 1])]; + int32 var_356_groups_0 = const()[name = string("op_356_groups_0"), val = int32(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83918848))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83882560))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_356_cast_fp16 = conv(dilations = var_356_dilations_0, groups = var_356_groups_0, pad = var_356_pad_0, pad_type = var_356_pad_type_0, strides = var_356_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = string("op_356_cast_fp16")]; + tensor input_7_cast_fp16 = add(x = var_350_cast_fp16, y = var_356_cast_fp16)[name = string("input_7_cast_fp16")]; + string input_9_mode_0 = const()[name = string("input_9_mode_0"), val = string("EXACT")]; + tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = string("input_9_cast_fp16")]; + string var_367_pad_type_0 = const()[name = string("op_367_pad_type_0"), val = string("valid")]; + tensor var_367_strides_0 = const()[name = string("op_367_strides_0"), val = tensor([1, 1])]; + tensor var_367_pad_0 = const()[name = string("op_367_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_367_dilations_0 = const()[name = string("op_367_dilations_0"), val = tensor([1, 1])]; + int32 var_367_groups_0 = const()[name = string("op_367_groups_0"), val = int32(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84213824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85393536))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85393664)))]; + tensor var_367_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_367_dilations_0, groups = var_367_groups_0, pad = var_367_pad_0, pad_type = var_367_pad_type_0, strides = var_367_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_367_cast_fp16")]; + string var_373_pad_type_0 = const()[name = string("op_373_pad_type_0"), val = string("valid")]; + tensor var_373_strides_0 = const()[name = string("op_373_strides_0"), val = tensor([1, 1])]; + tensor var_373_pad_0 = const()[name = string("op_373_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_373_dilations_0 = const()[name = string("op_373_dilations_0"), val = tensor([1, 1])]; + int32 var_373_groups_0 = const()[name = string("op_373_groups_0"), val = int32(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85420224))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85395264))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_373_cast_fp16 = conv(dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_373_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = add(x = var_367_cast_fp16, y = var_373_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = string("inputs_7_cast_fp16")]; + tensor obj_35_begin_0 = const()[name = string("obj_35_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor obj_35_end_0 = const()[name = string("obj_35_end_0"), val = tensor([2, 768, 1, 1536])]; + tensor obj_35_end_mask_0 = const()[name = string("obj_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_35_cast_fp16 = slice_by_index(begin = obj_35_begin_0, end = obj_35_end_0, end_mask = obj_35_end_mask_0, x = read_state_2)[name = string("obj_35_cast_fp16")]; + tensor obj_37_begin_0 = const()[name = string("obj_37_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor obj_37_end_0 = const()[name = string("obj_37_end_0"), val = tensor([2, 768, 1, 1536])]; + tensor obj_37_end_mask_0 = const()[name = string("obj_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_37_cast_fp16 = slice_by_index(begin = obj_37_begin_0, end = obj_37_end_0, end_mask = obj_37_end_mask_0, x = read_state_3)[name = string("obj_37_cast_fp16")]; + int32 var_395 = const()[name = string("op_395"), val = int32(3)]; + tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_420_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85715200)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85716800)))]; + fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("obj_25_cast_fp16")]; + string var_442_pad_type_0 = const()[name = string("op_442_pad_type_0"), val = string("valid")]; + tensor var_442_strides_0 = const()[name = string("op_442_strides_0"), val = tensor([1, 1])]; + tensor var_442_pad_0 = const()[name = string("op_442_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_442_dilations_0 = const()[name = string("op_442_dilations_0"), val = tensor([1, 1])]; + int32 var_442_groups_0 = const()[name = string("op_442_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85718400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86013376))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86013504)))]; + tensor var_442_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_442_cast_fp16")]; + string var_448_pad_type_0 = const()[name = string("op_448_pad_type_0"), val = string("valid")]; + tensor var_448_strides_0 = const()[name = string("op_448_strides_0"), val = tensor([1, 1])]; + tensor var_448_pad_0 = const()[name = string("op_448_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_448_dilations_0 = const()[name = string("op_448_dilations_0"), val = tensor([1, 1])]; + int32 var_448_groups_0 = const()[name = string("op_448_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86032320))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86015104))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_448_cast_fp16 = conv(dilations = var_448_dilations_0, groups = var_448_groups_0, pad = var_448_pad_0, pad_type = var_448_pad_type_0, strides = var_448_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_448_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_442_cast_fp16, y = var_448_cast_fp16)[name = string("query_5_cast_fp16")]; + string var_457_pad_type_0 = const()[name = string("op_457_pad_type_0"), val = string("valid")]; + tensor var_457_strides_0 = const()[name = string("op_457_strides_0"), val = tensor([1, 1])]; + tensor var_457_pad_0 = const()[name = string("op_457_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_457_dilations_0 = const()[name = string("op_457_dilations_0"), val = tensor([1, 1])]; + int32 var_457_groups_0 = const()[name = string("op_457_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86106112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86401088))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_457_cast_fp16 = conv(dilations = var_457_dilations_0, groups = var_457_groups_0, pad = var_457_pad_0, pad_type = var_457_pad_type_0, strides = var_457_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_457_cast_fp16")]; + string var_463_pad_type_0 = const()[name = string("op_463_pad_type_0"), val = string("valid")]; + tensor var_463_strides_0 = const()[name = string("op_463_strides_0"), val = tensor([1, 1])]; + tensor var_463_pad_0 = const()[name = string("op_463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_463_dilations_0 = const()[name = string("op_463_dilations_0"), val = tensor([1, 1])]; + int32 var_463_groups_0 = const()[name = string("op_463_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86415168))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86401216))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_463_cast_fp16 = conv(dilations = var_463_dilations_0, groups = var_463_groups_0, pad = var_463_pad_0, pad_type = var_463_pad_type_0, strides = var_463_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_463_cast_fp16")]; + tensor current_key_3_cast_fp16 = add(x = var_457_cast_fp16, y = var_463_cast_fp16)[name = string("current_key_3_cast_fp16")]; + string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")]; + tensor var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor([1, 1])]; + tensor var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor([1, 1])]; + int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86488960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86783936))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86784064)))]; + tensor var_473_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_473_cast_fp16")]; + string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")]; + tensor var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor([1, 1])]; + tensor var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor([1, 1])]; + int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86802496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86785664))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_479_cast_fp16")]; + tensor current_value_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("current_value_3_cast_fp16")]; + tensor var_485_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_71_cast_fp16_1, y = var_485_cast_fp16)[name = string("key_3_cast_fp16")]; + tensor var_487_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_487_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_86_cast_fp16_1, y = var_487_cast_fp16)[name = string("value_3_cast_fp16")]; + tensor var_490 = const()[name = string("op_490"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_490, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")]; + fp16 var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = fp16(0x1p-3)]; + tensor var_493_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_492_to_fp16)[name = string("op_493_cast_fp16")]; + tensor var_494 = const()[name = string("op_494"), val = tensor([1, 12, 64, -1])]; + tensor var_495_cast_fp16 = reshape(shape = var_494, x = key_3_cast_fp16)[name = string("op_495_cast_fp16")]; + bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; + bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_493_cast_fp16, y = var_495_cast_fp16)[name = string("mh_w_9_cast_fp16")]; + tensor mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_11_cast_fp16")]; + tensor var_503_cast_fp16 = softmax(axis = var_395, x = mh_w_11_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor var_504 = const()[name = string("op_504"), val = tensor([1, 12, 64, -1])]; + tensor var_505_cast_fp16 = reshape(shape = var_504, x = value_3_cast_fp16)[name = string("op_505_cast_fp16")]; + bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; + bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_505_cast_fp16, y = var_503_cast_fp16)[name = string("attn_5_cast_fp16")]; + tensor var_508 = const()[name = string("op_508"), val = tensor([1, 768, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_508, x = attn_5_cast_fp16)[name = string("input_11_cast_fp16")]; + string var_518_pad_type_0 = const()[name = string("op_518_pad_type_0"), val = string("valid")]; + tensor var_518_strides_0 = const()[name = string("op_518_strides_0"), val = tensor([1, 1])]; + tensor var_518_pad_0 = const()[name = string("op_518_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_518_dilations_0 = const()[name = string("op_518_dilations_0"), val = tensor([1, 1])]; + int32 var_518_groups_0 = const()[name = string("op_518_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86876288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87171264))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87171392)))]; + tensor var_518_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_518_dilations_0, groups = var_518_groups_0, pad = var_518_pad_0, pad_type = var_518_pad_type_0, strides = var_518_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_518_cast_fp16")]; + string var_524_pad_type_0 = const()[name = string("op_524_pad_type_0"), val = string("valid")]; + tensor var_524_strides_0 = const()[name = string("op_524_strides_0"), val = tensor([1, 1])]; + tensor var_524_pad_0 = const()[name = string("op_524_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_524_dilations_0 = const()[name = string("op_524_dilations_0"), val = tensor([1, 1])]; + int32 var_524_groups_0 = const()[name = string("op_524_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87189952))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87172992))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_524_cast_fp16 = conv(dilations = var_524_dilations_0, groups = var_524_groups_0, pad = var_524_pad_0, pad_type = var_524_pad_type_0, strides = var_524_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_524_cast_fp16")]; + tensor obj_31_cast_fp16 = add(x = var_518_cast_fp16, y = var_524_cast_fp16)[name = string("obj_31_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_9_cast_fp16")]; + tensor out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor([1])]; + fp16 var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_539_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87263744)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87265344)))]; + fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_33_cast_fp16")]; + string var_559_pad_type_0 = const()[name = string("op_559_pad_type_0"), val = string("valid")]; + tensor var_559_strides_0 = const()[name = string("op_559_strides_0"), val = tensor([1, 1])]; + tensor var_559_pad_0 = const()[name = string("op_559_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_559_dilations_0 = const()[name = string("op_559_dilations_0"), val = tensor([1, 1])]; + int32 var_559_groups_0 = const()[name = string("op_559_groups_0"), val = int32(1)]; + tensor layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87266944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87561920))))[name = string("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87562048)))]; + tensor var_559_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_559_dilations_0, groups = var_559_groups_0, pad = var_559_pad_0, pad_type = var_559_pad_type_0, strides = var_559_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_559_cast_fp16")]; + string var_565_pad_type_0 = const()[name = string("op_565_pad_type_0"), val = string("valid")]; + tensor var_565_strides_0 = const()[name = string("op_565_strides_0"), val = tensor([1, 1])]; + tensor var_565_pad_0 = const()[name = string("op_565_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_565_dilations_0 = const()[name = string("op_565_dilations_0"), val = tensor([1, 1])]; + int32 var_565_groups_0 = const()[name = string("op_565_groups_0"), val = int32(1)]; + tensor layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87570560))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87563648))))[name = string("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_565_cast_fp16 = conv(dilations = var_565_dilations_0, groups = var_565_groups_0, pad = var_565_pad_0, pad_type = var_565_pad_type_0, strides = var_565_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_565_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_559_cast_fp16, y = var_565_cast_fp16)[name = string("query_7_cast_fp16")]; + tensor var_568 = const()[name = string("op_568"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_568, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")]; + fp16 var_570_to_fp16 = const()[name = string("op_570_to_fp16"), val = fp16(0x1p-3)]; + tensor var_571_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_570_to_fp16)[name = string("op_571_cast_fp16")]; + tensor var_572 = const()[name = string("op_572"), val = tensor([1, 12, 64, -1])]; + tensor var_573_cast_fp16 = reshape(shape = var_572, x = obj_35_cast_fp16)[name = string("op_573_cast_fp16")]; + bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; + bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_571_cast_fp16, y = var_573_cast_fp16)[name = string("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_15_cast_fp16")]; + tensor obj_41_cast_fp16 = softmax(axis = var_395, x = mh_w_15_cast_fp16)[name = string("obj_41_cast_fp16")]; + tensor var_582 = const()[name = string("op_582"), val = tensor([1, 12, 64, -1])]; + tensor var_583_cast_fp16 = reshape(shape = var_582, x = obj_37_cast_fp16)[name = string("op_583_cast_fp16")]; + bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; + bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_583_cast_fp16, y = obj_41_cast_fp16)[name = string("attn_7_cast_fp16")]; + tensor var_586 = const()[name = string("op_586"), val = tensor([1, 768, 1, -1])]; + tensor input_13_cast_fp16 = reshape(shape = var_586, x = attn_7_cast_fp16)[name = string("input_13_cast_fp16")]; + string var_596_pad_type_0 = const()[name = string("op_596_pad_type_0"), val = string("valid")]; + tensor var_596_strides_0 = const()[name = string("op_596_strides_0"), val = tensor([1, 1])]; + tensor var_596_pad_0 = const()[name = string("op_596_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_596_dilations_0 = const()[name = string("op_596_dilations_0"), val = tensor([1, 1])]; + int32 var_596_groups_0 = const()[name = string("op_596_groups_0"), val = int32(1)]; + tensor layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87644352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87939328))))[name = string("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87939456)))]; + tensor var_596_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_596_dilations_0, groups = var_596_groups_0, pad = var_596_pad_0, pad_type = var_596_pad_type_0, strides = var_596_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("op_596_cast_fp16")]; + string var_602_pad_type_0 = const()[name = string("op_602_pad_type_0"), val = string("valid")]; + tensor var_602_strides_0 = const()[name = string("op_602_strides_0"), val = tensor([1, 1])]; + tensor var_602_pad_0 = const()[name = string("op_602_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_602_dilations_0 = const()[name = string("op_602_dilations_0"), val = tensor([1, 1])]; + int32 var_602_groups_0 = const()[name = string("op_602_groups_0"), val = int32(1)]; + tensor layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87947776))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87941056))))[name = string("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_602_cast_fp16 = conv(dilations = var_602_dilations_0, groups = var_602_groups_0, pad = var_602_pad_0, pad_type = var_602_pad_type_0, strides = var_602_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = string("op_602_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_596_cast_fp16, y = var_602_cast_fp16)[name = string("obj_39_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor([1])]; + fp16 var_613_to_fp16 = const()[name = string("op_613_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_613_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")]; + tensor input_15_gamma_0_to_fp16 = const()[name = string("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88021568)))]; + tensor input_15_beta_0_to_fp16 = const()[name = string("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88023168)))]; + fp16 input_15_epsilon_0_to_fp16 = const()[name = string("input_15_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_15_cast_fp16")]; + string var_631_pad_type_0 = const()[name = string("op_631_pad_type_0"), val = string("valid")]; + tensor var_631_strides_0 = const()[name = string("op_631_strides_0"), val = tensor([1, 1])]; + tensor var_631_pad_0 = const()[name = string("op_631_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_631_dilations_0 = const()[name = string("op_631_dilations_0"), val = tensor([1, 1])]; + int32 var_631_groups_0 = const()[name = string("op_631_groups_0"), val = int32(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88024768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89204480))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89204608)))]; + tensor var_631_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_631_dilations_0, groups = var_631_groups_0, pad = var_631_pad_0, pad_type = var_631_pad_type_0, strides = var_631_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_631_cast_fp16")]; + string var_637_pad_type_0 = const()[name = string("op_637_pad_type_0"), val = string("valid")]; + tensor var_637_strides_0 = const()[name = string("op_637_strides_0"), val = tensor([1, 1])]; + tensor var_637_pad_0 = const()[name = string("op_637_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_637_dilations_0 = const()[name = string("op_637_dilations_0"), val = tensor([1, 1])]; + int32 var_637_groups_0 = const()[name = string("op_637_groups_0"), val = int32(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89259392))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89210816))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_637_cast_fp16 = conv(dilations = var_637_dilations_0, groups = var_637_groups_0, pad = var_637_pad_0, pad_type = var_637_pad_type_0, strides = var_637_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_637_cast_fp16")]; + tensor input_17_cast_fp16 = add(x = var_631_cast_fp16, y = var_637_cast_fp16)[name = string("input_17_cast_fp16")]; + string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")]; + tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")]; + string var_648_pad_type_0 = const()[name = string("op_648_pad_type_0"), val = string("valid")]; + tensor var_648_strides_0 = const()[name = string("op_648_strides_0"), val = tensor([1, 1])]; + tensor var_648_pad_0 = const()[name = string("op_648_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_648_dilations_0 = const()[name = string("op_648_dilations_0"), val = tensor([1, 1])]; + int32 var_648_groups_0 = const()[name = string("op_648_groups_0"), val = int32(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89554368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90734080))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90734208)))]; + tensor var_648_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_648_dilations_0, groups = var_648_groups_0, pad = var_648_pad_0, pad_type = var_648_pad_type_0, strides = var_648_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_648_cast_fp16")]; + string var_654_pad_type_0 = const()[name = string("op_654_pad_type_0"), val = string("valid")]; + tensor var_654_strides_0 = const()[name = string("op_654_strides_0"), val = tensor([1, 1])]; + tensor var_654_pad_0 = const()[name = string("op_654_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_654_dilations_0 = const()[name = string("op_654_dilations_0"), val = tensor([1, 1])]; + int32 var_654_groups_0 = const()[name = string("op_654_groups_0"), val = int32(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90768320))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90735808))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_654_cast_fp16 = conv(dilations = var_654_dilations_0, groups = var_654_groups_0, pad = var_654_pad_0, pad_type = var_654_pad_type_0, strides = var_654_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_654_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_648_cast_fp16, y = var_654_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_13_cast_fp16")]; + tensor obj_53_begin_0 = const()[name = string("obj_53_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor obj_53_end_0 = const()[name = string("obj_53_end_0"), val = tensor([3, 768, 1, 1536])]; + tensor obj_53_end_mask_0 = const()[name = string("obj_53_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_53_cast_fp16 = slice_by_index(begin = obj_53_begin_0, end = obj_53_end_0, end_mask = obj_53_end_mask_0, x = read_state_2)[name = string("obj_53_cast_fp16")]; + tensor obj_55_begin_0 = const()[name = string("obj_55_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor obj_55_end_0 = const()[name = string("obj_55_end_0"), val = tensor([3, 768, 1, 1536])]; + tensor obj_55_end_mask_0 = const()[name = string("obj_55_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_55_cast_fp16 = slice_by_index(begin = obj_55_begin_0, end = obj_55_end_0, end_mask = obj_55_end_mask_0, x = read_state_3)[name = string("obj_55_cast_fp16")]; + int32 var_676 = const()[name = string("op_676"), val = int32(3)]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_701_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor obj_43_gamma_0_to_fp16 = const()[name = string("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91063296)))]; + tensor obj_43_beta_0_to_fp16 = const()[name = string("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91064896)))]; + fp16 obj_43_epsilon_0_to_fp16 = const()[name = string("obj_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_43_cast_fp16")]; + string var_723_pad_type_0 = const()[name = string("op_723_pad_type_0"), val = string("valid")]; + tensor var_723_strides_0 = const()[name = string("op_723_strides_0"), val = tensor([1, 1])]; + tensor var_723_pad_0 = const()[name = string("op_723_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_723_dilations_0 = const()[name = string("op_723_dilations_0"), val = tensor([1, 1])]; + int32 var_723_groups_0 = const()[name = string("op_723_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91066496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91361472))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91361600)))]; + tensor var_723_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_723_dilations_0, groups = var_723_groups_0, pad = var_723_pad_0, pad_type = var_723_pad_type_0, strides = var_723_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_723_cast_fp16")]; + string var_729_pad_type_0 = const()[name = string("op_729_pad_type_0"), val = string("valid")]; + tensor var_729_strides_0 = const()[name = string("op_729_strides_0"), val = tensor([1, 1])]; + tensor var_729_pad_0 = const()[name = string("op_729_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_729_dilations_0 = const()[name = string("op_729_dilations_0"), val = tensor([1, 1])]; + int32 var_729_groups_0 = const()[name = string("op_729_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91390144))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91363200))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_729_cast_fp16 = conv(dilations = var_729_dilations_0, groups = var_729_groups_0, pad = var_729_pad_0, pad_type = var_729_pad_type_0, strides = var_729_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_729_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_723_cast_fp16, y = var_729_cast_fp16)[name = string("query_9_cast_fp16")]; + string var_738_pad_type_0 = const()[name = string("op_738_pad_type_0"), val = string("valid")]; + tensor var_738_strides_0 = const()[name = string("op_738_strides_0"), val = tensor([1, 1])]; + tensor var_738_pad_0 = const()[name = string("op_738_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_738_dilations_0 = const()[name = string("op_738_dilations_0"), val = tensor([1, 1])]; + int32 var_738_groups_0 = const()[name = string("op_738_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91463936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91758912))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_738_cast_fp16 = conv(dilations = var_738_dilations_0, groups = var_738_groups_0, pad = var_738_pad_0, pad_type = var_738_pad_type_0, strides = var_738_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_738_cast_fp16")]; + string var_744_pad_type_0 = const()[name = string("op_744_pad_type_0"), val = string("valid")]; + tensor var_744_strides_0 = const()[name = string("op_744_strides_0"), val = tensor([1, 1])]; + tensor var_744_pad_0 = const()[name = string("op_744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_744_dilations_0 = const()[name = string("op_744_dilations_0"), val = tensor([1, 1])]; + int32 var_744_groups_0 = const()[name = string("op_744_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91783680))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91759040))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_744_cast_fp16 = conv(dilations = var_744_dilations_0, groups = var_744_groups_0, pad = var_744_pad_0, pad_type = var_744_pad_type_0, strides = var_744_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_744_cast_fp16")]; + tensor current_key_5_cast_fp16 = add(x = var_738_cast_fp16, y = var_744_cast_fp16)[name = string("current_key_5_cast_fp16")]; + string var_754_pad_type_0 = const()[name = string("op_754_pad_type_0"), val = string("valid")]; + tensor var_754_strides_0 = const()[name = string("op_754_strides_0"), val = tensor([1, 1])]; + tensor var_754_pad_0 = const()[name = string("op_754_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_754_dilations_0 = const()[name = string("op_754_dilations_0"), val = tensor([1, 1])]; + int32 var_754_groups_0 = const()[name = string("op_754_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91857472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92152448))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92152576)))]; + tensor var_754_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_754_dilations_0, groups = var_754_groups_0, pad = var_754_pad_0, pad_type = var_754_pad_type_0, strides = var_754_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_754_cast_fp16")]; + string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")]; + tensor var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor([1, 1])]; + tensor var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor([1, 1])]; + int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92186432))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92154176))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_760_cast_fp16 = conv(dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_760_cast_fp16")]; + tensor current_value_5_cast_fp16 = add(x = var_754_cast_fp16, y = var_760_cast_fp16)[name = string("current_value_5_cast_fp16")]; + tensor var_766_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_766_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_71_cast_fp16_2, y = var_766_cast_fp16)[name = string("key_5_cast_fp16")]; + tensor var_768_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_768_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_86_cast_fp16_2, y = var_768_cast_fp16)[name = string("value_5_cast_fp16")]; + tensor var_771 = const()[name = string("op_771"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_771, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")]; + fp16 var_773_to_fp16 = const()[name = string("op_773_to_fp16"), val = fp16(0x1p-3)]; + tensor var_774_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_773_to_fp16)[name = string("op_774_cast_fp16")]; + tensor var_775 = const()[name = string("op_775"), val = tensor([1, 12, 64, -1])]; + tensor var_776_cast_fp16 = reshape(shape = var_775, x = key_5_cast_fp16)[name = string("op_776_cast_fp16")]; + bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; + bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_774_cast_fp16, y = var_776_cast_fp16)[name = string("mh_w_17_cast_fp16")]; + tensor mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_19_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_676, x = mh_w_19_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor var_785 = const()[name = string("op_785"), val = tensor([1, 12, 64, -1])]; + tensor var_786_cast_fp16 = reshape(shape = var_785, x = value_5_cast_fp16)[name = string("op_786_cast_fp16")]; + bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)]; + bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_786_cast_fp16, y = var_784_cast_fp16)[name = string("attn_9_cast_fp16")]; + tensor var_789 = const()[name = string("op_789"), val = tensor([1, 768, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_789, x = attn_9_cast_fp16)[name = string("input_21_cast_fp16")]; + string var_799_pad_type_0 = const()[name = string("op_799_pad_type_0"), val = string("valid")]; + tensor var_799_strides_0 = const()[name = string("op_799_strides_0"), val = tensor([1, 1])]; + tensor var_799_pad_0 = const()[name = string("op_799_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_799_dilations_0 = const()[name = string("op_799_dilations_0"), val = tensor([1, 1])]; + int32 var_799_groups_0 = const()[name = string("op_799_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92260224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92555200))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92555328)))]; + tensor var_799_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_799_dilations_0, groups = var_799_groups_0, pad = var_799_pad_0, pad_type = var_799_pad_type_0, strides = var_799_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("op_799_cast_fp16")]; + string var_805_pad_type_0 = const()[name = string("op_805_pad_type_0"), val = string("valid")]; + tensor var_805_strides_0 = const()[name = string("op_805_strides_0"), val = tensor([1, 1])]; + tensor var_805_pad_0 = const()[name = string("op_805_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_805_dilations_0 = const()[name = string("op_805_dilations_0"), val = tensor([1, 1])]; + int32 var_805_groups_0 = const()[name = string("op_805_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92581120))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92556928))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_805_cast_fp16 = conv(dilations = var_805_dilations_0, groups = var_805_groups_0, pad = var_805_pad_0, pad_type = var_805_pad_type_0, strides = var_805_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = string("op_805_cast_fp16")]; + tensor obj_49_cast_fp16 = add(x = var_799_cast_fp16, y = var_805_cast_fp16)[name = string("obj_49_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_49_cast_fp16)[name = string("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor([1])]; + fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_820_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")]; + tensor obj_51_gamma_0_to_fp16 = const()[name = string("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92654912)))]; + tensor obj_51_beta_0_to_fp16 = const()[name = string("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92656512)))]; + fp16 obj_51_epsilon_0_to_fp16 = const()[name = string("obj_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("obj_51_cast_fp16")]; + string var_840_pad_type_0 = const()[name = string("op_840_pad_type_0"), val = string("valid")]; + tensor var_840_strides_0 = const()[name = string("op_840_strides_0"), val = tensor([1, 1])]; + tensor var_840_pad_0 = const()[name = string("op_840_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_840_dilations_0 = const()[name = string("op_840_dilations_0"), val = tensor([1, 1])]; + int32 var_840_groups_0 = const()[name = string("op_840_groups_0"), val = int32(1)]; + tensor layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92658112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92953088))))[name = string("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92953216)))]; + tensor var_840_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_840_dilations_0, groups = var_840_groups_0, pad = var_840_pad_0, pad_type = var_840_pad_type_0, strides = var_840_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = string("op_840_cast_fp16")]; + string var_846_pad_type_0 = const()[name = string("op_846_pad_type_0"), val = string("valid")]; + tensor var_846_strides_0 = const()[name = string("op_846_strides_0"), val = tensor([1, 1])]; + tensor var_846_pad_0 = const()[name = string("op_846_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_846_dilations_0 = const()[name = string("op_846_dilations_0"), val = tensor([1, 1])]; + int32 var_846_groups_0 = const()[name = string("op_846_groups_0"), val = int32(1)]; + tensor layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92965312))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92954816))))[name = string("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_846_cast_fp16 = conv(dilations = var_846_dilations_0, groups = var_846_groups_0, pad = var_846_pad_0, pad_type = var_846_pad_type_0, strides = var_846_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = string("op_846_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_840_cast_fp16, y = var_846_cast_fp16)[name = string("query_11_cast_fp16")]; + tensor var_849 = const()[name = string("op_849"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_849, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")]; + fp16 var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = fp16(0x1p-3)]; + tensor var_852_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_851_to_fp16)[name = string("op_852_cast_fp16")]; + tensor var_853 = const()[name = string("op_853"), val = tensor([1, 12, 64, -1])]; + tensor var_854_cast_fp16 = reshape(shape = var_853, x = obj_53_cast_fp16)[name = string("op_854_cast_fp16")]; + bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)]; + bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)]; + tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_852_cast_fp16, y = var_854_cast_fp16)[name = string("mh_w_21_cast_fp16")]; + tensor mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_23_cast_fp16")]; + tensor obj_59_cast_fp16 = softmax(axis = var_676, x = mh_w_23_cast_fp16)[name = string("obj_59_cast_fp16")]; + tensor var_863 = const()[name = string("op_863"), val = tensor([1, 12, 64, -1])]; + tensor var_864_cast_fp16 = reshape(shape = var_863, x = obj_55_cast_fp16)[name = string("op_864_cast_fp16")]; + bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)]; + bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_864_cast_fp16, y = obj_59_cast_fp16)[name = string("attn_11_cast_fp16")]; + tensor var_867 = const()[name = string("op_867"), val = tensor([1, 768, 1, -1])]; + tensor input_23_cast_fp16 = reshape(shape = var_867, x = attn_11_cast_fp16)[name = string("input_23_cast_fp16")]; + string var_877_pad_type_0 = const()[name = string("op_877_pad_type_0"), val = string("valid")]; + tensor var_877_strides_0 = const()[name = string("op_877_strides_0"), val = tensor([1, 1])]; + tensor var_877_pad_0 = const()[name = string("op_877_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_877_dilations_0 = const()[name = string("op_877_dilations_0"), val = tensor([1, 1])]; + int32 var_877_groups_0 = const()[name = string("op_877_groups_0"), val = int32(1)]; + tensor layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93039104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93334080))))[name = string("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93334208)))]; + tensor var_877_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_877_dilations_0, groups = var_877_groups_0, pad = var_877_pad_0, pad_type = var_877_pad_type_0, strides = var_877_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_877_cast_fp16")]; + string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")]; + tensor var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor([1, 1])]; + tensor var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor([1, 1])]; + int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)]; + tensor layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93346048))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93335808))))[name = string("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_883_cast_fp16 = conv(dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_883_cast_fp16")]; + tensor obj_57_cast_fp16 = add(x = var_877_cast_fp16, y = var_883_cast_fp16)[name = string("obj_57_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_57_cast_fp16)[name = string("inputs_17_cast_fp16")]; + tensor out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor([1])]; + fp16 var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_894_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")]; + tensor input_25_gamma_0_to_fp16 = const()[name = string("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93419840)))]; + tensor input_25_beta_0_to_fp16 = const()[name = string("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93421440)))]; + fp16 input_25_epsilon_0_to_fp16 = const()[name = string("input_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("input_25_cast_fp16")]; + string var_912_pad_type_0 = const()[name = string("op_912_pad_type_0"), val = string("valid")]; + tensor var_912_strides_0 = const()[name = string("op_912_strides_0"), val = tensor([1, 1])]; + tensor var_912_pad_0 = const()[name = string("op_912_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_912_dilations_0 = const()[name = string("op_912_dilations_0"), val = tensor([1, 1])]; + int32 var_912_groups_0 = const()[name = string("op_912_groups_0"), val = int32(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93423040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94602752))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94602880)))]; + tensor var_912_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_912_dilations_0, groups = var_912_groups_0, pad = var_912_pad_0, pad_type = var_912_pad_type_0, strides = var_912_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_912_cast_fp16")]; + string var_918_pad_type_0 = const()[name = string("op_918_pad_type_0"), val = string("valid")]; + tensor var_918_strides_0 = const()[name = string("op_918_strides_0"), val = tensor([1, 1])]; + tensor var_918_pad_0 = const()[name = string("op_918_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_918_dilations_0 = const()[name = string("op_918_dilations_0"), val = tensor([1, 1])]; + int32 var_918_groups_0 = const()[name = string("op_918_groups_0"), val = int32(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94663168))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94609088))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_918_cast_fp16 = conv(dilations = var_918_dilations_0, groups = var_918_groups_0, pad = var_918_pad_0, pad_type = var_918_pad_type_0, strides = var_918_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_918_cast_fp16")]; + tensor input_27_cast_fp16 = add(x = var_912_cast_fp16, y = var_918_cast_fp16)[name = string("input_27_cast_fp16")]; + string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("EXACT")]; + tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")]; + string var_929_pad_type_0 = const()[name = string("op_929_pad_type_0"), val = string("valid")]; + tensor var_929_strides_0 = const()[name = string("op_929_strides_0"), val = tensor([1, 1])]; + tensor var_929_pad_0 = const()[name = string("op_929_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_929_dilations_0 = const()[name = string("op_929_dilations_0"), val = tensor([1, 1])]; + int32 var_929_groups_0 = const()[name = string("op_929_groups_0"), val = int32(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94958144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96137856))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96137984)))]; + tensor var_929_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_929_dilations_0, groups = var_929_groups_0, pad = var_929_pad_0, pad_type = var_929_pad_type_0, strides = var_929_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("op_929_cast_fp16")]; + string var_935_pad_type_0 = const()[name = string("op_935_pad_type_0"), val = string("valid")]; + tensor var_935_strides_0 = const()[name = string("op_935_strides_0"), val = tensor([1, 1])]; + tensor var_935_pad_0 = const()[name = string("op_935_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_935_dilations_0 = const()[name = string("op_935_dilations_0"), val = tensor([1, 1])]; + int32 var_935_groups_0 = const()[name = string("op_935_groups_0"), val = int32(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96169152))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96139584))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_935_cast_fp16 = conv(dilations = var_935_dilations_0, groups = var_935_groups_0, pad = var_935_pad_0, pad_type = var_935_pad_type_0, strides = var_935_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = string("op_935_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_929_cast_fp16, y = var_935_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_19_cast_fp16")]; + tensor obj_71_begin_0 = const()[name = string("obj_71_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor obj_71_end_0 = const()[name = string("obj_71_end_0"), val = tensor([4, 768, 1, 1536])]; + tensor obj_71_end_mask_0 = const()[name = string("obj_71_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_71_cast_fp16 = slice_by_index(begin = obj_71_begin_0, end = obj_71_end_0, end_mask = obj_71_end_mask_0, x = read_state_2)[name = string("obj_71_cast_fp16")]; + tensor obj_73_begin_0 = const()[name = string("obj_73_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor obj_73_end_0 = const()[name = string("obj_73_end_0"), val = tensor([4, 768, 1, 1536])]; + tensor obj_73_end_mask_0 = const()[name = string("obj_73_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_73_cast_fp16 = slice_by_index(begin = obj_73_begin_0, end = obj_73_end_0, end_mask = obj_73_end_mask_0, x = read_state_3)[name = string("obj_73_cast_fp16")]; + int32 var_957 = const()[name = string("op_957"), val = int32(3)]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_982_to_fp16 = const()[name = string("op_982_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_982_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96464128)))]; + tensor obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96465728)))]; + fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("obj_61_cast_fp16")]; + string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")]; + tensor var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor([1, 1])]; + tensor var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor([1, 1])]; + int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96467328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96762304))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96762432)))]; + tensor var_1004_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1004_cast_fp16")]; + string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")]; + tensor var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor([1, 1])]; + tensor var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor([1, 1])]; + int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96778752))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96764032))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1010_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("query_13_cast_fp16")]; + string var_1019_pad_type_0 = const()[name = string("op_1019_pad_type_0"), val = string("valid")]; + tensor var_1019_strides_0 = const()[name = string("op_1019_strides_0"), val = tensor([1, 1])]; + tensor var_1019_pad_0 = const()[name = string("op_1019_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1019_dilations_0 = const()[name = string("op_1019_dilations_0"), val = tensor([1, 1])]; + int32 var_1019_groups_0 = const()[name = string("op_1019_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96852544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97147520))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1019_cast_fp16 = conv(dilations = var_1019_dilations_0, groups = var_1019_groups_0, pad = var_1019_pad_0, pad_type = var_1019_pad_type_0, strides = var_1019_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1019_cast_fp16")]; + string var_1025_pad_type_0 = const()[name = string("op_1025_pad_type_0"), val = string("valid")]; + tensor var_1025_strides_0 = const()[name = string("op_1025_strides_0"), val = tensor([1, 1])]; + tensor var_1025_pad_0 = const()[name = string("op_1025_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1025_dilations_0 = const()[name = string("op_1025_dilations_0"), val = tensor([1, 1])]; + int32 var_1025_groups_0 = const()[name = string("op_1025_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97163456))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97147648))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1025_cast_fp16 = conv(dilations = var_1025_dilations_0, groups = var_1025_groups_0, pad = var_1025_pad_0, pad_type = var_1025_pad_type_0, strides = var_1025_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1025_cast_fp16")]; + tensor current_key_7_cast_fp16 = add(x = var_1019_cast_fp16, y = var_1025_cast_fp16)[name = string("current_key_7_cast_fp16")]; + string var_1035_pad_type_0 = const()[name = string("op_1035_pad_type_0"), val = string("valid")]; + tensor var_1035_strides_0 = const()[name = string("op_1035_strides_0"), val = tensor([1, 1])]; + tensor var_1035_pad_0 = const()[name = string("op_1035_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1035_dilations_0 = const()[name = string("op_1035_dilations_0"), val = tensor([1, 1])]; + int32 var_1035_groups_0 = const()[name = string("op_1035_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97237248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97532224))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97532352)))]; + tensor var_1035_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1035_dilations_0, groups = var_1035_groups_0, pad = var_1035_pad_0, pad_type = var_1035_pad_type_0, strides = var_1035_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1035_cast_fp16")]; + string var_1041_pad_type_0 = const()[name = string("op_1041_pad_type_0"), val = string("valid")]; + tensor var_1041_strides_0 = const()[name = string("op_1041_strides_0"), val = tensor([1, 1])]; + tensor var_1041_pad_0 = const()[name = string("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_dilations_0 = const()[name = string("op_1041_dilations_0"), val = tensor([1, 1])]; + int32 var_1041_groups_0 = const()[name = string("op_1041_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97541824))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97533952))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1041_cast_fp16 = conv(dilations = var_1041_dilations_0, groups = var_1041_groups_0, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1041_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1041_cast_fp16")]; + tensor current_value_7_cast_fp16 = add(x = var_1035_cast_fp16, y = var_1041_cast_fp16)[name = string("current_value_7_cast_fp16")]; + tensor var_1047_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1047_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_71_cast_fp16_3, y = var_1047_cast_fp16)[name = string("key_7_cast_fp16")]; + tensor var_1049_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1049_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_86_cast_fp16_3, y = var_1049_cast_fp16)[name = string("value_7_cast_fp16")]; + tensor var_1052 = const()[name = string("op_1052"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1052, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")]; + fp16 var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1055_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1054_to_fp16)[name = string("op_1055_cast_fp16")]; + tensor var_1056 = const()[name = string("op_1056"), val = tensor([1, 12, 64, -1])]; + tensor var_1057_cast_fp16 = reshape(shape = var_1056, x = key_7_cast_fp16)[name = string("op_1057_cast_fp16")]; + bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)]; + bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1055_cast_fp16, y = var_1057_cast_fp16)[name = string("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_27_cast_fp16")]; + tensor var_1065_cast_fp16 = softmax(axis = var_957, x = mh_w_27_cast_fp16)[name = string("op_1065_cast_fp16")]; + tensor var_1066 = const()[name = string("op_1066"), val = tensor([1, 12, 64, -1])]; + tensor var_1067_cast_fp16 = reshape(shape = var_1066, x = value_7_cast_fp16)[name = string("op_1067_cast_fp16")]; + bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)]; + bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1067_cast_fp16, y = var_1065_cast_fp16)[name = string("attn_13_cast_fp16")]; + tensor var_1070 = const()[name = string("op_1070"), val = tensor([1, 768, 1, -1])]; + tensor input_31_cast_fp16 = reshape(shape = var_1070, x = attn_13_cast_fp16)[name = string("input_31_cast_fp16")]; + string var_1080_pad_type_0 = const()[name = string("op_1080_pad_type_0"), val = string("valid")]; + tensor var_1080_strides_0 = const()[name = string("op_1080_strides_0"), val = tensor([1, 1])]; + tensor var_1080_pad_0 = const()[name = string("op_1080_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1080_dilations_0 = const()[name = string("op_1080_dilations_0"), val = tensor([1, 1])]; + int32 var_1080_groups_0 = const()[name = string("op_1080_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97615616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97910592))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97910720)))]; + tensor var_1080_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1080_dilations_0, groups = var_1080_groups_0, pad = var_1080_pad_0, pad_type = var_1080_pad_type_0, strides = var_1080_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_1080_cast_fp16")]; + string var_1086_pad_type_0 = const()[name = string("op_1086_pad_type_0"), val = string("valid")]; + tensor var_1086_strides_0 = const()[name = string("op_1086_strides_0"), val = tensor([1, 1])]; + tensor var_1086_pad_0 = const()[name = string("op_1086_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1086_dilations_0 = const()[name = string("op_1086_dilations_0"), val = tensor([1, 1])]; + int32 var_1086_groups_0 = const()[name = string("op_1086_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97923584))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97912320))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1086_cast_fp16 = conv(dilations = var_1086_dilations_0, groups = var_1086_groups_0, pad = var_1086_pad_0, pad_type = var_1086_pad_type_0, strides = var_1086_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_1086_cast_fp16")]; + tensor obj_67_cast_fp16 = add(x = var_1080_cast_fp16, y = var_1086_cast_fp16)[name = string("obj_67_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_21_cast_fp16")]; + tensor out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor([1])]; + fp16 var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1101_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")]; + tensor obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97997376)))]; + tensor obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97998976)))]; + fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_69_cast_fp16")]; + string var_1121_pad_type_0 = const()[name = string("op_1121_pad_type_0"), val = string("valid")]; + tensor var_1121_strides_0 = const()[name = string("op_1121_strides_0"), val = tensor([1, 1])]; + tensor var_1121_pad_0 = const()[name = string("op_1121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1121_dilations_0 = const()[name = string("op_1121_dilations_0"), val = tensor([1, 1])]; + int32 var_1121_groups_0 = const()[name = string("op_1121_groups_0"), val = int32(1)]; + tensor layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98000576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98295552))))[name = string("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98295680)))]; + tensor var_1121_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1121_dilations_0, groups = var_1121_groups_0, pad = var_1121_pad_0, pad_type = var_1121_pad_type_0, strides = var_1121_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_1121_cast_fp16")]; + string var_1127_pad_type_0 = const()[name = string("op_1127_pad_type_0"), val = string("valid")]; + tensor var_1127_strides_0 = const()[name = string("op_1127_strides_0"), val = tensor([1, 1])]; + tensor var_1127_pad_0 = const()[name = string("op_1127_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1127_dilations_0 = const()[name = string("op_1127_dilations_0"), val = tensor([1, 1])]; + int32 var_1127_groups_0 = const()[name = string("op_1127_groups_0"), val = int32(1)]; + tensor layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98307712))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98297280))))[name = string("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1127_cast_fp16 = conv(dilations = var_1127_dilations_0, groups = var_1127_groups_0, pad = var_1127_pad_0, pad_type = var_1127_pad_type_0, strides = var_1127_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_1127_cast_fp16")]; + tensor query_15_cast_fp16 = add(x = var_1121_cast_fp16, y = var_1127_cast_fp16)[name = string("query_15_cast_fp16")]; + tensor var_1130 = const()[name = string("op_1130"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1130, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")]; + fp16 var_1132_to_fp16 = const()[name = string("op_1132_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1133_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1132_to_fp16)[name = string("op_1133_cast_fp16")]; + tensor var_1134 = const()[name = string("op_1134"), val = tensor([1, 12, 64, -1])]; + tensor var_1135_cast_fp16 = reshape(shape = var_1134, x = obj_71_cast_fp16)[name = string("op_1135_cast_fp16")]; + bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)]; + bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1133_cast_fp16, y = var_1135_cast_fp16)[name = string("mh_w_29_cast_fp16")]; + tensor mh_w_31_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_31_cast_fp16")]; + tensor obj_77_cast_fp16 = softmax(axis = var_957, x = mh_w_31_cast_fp16)[name = string("obj_77_cast_fp16")]; + tensor var_1144 = const()[name = string("op_1144"), val = tensor([1, 12, 64, -1])]; + tensor var_1145_cast_fp16 = reshape(shape = var_1144, x = obj_73_cast_fp16)[name = string("op_1145_cast_fp16")]; + bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)]; + bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1145_cast_fp16, y = obj_77_cast_fp16)[name = string("attn_15_cast_fp16")]; + tensor var_1148 = const()[name = string("op_1148"), val = tensor([1, 768, 1, -1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1148, x = attn_15_cast_fp16)[name = string("input_33_cast_fp16")]; + string var_1158_pad_type_0 = const()[name = string("op_1158_pad_type_0"), val = string("valid")]; + tensor var_1158_strides_0 = const()[name = string("op_1158_strides_0"), val = tensor([1, 1])]; + tensor var_1158_pad_0 = const()[name = string("op_1158_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1158_dilations_0 = const()[name = string("op_1158_dilations_0"), val = tensor([1, 1])]; + int32 var_1158_groups_0 = const()[name = string("op_1158_groups_0"), val = int32(1)]; + tensor layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98381504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98676480))))[name = string("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98676608)))]; + tensor var_1158_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1158_dilations_0, groups = var_1158_groups_0, pad = var_1158_pad_0, pad_type = var_1158_pad_type_0, strides = var_1158_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1158_cast_fp16")]; + string var_1164_pad_type_0 = const()[name = string("op_1164_pad_type_0"), val = string("valid")]; + tensor var_1164_strides_0 = const()[name = string("op_1164_strides_0"), val = tensor([1, 1])]; + tensor var_1164_pad_0 = const()[name = string("op_1164_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1164_dilations_0 = const()[name = string("op_1164_dilations_0"), val = tensor([1, 1])]; + int32 var_1164_groups_0 = const()[name = string("op_1164_groups_0"), val = int32(1)]; + tensor layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98687168))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98678208))))[name = string("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1164_cast_fp16 = conv(dilations = var_1164_dilations_0, groups = var_1164_groups_0, pad = var_1164_pad_0, pad_type = var_1164_pad_type_0, strides = var_1164_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1164_cast_fp16")]; + tensor obj_75_cast_fp16 = add(x = var_1158_cast_fp16, y = var_1164_cast_fp16)[name = string("obj_75_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor([1])]; + fp16 var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1175_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98760960)))]; + tensor input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98762560)))]; + fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_35_cast_fp16")]; + string var_1193_pad_type_0 = const()[name = string("op_1193_pad_type_0"), val = string("valid")]; + tensor var_1193_strides_0 = const()[name = string("op_1193_strides_0"), val = tensor([1, 1])]; + tensor var_1193_pad_0 = const()[name = string("op_1193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1193_dilations_0 = const()[name = string("op_1193_dilations_0"), val = tensor([1, 1])]; + int32 var_1193_groups_0 = const()[name = string("op_1193_groups_0"), val = int32(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98764160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99943872))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99944000)))]; + tensor var_1193_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1193_dilations_0, groups = var_1193_groups_0, pad = var_1193_pad_0, pad_type = var_1193_pad_type_0, strides = var_1193_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1193_cast_fp16")]; + string var_1199_pad_type_0 = const()[name = string("op_1199_pad_type_0"), val = string("valid")]; + tensor var_1199_strides_0 = const()[name = string("op_1199_strides_0"), val = tensor([1, 1])]; + tensor var_1199_pad_0 = const()[name = string("op_1199_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1199_dilations_0 = const()[name = string("op_1199_dilations_0"), val = tensor([1, 1])]; + int32 var_1199_groups_0 = const()[name = string("op_1199_groups_0"), val = int32(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99977600))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99950208))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1199_cast_fp16 = conv(dilations = var_1199_dilations_0, groups = var_1199_groups_0, pad = var_1199_pad_0, pad_type = var_1199_pad_type_0, strides = var_1199_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1199_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1193_cast_fp16, y = var_1199_cast_fp16)[name = string("input_37_cast_fp16")]; + string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")]; + string var_1210_pad_type_0 = const()[name = string("op_1210_pad_type_0"), val = string("valid")]; + tensor var_1210_strides_0 = const()[name = string("op_1210_strides_0"), val = tensor([1, 1])]; + tensor var_1210_pad_0 = const()[name = string("op_1210_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1210_dilations_0 = const()[name = string("op_1210_dilations_0"), val = tensor([1, 1])]; + int32 var_1210_groups_0 = const()[name = string("op_1210_groups_0"), val = int32(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100272576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101452288))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101452416)))]; + tensor var_1210_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1210_dilations_0, groups = var_1210_groups_0, pad = var_1210_pad_0, pad_type = var_1210_pad_type_0, strides = var_1210_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1210_cast_fp16")]; + string var_1216_pad_type_0 = const()[name = string("op_1216_pad_type_0"), val = string("valid")]; + tensor var_1216_strides_0 = const()[name = string("op_1216_strides_0"), val = tensor([1, 1])]; + tensor var_1216_pad_0 = const()[name = string("op_1216_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1216_dilations_0 = const()[name = string("op_1216_dilations_0"), val = tensor([1, 1])]; + int32 var_1216_groups_0 = const()[name = string("op_1216_groups_0"), val = int32(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101478464))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101454016))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1216_cast_fp16 = conv(dilations = var_1216_dilations_0, groups = var_1216_groups_0, pad = var_1216_pad_0, pad_type = var_1216_pad_type_0, strides = var_1216_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1216_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_1210_cast_fp16, y = var_1216_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_25_cast_fp16")]; + tensor obj_89_begin_0 = const()[name = string("obj_89_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor obj_89_end_0 = const()[name = string("obj_89_end_0"), val = tensor([5, 768, 1, 1536])]; + tensor obj_89_end_mask_0 = const()[name = string("obj_89_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_89_cast_fp16 = slice_by_index(begin = obj_89_begin_0, end = obj_89_end_0, end_mask = obj_89_end_mask_0, x = read_state_2)[name = string("obj_89_cast_fp16")]; + tensor obj_91_begin_0 = const()[name = string("obj_91_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor obj_91_end_0 = const()[name = string("obj_91_end_0"), val = tensor([5, 768, 1, 1536])]; + tensor obj_91_end_mask_0 = const()[name = string("obj_91_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_91_cast_fp16 = slice_by_index(begin = obj_91_begin_0, end = obj_91_end_0, end_mask = obj_91_end_mask_0, x = read_state_3)[name = string("obj_91_cast_fp16")]; + int32 var_1238 = const()[name = string("op_1238"), val = int32(3)]; + tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; + fp16 var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1263_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor obj_79_gamma_0_to_fp16 = const()[name = string("obj_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101773440)))]; + tensor obj_79_beta_0_to_fp16 = const()[name = string("obj_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101775040)))]; + fp16 obj_79_epsilon_0_to_fp16 = const()[name = string("obj_79_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_79_cast_fp16")]; + string var_1285_pad_type_0 = const()[name = string("op_1285_pad_type_0"), val = string("valid")]; + tensor var_1285_strides_0 = const()[name = string("op_1285_strides_0"), val = tensor([1, 1])]; + tensor var_1285_pad_0 = const()[name = string("op_1285_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1285_dilations_0 = const()[name = string("op_1285_dilations_0"), val = tensor([1, 1])]; + int32 var_1285_groups_0 = const()[name = string("op_1285_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101776640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102071616))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102071744)))]; + tensor var_1285_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1285_dilations_0, groups = var_1285_groups_0, pad = var_1285_pad_0, pad_type = var_1285_pad_type_0, strides = var_1285_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1285_cast_fp16")]; + string var_1291_pad_type_0 = const()[name = string("op_1291_pad_type_0"), val = string("valid")]; + tensor var_1291_strides_0 = const()[name = string("op_1291_strides_0"), val = tensor([1, 1])]; + tensor var_1291_pad_0 = const()[name = string("op_1291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1291_dilations_0 = const()[name = string("op_1291_dilations_0"), val = tensor([1, 1])]; + int32 var_1291_groups_0 = const()[name = string("op_1291_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102081536))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102073344))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1291_cast_fp16 = conv(dilations = var_1291_dilations_0, groups = var_1291_groups_0, pad = var_1291_pad_0, pad_type = var_1291_pad_type_0, strides = var_1291_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1291_cast_fp16")]; + tensor query_17_cast_fp16 = add(x = var_1285_cast_fp16, y = var_1291_cast_fp16)[name = string("query_17_cast_fp16")]; + string var_1300_pad_type_0 = const()[name = string("op_1300_pad_type_0"), val = string("valid")]; + tensor var_1300_strides_0 = const()[name = string("op_1300_strides_0"), val = tensor([1, 1])]; + tensor var_1300_pad_0 = const()[name = string("op_1300_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1300_dilations_0 = const()[name = string("op_1300_dilations_0"), val = tensor([1, 1])]; + int32 var_1300_groups_0 = const()[name = string("op_1300_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102155328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102450304))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1300_cast_fp16 = conv(dilations = var_1300_dilations_0, groups = var_1300_groups_0, pad = var_1300_pad_0, pad_type = var_1300_pad_type_0, strides = var_1300_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1300_cast_fp16")]; + string var_1306_pad_type_0 = const()[name = string("op_1306_pad_type_0"), val = string("valid")]; + tensor var_1306_strides_0 = const()[name = string("op_1306_strides_0"), val = tensor([1, 1])]; + tensor var_1306_pad_0 = const()[name = string("op_1306_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1306_dilations_0 = const()[name = string("op_1306_dilations_0"), val = tensor([1, 1])]; + int32 var_1306_groups_0 = const()[name = string("op_1306_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102459584))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102450432))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1306_cast_fp16 = conv(dilations = var_1306_dilations_0, groups = var_1306_groups_0, pad = var_1306_pad_0, pad_type = var_1306_pad_type_0, strides = var_1306_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1306_cast_fp16")]; + tensor current_key_9_cast_fp16 = add(x = var_1300_cast_fp16, y = var_1306_cast_fp16)[name = string("current_key_9_cast_fp16")]; + string var_1316_pad_type_0 = const()[name = string("op_1316_pad_type_0"), val = string("valid")]; + tensor var_1316_strides_0 = const()[name = string("op_1316_strides_0"), val = tensor([1, 1])]; + tensor var_1316_pad_0 = const()[name = string("op_1316_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1316_dilations_0 = const()[name = string("op_1316_dilations_0"), val = tensor([1, 1])]; + int32 var_1316_groups_0 = const()[name = string("op_1316_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102533376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102828352))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102828480)))]; + tensor var_1316_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1316_dilations_0, groups = var_1316_groups_0, pad = var_1316_pad_0, pad_type = var_1316_pad_type_0, strides = var_1316_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1316_cast_fp16")]; + string var_1322_pad_type_0 = const()[name = string("op_1322_pad_type_0"), val = string("valid")]; + tensor var_1322_strides_0 = const()[name = string("op_1322_strides_0"), val = tensor([1, 1])]; + tensor var_1322_pad_0 = const()[name = string("op_1322_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1322_dilations_0 = const()[name = string("op_1322_dilations_0"), val = tensor([1, 1])]; + int32 var_1322_groups_0 = const()[name = string("op_1322_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102836800))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102830080))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1322_cast_fp16 = conv(dilations = var_1322_dilations_0, groups = var_1322_groups_0, pad = var_1322_pad_0, pad_type = var_1322_pad_type_0, strides = var_1322_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1322_cast_fp16")]; + tensor current_value_9_cast_fp16 = add(x = var_1316_cast_fp16, y = var_1322_cast_fp16)[name = string("current_value_9_cast_fp16")]; + tensor var_1328_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1328_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_71_cast_fp16_4, y = var_1328_cast_fp16)[name = string("key_9_cast_fp16")]; + tensor var_1330_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1330_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_86_cast_fp16_4, y = var_1330_cast_fp16)[name = string("value_9_cast_fp16")]; + tensor var_1333 = const()[name = string("op_1333"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1333, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")]; + fp16 var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1336_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1335_to_fp16)[name = string("op_1336_cast_fp16")]; + tensor var_1337 = const()[name = string("op_1337"), val = tensor([1, 12, 64, -1])]; + tensor var_1338_cast_fp16 = reshape(shape = var_1337, x = key_9_cast_fp16)[name = string("op_1338_cast_fp16")]; + bool mh_w_33_transpose_x_0 = const()[name = string("mh_w_33_transpose_x_0"), val = bool(true)]; + bool mh_w_33_transpose_y_0 = const()[name = string("mh_w_33_transpose_y_0"), val = bool(false)]; + tensor mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_1336_cast_fp16, y = var_1338_cast_fp16)[name = string("mh_w_33_cast_fp16")]; + tensor mh_w_35_cast_fp16 = add(x = mh_w_33_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_35_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_1238, x = mh_w_35_cast_fp16)[name = string("op_1346_cast_fp16")]; + tensor var_1347 = const()[name = string("op_1347"), val = tensor([1, 12, 64, -1])]; + tensor var_1348_cast_fp16 = reshape(shape = var_1347, x = value_9_cast_fp16)[name = string("op_1348_cast_fp16")]; + bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)]; + bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1348_cast_fp16, y = var_1346_cast_fp16)[name = string("attn_17_cast_fp16")]; + tensor var_1351 = const()[name = string("op_1351"), val = tensor([1, 768, 1, -1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1351, x = attn_17_cast_fp16)[name = string("input_41_cast_fp16")]; + string var_1361_pad_type_0 = const()[name = string("op_1361_pad_type_0"), val = string("valid")]; + tensor var_1361_strides_0 = const()[name = string("op_1361_strides_0"), val = tensor([1, 1])]; + tensor var_1361_pad_0 = const()[name = string("op_1361_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1361_dilations_0 = const()[name = string("op_1361_dilations_0"), val = tensor([1, 1])]; + int32 var_1361_groups_0 = const()[name = string("op_1361_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102910592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103205568))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103205696)))]; + tensor var_1361_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1361_dilations_0, groups = var_1361_groups_0, pad = var_1361_pad_0, pad_type = var_1361_pad_type_0, strides = var_1361_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1361_cast_fp16")]; + string var_1367_pad_type_0 = const()[name = string("op_1367_pad_type_0"), val = string("valid")]; + tensor var_1367_strides_0 = const()[name = string("op_1367_strides_0"), val = tensor([1, 1])]; + tensor var_1367_pad_0 = const()[name = string("op_1367_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1367_dilations_0 = const()[name = string("op_1367_dilations_0"), val = tensor([1, 1])]; + int32 var_1367_groups_0 = const()[name = string("op_1367_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103216512))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103207296))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1367_cast_fp16 = conv(dilations = var_1367_dilations_0, groups = var_1367_groups_0, pad = var_1367_pad_0, pad_type = var_1367_pad_type_0, strides = var_1367_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1367_cast_fp16")]; + tensor obj_85_cast_fp16 = add(x = var_1361_cast_fp16, y = var_1367_cast_fp16)[name = string("obj_85_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_85_cast_fp16)[name = string("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor([1])]; + fp16 var_1382_to_fp16 = const()[name = string("op_1382_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1382_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")]; + tensor obj_87_gamma_0_to_fp16 = const()[name = string("obj_87_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103290304)))]; + tensor obj_87_beta_0_to_fp16 = const()[name = string("obj_87_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103291904)))]; + fp16 obj_87_epsilon_0_to_fp16 = const()[name = string("obj_87_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_87_cast_fp16 = batch_norm(beta = obj_87_beta_0_to_fp16, epsilon = obj_87_epsilon_0_to_fp16, gamma = obj_87_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("obj_87_cast_fp16")]; + string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")]; + tensor var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor([1, 1])]; + tensor var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor([1, 1])]; + int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)]; + tensor layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103293504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103588480))))[name = string("layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103588608)))]; + tensor var_1402_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_87_cast_fp16)[name = string("op_1402_cast_fp16")]; + string var_1408_pad_type_0 = const()[name = string("op_1408_pad_type_0"), val = string("valid")]; + tensor var_1408_strides_0 = const()[name = string("op_1408_strides_0"), val = tensor([1, 1])]; + tensor var_1408_pad_0 = const()[name = string("op_1408_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1408_dilations_0 = const()[name = string("op_1408_dilations_0"), val = tensor([1, 1])]; + int32 var_1408_groups_0 = const()[name = string("op_1408_groups_0"), val = int32(1)]; + tensor layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103598720))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103590208))))[name = string("layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1408_cast_fp16 = conv(dilations = var_1408_dilations_0, groups = var_1408_groups_0, pad = var_1408_pad_0, pad_type = var_1408_pad_type_0, strides = var_1408_strides_0, weight = layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_87_cast_fp16)[name = string("op_1408_cast_fp16")]; + tensor query_19_cast_fp16 = add(x = var_1402_cast_fp16, y = var_1408_cast_fp16)[name = string("query_19_cast_fp16")]; + tensor var_1411 = const()[name = string("op_1411"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1411, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")]; + fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1414_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1413_to_fp16)[name = string("op_1414_cast_fp16")]; + tensor var_1415 = const()[name = string("op_1415"), val = tensor([1, 12, 64, -1])]; + tensor var_1416_cast_fp16 = reshape(shape = var_1415, x = obj_89_cast_fp16)[name = string("op_1416_cast_fp16")]; + bool mh_w_37_transpose_x_0 = const()[name = string("mh_w_37_transpose_x_0"), val = bool(true)]; + bool mh_w_37_transpose_y_0 = const()[name = string("mh_w_37_transpose_y_0"), val = bool(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_1414_cast_fp16, y = var_1416_cast_fp16)[name = string("mh_w_37_cast_fp16")]; + tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_39_cast_fp16")]; + tensor obj_95_cast_fp16 = softmax(axis = var_1238, x = mh_w_39_cast_fp16)[name = string("obj_95_cast_fp16")]; + tensor var_1425 = const()[name = string("op_1425"), val = tensor([1, 12, 64, -1])]; + tensor var_1426_cast_fp16 = reshape(shape = var_1425, x = obj_91_cast_fp16)[name = string("op_1426_cast_fp16")]; + bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)]; + bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1426_cast_fp16, y = obj_95_cast_fp16)[name = string("attn_19_cast_fp16")]; + tensor var_1429 = const()[name = string("op_1429"), val = tensor([1, 768, 1, -1])]; + tensor input_43_cast_fp16 = reshape(shape = var_1429, x = attn_19_cast_fp16)[name = string("input_43_cast_fp16")]; + string var_1439_pad_type_0 = const()[name = string("op_1439_pad_type_0"), val = string("valid")]; + tensor var_1439_strides_0 = const()[name = string("op_1439_strides_0"), val = tensor([1, 1])]; + tensor var_1439_pad_0 = const()[name = string("op_1439_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1439_dilations_0 = const()[name = string("op_1439_dilations_0"), val = tensor([1, 1])]; + int32 var_1439_groups_0 = const()[name = string("op_1439_groups_0"), val = int32(1)]; + tensor layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103672512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103967488))))[name = string("layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103967616)))]; + tensor var_1439_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1439_dilations_0, groups = var_1439_groups_0, pad = var_1439_pad_0, pad_type = var_1439_pad_type_0, strides = var_1439_strides_0, weight = layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1439_cast_fp16")]; + string var_1445_pad_type_0 = const()[name = string("op_1445_pad_type_0"), val = string("valid")]; + tensor var_1445_strides_0 = const()[name = string("op_1445_strides_0"), val = tensor([1, 1])]; + tensor var_1445_pad_0 = const()[name = string("op_1445_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1445_dilations_0 = const()[name = string("op_1445_dilations_0"), val = tensor([1, 1])]; + int32 var_1445_groups_0 = const()[name = string("op_1445_groups_0"), val = int32(1)]; + tensor layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103977024))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103969216))))[name = string("layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1445_cast_fp16 = conv(dilations = var_1445_dilations_0, groups = var_1445_groups_0, pad = var_1445_pad_0, pad_type = var_1445_pad_type_0, strides = var_1445_strides_0, weight = layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1445_cast_fp16")]; + tensor obj_93_cast_fp16 = add(x = var_1439_cast_fp16, y = var_1445_cast_fp16)[name = string("obj_93_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_93_cast_fp16)[name = string("inputs_29_cast_fp16")]; + tensor out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor([1])]; + fp16 var_1456_to_fp16 = const()[name = string("op_1456_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1456_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")]; + tensor input_45_gamma_0_to_fp16 = const()[name = string("input_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104050816)))]; + tensor input_45_beta_0_to_fp16 = const()[name = string("input_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104052416)))]; + fp16 input_45_epsilon_0_to_fp16 = const()[name = string("input_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("input_45_cast_fp16")]; + string var_1474_pad_type_0 = const()[name = string("op_1474_pad_type_0"), val = string("valid")]; + tensor var_1474_strides_0 = const()[name = string("op_1474_strides_0"), val = tensor([1, 1])]; + tensor var_1474_pad_0 = const()[name = string("op_1474_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1474_dilations_0 = const()[name = string("op_1474_dilations_0"), val = tensor([1, 1])]; + int32 var_1474_groups_0 = const()[name = string("op_1474_groups_0"), val = int32(1)]; + tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104054016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105233728))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105233856)))]; + tensor var_1474_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("op_1474_cast_fp16")]; + string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; + tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; + tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; + int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; + tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105261376))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105240064))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_45_cast_fp16)[name = string("op_1480_cast_fp16")]; + tensor input_47_cast_fp16 = add(x = var_1474_cast_fp16, y = var_1480_cast_fp16)[name = string("input_47_cast_fp16")]; + string input_49_mode_0 = const()[name = string("input_49_mode_0"), val = string("EXACT")]; + tensor input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")]; + string var_1491_pad_type_0 = const()[name = string("op_1491_pad_type_0"), val = string("valid")]; + tensor var_1491_strides_0 = const()[name = string("op_1491_strides_0"), val = tensor([1, 1])]; + tensor var_1491_pad_0 = const()[name = string("op_1491_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1491_dilations_0 = const()[name = string("op_1491_dilations_0"), val = tensor([1, 1])]; + int32 var_1491_groups_0 = const()[name = string("op_1491_groups_0"), val = int32(1)]; + tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105556352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106736064))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106736192)))]; + tensor var_1491_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1491_dilations_0, groups = var_1491_groups_0, pad = var_1491_pad_0, pad_type = var_1491_pad_type_0, strides = var_1491_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1491_cast_fp16")]; + string var_1497_pad_type_0 = const()[name = string("op_1497_pad_type_0"), val = string("valid")]; + tensor var_1497_strides_0 = const()[name = string("op_1497_strides_0"), val = tensor([1, 1])]; + tensor var_1497_pad_0 = const()[name = string("op_1497_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1497_dilations_0 = const()[name = string("op_1497_dilations_0"), val = tensor([1, 1])]; + int32 var_1497_groups_0 = const()[name = string("op_1497_groups_0"), val = int32(1)]; + tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106759168))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106737792))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1497_cast_fp16 = conv(dilations = var_1497_dilations_0, groups = var_1497_groups_0, pad = var_1497_pad_0, pad_type = var_1497_pad_type_0, strides = var_1497_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1497_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = var_1491_cast_fp16, y = var_1497_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_31_cast_fp16")]; + tensor obj_107_begin_0 = const()[name = string("obj_107_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor obj_107_end_0 = const()[name = string("obj_107_end_0"), val = tensor([6, 768, 1, 1536])]; + tensor obj_107_end_mask_0 = const()[name = string("obj_107_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_107_cast_fp16 = slice_by_index(begin = obj_107_begin_0, end = obj_107_end_0, end_mask = obj_107_end_mask_0, x = read_state_2)[name = string("obj_107_cast_fp16")]; + tensor obj_109_begin_0 = const()[name = string("obj_109_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor obj_109_end_0 = const()[name = string("obj_109_end_0"), val = tensor([6, 768, 1, 1536])]; + tensor obj_109_end_mask_0 = const()[name = string("obj_109_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_109_cast_fp16 = slice_by_index(begin = obj_109_begin_0, end = obj_109_end_0, end_mask = obj_109_end_mask_0, x = read_state_3)[name = string("obj_109_cast_fp16")]; + int32 var_1519 = const()[name = string("op_1519"), val = int32(3)]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_1544_to_fp16 = const()[name = string("op_1544_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1544_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor obj_97_gamma_0_to_fp16 = const()[name = string("obj_97_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107054144)))]; + tensor obj_97_beta_0_to_fp16 = const()[name = string("obj_97_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107055744)))]; + fp16 obj_97_epsilon_0_to_fp16 = const()[name = string("obj_97_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("obj_97_cast_fp16")]; + string var_1566_pad_type_0 = const()[name = string("op_1566_pad_type_0"), val = string("valid")]; + tensor var_1566_strides_0 = const()[name = string("op_1566_strides_0"), val = tensor([1, 1])]; + tensor var_1566_pad_0 = const()[name = string("op_1566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1566_dilations_0 = const()[name = string("op_1566_dilations_0"), val = tensor([1, 1])]; + int32 var_1566_groups_0 = const()[name = string("op_1566_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107057344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107352320))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107352448)))]; + tensor var_1566_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1566_dilations_0, groups = var_1566_groups_0, pad = var_1566_pad_0, pad_type = var_1566_pad_type_0, strides = var_1566_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1566_cast_fp16")]; + string var_1572_pad_type_0 = const()[name = string("op_1572_pad_type_0"), val = string("valid")]; + tensor var_1572_strides_0 = const()[name = string("op_1572_strides_0"), val = tensor([1, 1])]; + tensor var_1572_pad_0 = const()[name = string("op_1572_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1572_dilations_0 = const()[name = string("op_1572_dilations_0"), val = tensor([1, 1])]; + int32 var_1572_groups_0 = const()[name = string("op_1572_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107362752))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107354048))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1572_cast_fp16 = conv(dilations = var_1572_dilations_0, groups = var_1572_groups_0, pad = var_1572_pad_0, pad_type = var_1572_pad_type_0, strides = var_1572_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1572_cast_fp16")]; + tensor query_21_cast_fp16 = add(x = var_1566_cast_fp16, y = var_1572_cast_fp16)[name = string("query_21_cast_fp16")]; + string var_1581_pad_type_0 = const()[name = string("op_1581_pad_type_0"), val = string("valid")]; + tensor var_1581_strides_0 = const()[name = string("op_1581_strides_0"), val = tensor([1, 1])]; + tensor var_1581_pad_0 = const()[name = string("op_1581_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1581_dilations_0 = const()[name = string("op_1581_dilations_0"), val = tensor([1, 1])]; + int32 var_1581_groups_0 = const()[name = string("op_1581_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107436544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107731520))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1581_cast_fp16 = conv(dilations = var_1581_dilations_0, groups = var_1581_groups_0, pad = var_1581_pad_0, pad_type = var_1581_pad_type_0, strides = var_1581_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1581_cast_fp16")]; + string var_1587_pad_type_0 = const()[name = string("op_1587_pad_type_0"), val = string("valid")]; + tensor var_1587_strides_0 = const()[name = string("op_1587_strides_0"), val = tensor([1, 1])]; + tensor var_1587_pad_0 = const()[name = string("op_1587_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1587_dilations_0 = const()[name = string("op_1587_dilations_0"), val = tensor([1, 1])]; + int32 var_1587_groups_0 = const()[name = string("op_1587_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107741056))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107731648))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1587_cast_fp16 = conv(dilations = var_1587_dilations_0, groups = var_1587_groups_0, pad = var_1587_pad_0, pad_type = var_1587_pad_type_0, strides = var_1587_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1587_cast_fp16")]; + tensor current_key_11_cast_fp16 = add(x = var_1581_cast_fp16, y = var_1587_cast_fp16)[name = string("current_key_11_cast_fp16")]; + string var_1597_pad_type_0 = const()[name = string("op_1597_pad_type_0"), val = string("valid")]; + tensor var_1597_strides_0 = const()[name = string("op_1597_strides_0"), val = tensor([1, 1])]; + tensor var_1597_pad_0 = const()[name = string("op_1597_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1597_dilations_0 = const()[name = string("op_1597_dilations_0"), val = tensor([1, 1])]; + int32 var_1597_groups_0 = const()[name = string("op_1597_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107814848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108109824))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108109952)))]; + tensor var_1597_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1597_dilations_0, groups = var_1597_groups_0, pad = var_1597_pad_0, pad_type = var_1597_pad_type_0, strides = var_1597_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1597_cast_fp16")]; + string var_1603_pad_type_0 = const()[name = string("op_1603_pad_type_0"), val = string("valid")]; + tensor var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor([1, 1])]; + tensor var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor([1, 1])]; + int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108117632))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108111552))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1603_cast_fp16 = conv(dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1603_cast_fp16")]; + tensor current_value_11_cast_fp16 = add(x = var_1597_cast_fp16, y = var_1603_cast_fp16)[name = string("current_value_11_cast_fp16")]; + tensor var_1609_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1609_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_71_cast_fp16_5, y = var_1609_cast_fp16)[name = string("key_11_cast_fp16")]; + tensor var_1611_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1611_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_86_cast_fp16_5, y = var_1611_cast_fp16)[name = string("value_11_cast_fp16")]; + tensor var_1614 = const()[name = string("op_1614"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_1614, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")]; + fp16 var_1616_to_fp16 = const()[name = string("op_1616_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1617_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1616_to_fp16)[name = string("op_1617_cast_fp16")]; + tensor var_1618 = const()[name = string("op_1618"), val = tensor([1, 12, 64, -1])]; + tensor var_1619_cast_fp16 = reshape(shape = var_1618, x = key_11_cast_fp16)[name = string("op_1619_cast_fp16")]; + bool mh_w_41_transpose_x_0 = const()[name = string("mh_w_41_transpose_x_0"), val = bool(true)]; + bool mh_w_41_transpose_y_0 = const()[name = string("mh_w_41_transpose_y_0"), val = bool(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_1617_cast_fp16, y = var_1619_cast_fp16)[name = string("mh_w_41_cast_fp16")]; + tensor mh_w_43_cast_fp16 = add(x = mh_w_41_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_43_cast_fp16")]; + tensor var_1627_cast_fp16 = softmax(axis = var_1519, x = mh_w_43_cast_fp16)[name = string("op_1627_cast_fp16")]; + tensor var_1628 = const()[name = string("op_1628"), val = tensor([1, 12, 64, -1])]; + tensor var_1629_cast_fp16 = reshape(shape = var_1628, x = value_11_cast_fp16)[name = string("op_1629_cast_fp16")]; + bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)]; + bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1629_cast_fp16, y = var_1627_cast_fp16)[name = string("attn_21_cast_fp16")]; + tensor var_1632 = const()[name = string("op_1632"), val = tensor([1, 768, 1, -1])]; + tensor input_51_cast_fp16 = reshape(shape = var_1632, x = attn_21_cast_fp16)[name = string("input_51_cast_fp16")]; + string var_1642_pad_type_0 = const()[name = string("op_1642_pad_type_0"), val = string("valid")]; + tensor var_1642_strides_0 = const()[name = string("op_1642_strides_0"), val = tensor([1, 1])]; + tensor var_1642_pad_0 = const()[name = string("op_1642_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1642_dilations_0 = const()[name = string("op_1642_dilations_0"), val = tensor([1, 1])]; + int32 var_1642_groups_0 = const()[name = string("op_1642_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108191424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108486400))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108486528)))]; + tensor var_1642_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1642_dilations_0, groups = var_1642_groups_0, pad = var_1642_pad_0, pad_type = var_1642_pad_type_0, strides = var_1642_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1642_cast_fp16")]; + string var_1648_pad_type_0 = const()[name = string("op_1648_pad_type_0"), val = string("valid")]; + tensor var_1648_strides_0 = const()[name = string("op_1648_strides_0"), val = tensor([1, 1])]; + tensor var_1648_pad_0 = const()[name = string("op_1648_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1648_dilations_0 = const()[name = string("op_1648_dilations_0"), val = tensor([1, 1])]; + int32 var_1648_groups_0 = const()[name = string("op_1648_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108495488))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108488128))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1648_cast_fp16 = conv(dilations = var_1648_dilations_0, groups = var_1648_groups_0, pad = var_1648_pad_0, pad_type = var_1648_pad_type_0, strides = var_1648_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1648_cast_fp16")]; + tensor obj_103_cast_fp16 = add(x = var_1642_cast_fp16, y = var_1648_cast_fp16)[name = string("obj_103_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_103_cast_fp16)[name = string("inputs_33_cast_fp16")]; + tensor out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor([1])]; + fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1663_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")]; + tensor obj_105_gamma_0_to_fp16 = const()[name = string("obj_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108569280)))]; + tensor obj_105_beta_0_to_fp16 = const()[name = string("obj_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108570880)))]; + fp16 obj_105_epsilon_0_to_fp16 = const()[name = string("obj_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_105_cast_fp16")]; + string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")]; + tensor var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor([1, 1])]; + tensor var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor([1, 1])]; + int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)]; + tensor layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108572480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108867456))))[name = string("layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108867584)))]; + tensor var_1683_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = string("op_1683_cast_fp16")]; + string var_1689_pad_type_0 = const()[name = string("op_1689_pad_type_0"), val = string("valid")]; + tensor var_1689_strides_0 = const()[name = string("op_1689_strides_0"), val = tensor([1, 1])]; + tensor var_1689_pad_0 = const()[name = string("op_1689_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1689_dilations_0 = const()[name = string("op_1689_dilations_0"), val = tensor([1, 1])]; + int32 var_1689_groups_0 = const()[name = string("op_1689_groups_0"), val = int32(1)]; + tensor layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108875648))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108869184))))[name = string("layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1689_cast_fp16 = conv(dilations = var_1689_dilations_0, groups = var_1689_groups_0, pad = var_1689_pad_0, pad_type = var_1689_pad_type_0, strides = var_1689_strides_0, weight = layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = string("op_1689_cast_fp16")]; + tensor query_23_cast_fp16 = add(x = var_1683_cast_fp16, y = var_1689_cast_fp16)[name = string("query_23_cast_fp16")]; + tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_23_cast_fp16 = reshape(shape = var_1692, x = query_23_cast_fp16)[name = string("mh_q_23_cast_fp16")]; + fp16 var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1695_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1694_to_fp16)[name = string("op_1695_cast_fp16")]; + tensor var_1696 = const()[name = string("op_1696"), val = tensor([1, 12, 64, -1])]; + tensor var_1697_cast_fp16 = reshape(shape = var_1696, x = obj_107_cast_fp16)[name = string("op_1697_cast_fp16")]; + bool mh_w_45_transpose_x_0 = const()[name = string("mh_w_45_transpose_x_0"), val = bool(true)]; + bool mh_w_45_transpose_y_0 = const()[name = string("mh_w_45_transpose_y_0"), val = bool(false)]; + tensor mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_1695_cast_fp16, y = var_1697_cast_fp16)[name = string("mh_w_45_cast_fp16")]; + tensor mh_w_47_cast_fp16 = add(x = mh_w_45_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_47_cast_fp16")]; + tensor obj_113_cast_fp16 = softmax(axis = var_1519, x = mh_w_47_cast_fp16)[name = string("obj_113_cast_fp16")]; + tensor var_1706 = const()[name = string("op_1706"), val = tensor([1, 12, 64, -1])]; + tensor var_1707_cast_fp16 = reshape(shape = var_1706, x = obj_109_cast_fp16)[name = string("op_1707_cast_fp16")]; + bool attn_23_transpose_x_0 = const()[name = string("attn_23_transpose_x_0"), val = bool(false)]; + bool attn_23_transpose_y_0 = const()[name = string("attn_23_transpose_y_0"), val = bool(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1707_cast_fp16, y = obj_113_cast_fp16)[name = string("attn_23_cast_fp16")]; + tensor var_1710 = const()[name = string("op_1710"), val = tensor([1, 768, 1, -1])]; + tensor input_53_cast_fp16 = reshape(shape = var_1710, x = attn_23_cast_fp16)[name = string("input_53_cast_fp16")]; + string var_1720_pad_type_0 = const()[name = string("op_1720_pad_type_0"), val = string("valid")]; + tensor var_1720_strides_0 = const()[name = string("op_1720_strides_0"), val = tensor([1, 1])]; + tensor var_1720_pad_0 = const()[name = string("op_1720_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1720_dilations_0 = const()[name = string("op_1720_dilations_0"), val = tensor([1, 1])]; + int32 var_1720_groups_0 = const()[name = string("op_1720_groups_0"), val = int32(1)]; + tensor layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108949440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109244416))))[name = string("layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109244544)))]; + tensor var_1720_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1720_dilations_0, groups = var_1720_groups_0, pad = var_1720_pad_0, pad_type = var_1720_pad_type_0, strides = var_1720_strides_0, weight = layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("op_1720_cast_fp16")]; + string var_1726_pad_type_0 = const()[name = string("op_1726_pad_type_0"), val = string("valid")]; + tensor var_1726_strides_0 = const()[name = string("op_1726_strides_0"), val = tensor([1, 1])]; + tensor var_1726_pad_0 = const()[name = string("op_1726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1726_dilations_0 = const()[name = string("op_1726_dilations_0"), val = tensor([1, 1])]; + int32 var_1726_groups_0 = const()[name = string("op_1726_groups_0"), val = int32(1)]; + tensor layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109251648))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109246144))))[name = string("layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1726_cast_fp16 = conv(dilations = var_1726_dilations_0, groups = var_1726_groups_0, pad = var_1726_pad_0, pad_type = var_1726_pad_type_0, strides = var_1726_strides_0, weight = layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_53_cast_fp16)[name = string("op_1726_cast_fp16")]; + tensor obj_111_cast_fp16 = add(x = var_1720_cast_fp16, y = var_1726_cast_fp16)[name = string("obj_111_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_111_cast_fp16)[name = string("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor([1])]; + fp16 var_1740_to_fp16 = const()[name = string("op_1740_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1740_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")]; + tensor input_55_gamma_0_to_fp16 = const()[name = string("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109325440)))]; + tensor input_55_beta_0_to_fp16 = const()[name = string("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109327040)))]; + fp16 input_55_epsilon_0_to_fp16 = const()[name = string("input_55_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_55_cast_fp16")]; + string var_1758_pad_type_0 = const()[name = string("op_1758_pad_type_0"), val = string("valid")]; + tensor var_1758_strides_0 = const()[name = string("op_1758_strides_0"), val = tensor([1, 1])]; + tensor var_1758_pad_0 = const()[name = string("op_1758_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1758_dilations_0 = const()[name = string("op_1758_dilations_0"), val = tensor([1, 1])]; + int32 var_1758_groups_0 = const()[name = string("op_1758_groups_0"), val = int32(1)]; + tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109328640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110508352))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110508480)))]; + tensor var_1758_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1758_dilations_0, groups = var_1758_groups_0, pad = var_1758_pad_0, pad_type = var_1758_pad_type_0, strides = var_1758_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1758_cast_fp16")]; + string var_1764_pad_type_0 = const()[name = string("op_1764_pad_type_0"), val = string("valid")]; + tensor var_1764_strides_0 = const()[name = string("op_1764_strides_0"), val = tensor([1, 1])]; + tensor var_1764_pad_0 = const()[name = string("op_1764_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1764_dilations_0 = const()[name = string("op_1764_dilations_0"), val = tensor([1, 1])]; + int32 var_1764_groups_0 = const()[name = string("op_1764_groups_0"), val = int32(1)]; + tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110534016))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110514688))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1764_cast_fp16 = conv(dilations = var_1764_dilations_0, groups = var_1764_groups_0, pad = var_1764_pad_0, pad_type = var_1764_pad_type_0, strides = var_1764_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1764_cast_fp16")]; + tensor input_57_cast_fp16 = add(x = var_1758_cast_fp16, y = var_1764_cast_fp16)[name = string("input_57_cast_fp16")]; + string input_59_mode_0 = const()[name = string("input_59_mode_0"), val = string("EXACT")]; + tensor input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = string("input_59_cast_fp16")]; + string var_1775_pad_type_0 = const()[name = string("op_1775_pad_type_0"), val = string("valid")]; + tensor var_1775_strides_0 = const()[name = string("op_1775_strides_0"), val = tensor([1, 1])]; + tensor var_1775_pad_0 = const()[name = string("op_1775_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1775_dilations_0 = const()[name = string("op_1775_dilations_0"), val = tensor([1, 1])]; + int32 var_1775_groups_0 = const()[name = string("op_1775_groups_0"), val = int32(1)]; + tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110828992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112008704))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112008832)))]; + tensor var_1775_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1775_cast_fp16")]; + string var_1781_pad_type_0 = const()[name = string("op_1781_pad_type_0"), val = string("valid")]; + tensor var_1781_strides_0 = const()[name = string("op_1781_strides_0"), val = tensor([1, 1])]; + tensor var_1781_pad_0 = const()[name = string("op_1781_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1781_dilations_0 = const()[name = string("op_1781_dilations_0"), val = tensor([1, 1])]; + int32 var_1781_groups_0 = const()[name = string("op_1781_groups_0"), val = int32(1)]; + tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112033024))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112010432))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1781_cast_fp16 = conv(dilations = var_1781_dilations_0, groups = var_1781_groups_0, pad = var_1781_pad_0, pad_type = var_1781_pad_type_0, strides = var_1781_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1781_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = var_1775_cast_fp16, y = var_1781_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_37_cast_fp16")]; + tensor obj_125_begin_0 = const()[name = string("obj_125_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor obj_125_end_0 = const()[name = string("obj_125_end_0"), val = tensor([7, 768, 1, 1536])]; + tensor obj_125_end_mask_0 = const()[name = string("obj_125_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_125_cast_fp16 = slice_by_index(begin = obj_125_begin_0, end = obj_125_end_0, end_mask = obj_125_end_mask_0, x = read_state_2)[name = string("obj_125_cast_fp16")]; + tensor obj_127_begin_0 = const()[name = string("obj_127_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor obj_127_end_0 = const()[name = string("obj_127_end_0"), val = tensor([7, 768, 1, 1536])]; + tensor obj_127_end_mask_0 = const()[name = string("obj_127_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_127_cast_fp16 = slice_by_index(begin = obj_127_begin_0, end = obj_127_end_0, end_mask = obj_127_end_mask_0, x = read_state_3)[name = string("obj_127_cast_fp16")]; + int32 var_1804 = const()[name = string("op_1804"), val = int32(3)]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_1829_to_fp16 = const()[name = string("op_1829_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1829_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor obj_115_gamma_0_to_fp16 = const()[name = string("obj_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112328000)))]; + tensor obj_115_beta_0_to_fp16 = const()[name = string("obj_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112329600)))]; + fp16 obj_115_epsilon_0_to_fp16 = const()[name = string("obj_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_115_cast_fp16 = batch_norm(beta = obj_115_beta_0_to_fp16, epsilon = obj_115_epsilon_0_to_fp16, gamma = obj_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_115_cast_fp16")]; + string var_1851_pad_type_0 = const()[name = string("op_1851_pad_type_0"), val = string("valid")]; + tensor var_1851_strides_0 = const()[name = string("op_1851_strides_0"), val = tensor([1, 1])]; + tensor var_1851_pad_0 = const()[name = string("op_1851_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1851_dilations_0 = const()[name = string("op_1851_dilations_0"), val = tensor([1, 1])]; + int32 var_1851_groups_0 = const()[name = string("op_1851_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112331200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112626176))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112626304)))]; + tensor var_1851_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1851_dilations_0, groups = var_1851_groups_0, pad = var_1851_pad_0, pad_type = var_1851_pad_type_0, strides = var_1851_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1851_cast_fp16")]; + string var_1857_pad_type_0 = const()[name = string("op_1857_pad_type_0"), val = string("valid")]; + tensor var_1857_strides_0 = const()[name = string("op_1857_strides_0"), val = tensor([1, 1])]; + tensor var_1857_pad_0 = const()[name = string("op_1857_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1857_dilations_0 = const()[name = string("op_1857_dilations_0"), val = tensor([1, 1])]; + int32 var_1857_groups_0 = const()[name = string("op_1857_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112636224))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112627904))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1857_cast_fp16 = conv(dilations = var_1857_dilations_0, groups = var_1857_groups_0, pad = var_1857_pad_0, pad_type = var_1857_pad_type_0, strides = var_1857_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1857_cast_fp16")]; + tensor query_25_cast_fp16 = add(x = var_1851_cast_fp16, y = var_1857_cast_fp16)[name = string("query_25_cast_fp16")]; + string var_1866_pad_type_0 = const()[name = string("op_1866_pad_type_0"), val = string("valid")]; + tensor var_1866_strides_0 = const()[name = string("op_1866_strides_0"), val = tensor([1, 1])]; + tensor var_1866_pad_0 = const()[name = string("op_1866_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1866_dilations_0 = const()[name = string("op_1866_dilations_0"), val = tensor([1, 1])]; + int32 var_1866_groups_0 = const()[name = string("op_1866_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112710016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113004992))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1866_cast_fp16 = conv(dilations = var_1866_dilations_0, groups = var_1866_groups_0, pad = var_1866_pad_0, pad_type = var_1866_pad_type_0, strides = var_1866_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1866_cast_fp16")]; + string var_1872_pad_type_0 = const()[name = string("op_1872_pad_type_0"), val = string("valid")]; + tensor var_1872_strides_0 = const()[name = string("op_1872_strides_0"), val = tensor([1, 1])]; + tensor var_1872_pad_0 = const()[name = string("op_1872_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1872_dilations_0 = const()[name = string("op_1872_dilations_0"), val = tensor([1, 1])]; + int32 var_1872_groups_0 = const()[name = string("op_1872_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113013632))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113005120))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1872_cast_fp16 = conv(dilations = var_1872_dilations_0, groups = var_1872_groups_0, pad = var_1872_pad_0, pad_type = var_1872_pad_type_0, strides = var_1872_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1872_cast_fp16")]; + tensor current_key_13_cast_fp16 = add(x = var_1866_cast_fp16, y = var_1872_cast_fp16)[name = string("current_key_13_cast_fp16")]; + string var_1882_pad_type_0 = const()[name = string("op_1882_pad_type_0"), val = string("valid")]; + tensor var_1882_strides_0 = const()[name = string("op_1882_strides_0"), val = tensor([1, 1])]; + tensor var_1882_pad_0 = const()[name = string("op_1882_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1882_dilations_0 = const()[name = string("op_1882_dilations_0"), val = tensor([1, 1])]; + int32 var_1882_groups_0 = const()[name = string("op_1882_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113382400))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113382528)))]; + tensor var_1882_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1882_dilations_0, groups = var_1882_groups_0, pad = var_1882_pad_0, pad_type = var_1882_pad_type_0, strides = var_1882_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1882_cast_fp16")]; + string var_1888_pad_type_0 = const()[name = string("op_1888_pad_type_0"), val = string("valid")]; + tensor var_1888_strides_0 = const()[name = string("op_1888_strides_0"), val = tensor([1, 1])]; + tensor var_1888_pad_0 = const()[name = string("op_1888_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1888_dilations_0 = const()[name = string("op_1888_dilations_0"), val = tensor([1, 1])]; + int32 var_1888_groups_0 = const()[name = string("op_1888_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113389824))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113384128))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1888_cast_fp16 = conv(dilations = var_1888_dilations_0, groups = var_1888_groups_0, pad = var_1888_pad_0, pad_type = var_1888_pad_type_0, strides = var_1888_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1888_cast_fp16")]; + tensor current_value_13_cast_fp16 = add(x = var_1882_cast_fp16, y = var_1888_cast_fp16)[name = string("current_value_13_cast_fp16")]; + tensor var_1894_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1894_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_71_cast_fp16_6, y = var_1894_cast_fp16)[name = string("key_13_cast_fp16")]; + tensor var_1896_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1896_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_86_cast_fp16_6, y = var_1896_cast_fp16)[name = string("value_13_cast_fp16")]; + tensor var_1899 = const()[name = string("op_1899"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_1899, x = query_25_cast_fp16)[name = string("mh_q_25_cast_fp16")]; + fp16 var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1902_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_1901_to_fp16)[name = string("op_1902_cast_fp16")]; + tensor var_1903 = const()[name = string("op_1903"), val = tensor([1, 12, 64, -1])]; + tensor var_1904_cast_fp16 = reshape(shape = var_1903, x = key_13_cast_fp16)[name = string("op_1904_cast_fp16")]; + bool mh_w_49_transpose_x_0 = const()[name = string("mh_w_49_transpose_x_0"), val = bool(true)]; + bool mh_w_49_transpose_y_0 = const()[name = string("mh_w_49_transpose_y_0"), val = bool(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_1902_cast_fp16, y = var_1904_cast_fp16)[name = string("mh_w_49_cast_fp16")]; + tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_51_cast_fp16")]; + tensor var_1912_cast_fp16 = softmax(axis = var_1804, x = mh_w_51_cast_fp16)[name = string("op_1912_cast_fp16")]; + tensor var_1913 = const()[name = string("op_1913"), val = tensor([1, 12, 64, -1])]; + tensor var_1914_cast_fp16 = reshape(shape = var_1913, x = value_13_cast_fp16)[name = string("op_1914_cast_fp16")]; + bool attn_25_transpose_x_0 = const()[name = string("attn_25_transpose_x_0"), val = bool(false)]; + bool attn_25_transpose_y_0 = const()[name = string("attn_25_transpose_y_0"), val = bool(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1914_cast_fp16, y = var_1912_cast_fp16)[name = string("attn_25_cast_fp16")]; + tensor var_1917 = const()[name = string("op_1917"), val = tensor([1, 768, 1, -1])]; + tensor input_61_cast_fp16 = reshape(shape = var_1917, x = attn_25_cast_fp16)[name = string("input_61_cast_fp16")]; + string var_1927_pad_type_0 = const()[name = string("op_1927_pad_type_0"), val = string("valid")]; + tensor var_1927_strides_0 = const()[name = string("op_1927_strides_0"), val = tensor([1, 1])]; + tensor var_1927_pad_0 = const()[name = string("op_1927_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1927_dilations_0 = const()[name = string("op_1927_dilations_0"), val = tensor([1, 1])]; + int32 var_1927_groups_0 = const()[name = string("op_1927_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113463616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113758592))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113758720)))]; + tensor var_1927_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1927_dilations_0, groups = var_1927_groups_0, pad = var_1927_pad_0, pad_type = var_1927_pad_type_0, strides = var_1927_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("op_1927_cast_fp16")]; + string var_1933_pad_type_0 = const()[name = string("op_1933_pad_type_0"), val = string("valid")]; + tensor var_1933_strides_0 = const()[name = string("op_1933_strides_0"), val = tensor([1, 1])]; + tensor var_1933_pad_0 = const()[name = string("op_1933_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1933_dilations_0 = const()[name = string("op_1933_dilations_0"), val = tensor([1, 1])]; + int32 var_1933_groups_0 = const()[name = string("op_1933_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113766464))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113760320))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1933_cast_fp16 = conv(dilations = var_1933_dilations_0, groups = var_1933_groups_0, pad = var_1933_pad_0, pad_type = var_1933_pad_type_0, strides = var_1933_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_61_cast_fp16)[name = string("op_1933_cast_fp16")]; + tensor obj_121_cast_fp16 = add(x = var_1927_cast_fp16, y = var_1933_cast_fp16)[name = string("obj_121_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_121_cast_fp16)[name = string("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor([1])]; + fp16 var_1948_to_fp16 = const()[name = string("op_1948_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1948_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")]; + tensor obj_123_gamma_0_to_fp16 = const()[name = string("obj_123_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113840256)))]; + tensor obj_123_beta_0_to_fp16 = const()[name = string("obj_123_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113841856)))]; + fp16 obj_123_epsilon_0_to_fp16 = const()[name = string("obj_123_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_123_cast_fp16 = batch_norm(beta = obj_123_beta_0_to_fp16, epsilon = obj_123_epsilon_0_to_fp16, gamma = obj_123_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("obj_123_cast_fp16")]; + string var_1968_pad_type_0 = const()[name = string("op_1968_pad_type_0"), val = string("valid")]; + tensor var_1968_strides_0 = const()[name = string("op_1968_strides_0"), val = tensor([1, 1])]; + tensor var_1968_pad_0 = const()[name = string("op_1968_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1968_dilations_0 = const()[name = string("op_1968_dilations_0"), val = tensor([1, 1])]; + int32 var_1968_groups_0 = const()[name = string("op_1968_groups_0"), val = int32(1)]; + tensor layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113843456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114138432))))[name = string("layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114138560)))]; + tensor var_1968_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1968_dilations_0, groups = var_1968_groups_0, pad = var_1968_pad_0, pad_type = var_1968_pad_type_0, strides = var_1968_strides_0, weight = layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_123_cast_fp16)[name = string("op_1968_cast_fp16")]; + string var_1974_pad_type_0 = const()[name = string("op_1974_pad_type_0"), val = string("valid")]; + tensor var_1974_strides_0 = const()[name = string("op_1974_strides_0"), val = tensor([1, 1])]; + tensor var_1974_pad_0 = const()[name = string("op_1974_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1974_dilations_0 = const()[name = string("op_1974_dilations_0"), val = tensor([1, 1])]; + int32 var_1974_groups_0 = const()[name = string("op_1974_groups_0"), val = int32(1)]; + tensor layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114145088))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114140160))))[name = string("layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1974_cast_fp16 = conv(dilations = var_1974_dilations_0, groups = var_1974_groups_0, pad = var_1974_pad_0, pad_type = var_1974_pad_type_0, strides = var_1974_strides_0, weight = layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_123_cast_fp16)[name = string("op_1974_cast_fp16")]; + tensor query_27_cast_fp16 = add(x = var_1968_cast_fp16, y = var_1974_cast_fp16)[name = string("query_27_cast_fp16")]; + tensor var_1977 = const()[name = string("op_1977"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_27_cast_fp16 = reshape(shape = var_1977, x = query_27_cast_fp16)[name = string("mh_q_27_cast_fp16")]; + fp16 var_1979_to_fp16 = const()[name = string("op_1979_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1980_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1979_to_fp16)[name = string("op_1980_cast_fp16")]; + tensor var_1981 = const()[name = string("op_1981"), val = tensor([1, 12, 64, -1])]; + tensor var_1982_cast_fp16 = reshape(shape = var_1981, x = obj_125_cast_fp16)[name = string("op_1982_cast_fp16")]; + bool mh_w_53_transpose_x_0 = const()[name = string("mh_w_53_transpose_x_0"), val = bool(true)]; + bool mh_w_53_transpose_y_0 = const()[name = string("mh_w_53_transpose_y_0"), val = bool(false)]; + tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_1980_cast_fp16, y = var_1982_cast_fp16)[name = string("mh_w_53_cast_fp16")]; + tensor mh_w_55_cast_fp16 = add(x = mh_w_53_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_55_cast_fp16")]; + tensor obj_131_cast_fp16 = softmax(axis = var_1804, x = mh_w_55_cast_fp16)[name = string("obj_131_cast_fp16")]; + tensor var_1991 = const()[name = string("op_1991"), val = tensor([1, 12, 64, -1])]; + tensor var_1992_cast_fp16 = reshape(shape = var_1991, x = obj_127_cast_fp16)[name = string("op_1992_cast_fp16")]; + bool attn_27_transpose_x_0 = const()[name = string("attn_27_transpose_x_0"), val = bool(false)]; + bool attn_27_transpose_y_0 = const()[name = string("attn_27_transpose_y_0"), val = bool(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1992_cast_fp16, y = obj_131_cast_fp16)[name = string("attn_27_cast_fp16")]; + tensor var_1995 = const()[name = string("op_1995"), val = tensor([1, 768, 1, -1])]; + tensor input_63_cast_fp16 = reshape(shape = var_1995, x = attn_27_cast_fp16)[name = string("input_63_cast_fp16")]; + string var_2005_pad_type_0 = const()[name = string("op_2005_pad_type_0"), val = string("valid")]; + tensor var_2005_strides_0 = const()[name = string("op_2005_strides_0"), val = tensor([1, 1])]; + tensor var_2005_pad_0 = const()[name = string("op_2005_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2005_dilations_0 = const()[name = string("op_2005_dilations_0"), val = tensor([1, 1])]; + int32 var_2005_groups_0 = const()[name = string("op_2005_groups_0"), val = int32(1)]; + tensor layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114218880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114513856))))[name = string("layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114513984)))]; + tensor var_2005_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2005_dilations_0, groups = var_2005_groups_0, pad = var_2005_pad_0, pad_type = var_2005_pad_type_0, strides = var_2005_strides_0, weight = layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_2005_cast_fp16")]; + string var_2011_pad_type_0 = const()[name = string("op_2011_pad_type_0"), val = string("valid")]; + tensor var_2011_strides_0 = const()[name = string("op_2011_strides_0"), val = tensor([1, 1])]; + tensor var_2011_pad_0 = const()[name = string("op_2011_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2011_dilations_0 = const()[name = string("op_2011_dilations_0"), val = tensor([1, 1])]; + int32 var_2011_groups_0 = const()[name = string("op_2011_groups_0"), val = int32(1)]; + tensor layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114521856))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114515584))))[name = string("layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2011_cast_fp16 = conv(dilations = var_2011_dilations_0, groups = var_2011_groups_0, pad = var_2011_pad_0, pad_type = var_2011_pad_type_0, strides = var_2011_strides_0, weight = layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_2011_cast_fp16")]; + tensor obj_129_cast_fp16 = add(x = var_2005_cast_fp16, y = var_2011_cast_fp16)[name = string("obj_129_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_129_cast_fp16)[name = string("inputs_41_cast_fp16")]; + tensor out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor([1])]; + fp16 var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2022_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")]; + tensor input_65_gamma_0_to_fp16 = const()[name = string("input_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114595648)))]; + tensor input_65_beta_0_to_fp16 = const()[name = string("input_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114597248)))]; + fp16 input_65_epsilon_0_to_fp16 = const()[name = string("input_65_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("input_65_cast_fp16")]; + string var_2040_pad_type_0 = const()[name = string("op_2040_pad_type_0"), val = string("valid")]; + tensor var_2040_strides_0 = const()[name = string("op_2040_strides_0"), val = tensor([1, 1])]; + tensor var_2040_pad_0 = const()[name = string("op_2040_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2040_dilations_0 = const()[name = string("op_2040_dilations_0"), val = tensor([1, 1])]; + int32 var_2040_groups_0 = const()[name = string("op_2040_groups_0"), val = int32(1)]; + tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114598848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115778560))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115778688)))]; + tensor var_2040_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_2040_dilations_0, groups = var_2040_groups_0, pad = var_2040_pad_0, pad_type = var_2040_pad_type_0, strides = var_2040_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_2040_cast_fp16")]; + string var_2046_pad_type_0 = const()[name = string("op_2046_pad_type_0"), val = string("valid")]; + tensor var_2046_strides_0 = const()[name = string("op_2046_strides_0"), val = tensor([1, 1])]; + tensor var_2046_pad_0 = const()[name = string("op_2046_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2046_dilations_0 = const()[name = string("op_2046_dilations_0"), val = tensor([1, 1])]; + int32 var_2046_groups_0 = const()[name = string("op_2046_groups_0"), val = int32(1)]; + tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115804352))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115784896))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2046_cast_fp16 = conv(dilations = var_2046_dilations_0, groups = var_2046_groups_0, pad = var_2046_pad_0, pad_type = var_2046_pad_type_0, strides = var_2046_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_2046_cast_fp16")]; + tensor input_67_cast_fp16 = add(x = var_2040_cast_fp16, y = var_2046_cast_fp16)[name = string("input_67_cast_fp16")]; + string input_69_mode_0 = const()[name = string("input_69_mode_0"), val = string("EXACT")]; + tensor input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = string("input_69_cast_fp16")]; + string var_2057_pad_type_0 = const()[name = string("op_2057_pad_type_0"), val = string("valid")]; + tensor var_2057_strides_0 = const()[name = string("op_2057_strides_0"), val = tensor([1, 1])]; + tensor var_2057_pad_0 = const()[name = string("op_2057_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2057_dilations_0 = const()[name = string("op_2057_dilations_0"), val = tensor([1, 1])]; + int32 var_2057_groups_0 = const()[name = string("op_2057_groups_0"), val = int32(1)]; + tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116099328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117279040))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117279168)))]; + tensor var_2057_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_2057_dilations_0, groups = var_2057_groups_0, pad = var_2057_pad_0, pad_type = var_2057_pad_type_0, strides = var_2057_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("op_2057_cast_fp16")]; + string var_2063_pad_type_0 = const()[name = string("op_2063_pad_type_0"), val = string("valid")]; + tensor var_2063_strides_0 = const()[name = string("op_2063_strides_0"), val = tensor([1, 1])]; + tensor var_2063_pad_0 = const()[name = string("op_2063_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2063_dilations_0 = const()[name = string("op_2063_dilations_0"), val = tensor([1, 1])]; + int32 var_2063_groups_0 = const()[name = string("op_2063_groups_0"), val = int32(1)]; + tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117304960))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117280768))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2063_cast_fp16 = conv(dilations = var_2063_dilations_0, groups = var_2063_groups_0, pad = var_2063_pad_0, pad_type = var_2063_pad_type_0, strides = var_2063_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_69_cast_fp16)[name = string("op_2063_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = var_2057_cast_fp16, y = var_2063_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_43_cast_fp16")]; + tensor obj_143_begin_0 = const()[name = string("obj_143_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor obj_143_end_0 = const()[name = string("obj_143_end_0"), val = tensor([8, 768, 1, 1536])]; + tensor obj_143_end_mask_0 = const()[name = string("obj_143_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_143_cast_fp16 = slice_by_index(begin = obj_143_begin_0, end = obj_143_end_0, end_mask = obj_143_end_mask_0, x = read_state_2)[name = string("obj_143_cast_fp16")]; + tensor obj_145_begin_0 = const()[name = string("obj_145_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor obj_145_end_0 = const()[name = string("obj_145_end_0"), val = tensor([8, 768, 1, 1536])]; + tensor obj_145_end_mask_0 = const()[name = string("obj_145_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_145_cast_fp16 = slice_by_index(begin = obj_145_begin_0, end = obj_145_end_0, end_mask = obj_145_end_mask_0, x = read_state_3)[name = string("obj_145_cast_fp16")]; + int32 var_2085 = const()[name = string("op_2085"), val = int32(3)]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; + fp16 var_2110_to_fp16 = const()[name = string("op_2110_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2110_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor obj_133_gamma_0_to_fp16 = const()[name = string("obj_133_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117599936)))]; + tensor obj_133_beta_0_to_fp16 = const()[name = string("obj_133_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117601536)))]; + fp16 obj_133_epsilon_0_to_fp16 = const()[name = string("obj_133_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_133_cast_fp16 = batch_norm(beta = obj_133_beta_0_to_fp16, epsilon = obj_133_epsilon_0_to_fp16, gamma = obj_133_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("obj_133_cast_fp16")]; + string var_2132_pad_type_0 = const()[name = string("op_2132_pad_type_0"), val = string("valid")]; + tensor var_2132_strides_0 = const()[name = string("op_2132_strides_0"), val = tensor([1, 1])]; + tensor var_2132_pad_0 = const()[name = string("op_2132_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2132_dilations_0 = const()[name = string("op_2132_dilations_0"), val = tensor([1, 1])]; + int32 var_2132_groups_0 = const()[name = string("op_2132_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117603136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117898112))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117898240)))]; + tensor var_2132_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2132_dilations_0, groups = var_2132_groups_0, pad = var_2132_pad_0, pad_type = var_2132_pad_type_0, strides = var_2132_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2132_cast_fp16")]; + string var_2138_pad_type_0 = const()[name = string("op_2138_pad_type_0"), val = string("valid")]; + tensor var_2138_strides_0 = const()[name = string("op_2138_strides_0"), val = tensor([1, 1])]; + tensor var_2138_pad_0 = const()[name = string("op_2138_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2138_dilations_0 = const()[name = string("op_2138_dilations_0"), val = tensor([1, 1])]; + int32 var_2138_groups_0 = const()[name = string("op_2138_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117906816))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117899840))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2138_cast_fp16 = conv(dilations = var_2138_dilations_0, groups = var_2138_groups_0, pad = var_2138_pad_0, pad_type = var_2138_pad_type_0, strides = var_2138_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2138_cast_fp16")]; + tensor query_29_cast_fp16 = add(x = var_2132_cast_fp16, y = var_2138_cast_fp16)[name = string("query_29_cast_fp16")]; + string var_2147_pad_type_0 = const()[name = string("op_2147_pad_type_0"), val = string("valid")]; + tensor var_2147_strides_0 = const()[name = string("op_2147_strides_0"), val = tensor([1, 1])]; + tensor var_2147_pad_0 = const()[name = string("op_2147_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2147_dilations_0 = const()[name = string("op_2147_dilations_0"), val = tensor([1, 1])]; + int32 var_2147_groups_0 = const()[name = string("op_2147_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117980608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118275584))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2147_cast_fp16 = conv(dilations = var_2147_dilations_0, groups = var_2147_groups_0, pad = var_2147_pad_0, pad_type = var_2147_pad_type_0, strides = var_2147_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2147_cast_fp16")]; + string var_2153_pad_type_0 = const()[name = string("op_2153_pad_type_0"), val = string("valid")]; + tensor var_2153_strides_0 = const()[name = string("op_2153_strides_0"), val = tensor([1, 1])]; + tensor var_2153_pad_0 = const()[name = string("op_2153_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2153_dilations_0 = const()[name = string("op_2153_dilations_0"), val = tensor([1, 1])]; + int32 var_2153_groups_0 = const()[name = string("op_2153_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118283648))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118275712))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2153_cast_fp16 = conv(dilations = var_2153_dilations_0, groups = var_2153_groups_0, pad = var_2153_pad_0, pad_type = var_2153_pad_type_0, strides = var_2153_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2153_cast_fp16")]; + tensor current_key_15_cast_fp16 = add(x = var_2147_cast_fp16, y = var_2153_cast_fp16)[name = string("current_key_15_cast_fp16")]; + string var_2163_pad_type_0 = const()[name = string("op_2163_pad_type_0"), val = string("valid")]; + tensor var_2163_strides_0 = const()[name = string("op_2163_strides_0"), val = tensor([1, 1])]; + tensor var_2163_pad_0 = const()[name = string("op_2163_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2163_dilations_0 = const()[name = string("op_2163_dilations_0"), val = tensor([1, 1])]; + int32 var_2163_groups_0 = const()[name = string("op_2163_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118357440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118652416))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118652544)))]; + tensor var_2163_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2163_dilations_0, groups = var_2163_groups_0, pad = var_2163_pad_0, pad_type = var_2163_pad_type_0, strides = var_2163_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2163_cast_fp16")]; + string var_2169_pad_type_0 = const()[name = string("op_2169_pad_type_0"), val = string("valid")]; + tensor var_2169_strides_0 = const()[name = string("op_2169_strides_0"), val = tensor([1, 1])]; + tensor var_2169_pad_0 = const()[name = string("op_2169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2169_dilations_0 = const()[name = string("op_2169_dilations_0"), val = tensor([1, 1])]; + int32 var_2169_groups_0 = const()[name = string("op_2169_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118660608))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118654144))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2169_cast_fp16 = conv(dilations = var_2169_dilations_0, groups = var_2169_groups_0, pad = var_2169_pad_0, pad_type = var_2169_pad_type_0, strides = var_2169_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2169_cast_fp16")]; + tensor current_value_15_cast_fp16 = add(x = var_2163_cast_fp16, y = var_2169_cast_fp16)[name = string("current_value_15_cast_fp16")]; + tensor var_2175_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2175_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_71_cast_fp16_7, y = var_2175_cast_fp16)[name = string("key_15_cast_fp16")]; + tensor var_2177_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2177_cast_fp16")]; + tensor value_15_cast_fp16 = add(x = var_86_cast_fp16_7, y = var_2177_cast_fp16)[name = string("value_15_cast_fp16")]; + tensor var_2180 = const()[name = string("op_2180"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_29_cast_fp16 = reshape(shape = var_2180, x = query_29_cast_fp16)[name = string("mh_q_29_cast_fp16")]; + fp16 var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2183_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2182_to_fp16)[name = string("op_2183_cast_fp16")]; + tensor var_2184 = const()[name = string("op_2184"), val = tensor([1, 12, 64, -1])]; + tensor var_2185_cast_fp16 = reshape(shape = var_2184, x = key_15_cast_fp16)[name = string("op_2185_cast_fp16")]; + bool mh_w_57_transpose_x_0 = const()[name = string("mh_w_57_transpose_x_0"), val = bool(true)]; + bool mh_w_57_transpose_y_0 = const()[name = string("mh_w_57_transpose_y_0"), val = bool(false)]; + tensor mh_w_57_cast_fp16 = matmul(transpose_x = mh_w_57_transpose_x_0, transpose_y = mh_w_57_transpose_y_0, x = var_2183_cast_fp16, y = var_2185_cast_fp16)[name = string("mh_w_57_cast_fp16")]; + tensor mh_w_59_cast_fp16 = add(x = mh_w_57_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_59_cast_fp16")]; + tensor var_2193_cast_fp16 = softmax(axis = var_2085, x = mh_w_59_cast_fp16)[name = string("op_2193_cast_fp16")]; + tensor var_2194 = const()[name = string("op_2194"), val = tensor([1, 12, 64, -1])]; + tensor var_2195_cast_fp16 = reshape(shape = var_2194, x = value_15_cast_fp16)[name = string("op_2195_cast_fp16")]; + bool attn_29_transpose_x_0 = const()[name = string("attn_29_transpose_x_0"), val = bool(false)]; + bool attn_29_transpose_y_0 = const()[name = string("attn_29_transpose_y_0"), val = bool(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2195_cast_fp16, y = var_2193_cast_fp16)[name = string("attn_29_cast_fp16")]; + tensor var_2198 = const()[name = string("op_2198"), val = tensor([1, 768, 1, -1])]; + tensor input_71_cast_fp16 = reshape(shape = var_2198, x = attn_29_cast_fp16)[name = string("input_71_cast_fp16")]; + string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")]; + tensor var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor([1, 1])]; + tensor var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor([1, 1])]; + int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118734400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119029376))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119029504)))]; + tensor var_2208_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_2208_cast_fp16")]; + string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")]; + tensor var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor([1, 1])]; + tensor var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor([1, 1])]; + int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119036928))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119031104))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_2214_cast_fp16")]; + tensor obj_139_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("obj_139_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_139_cast_fp16)[name = string("inputs_45_cast_fp16")]; + tensor out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor([1])]; + fp16 var_2229_to_fp16 = const()[name = string("op_2229_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2229_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")]; + tensor obj_141_gamma_0_to_fp16 = const()[name = string("obj_141_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119110720)))]; + tensor obj_141_beta_0_to_fp16 = const()[name = string("obj_141_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119112320)))]; + fp16 obj_141_epsilon_0_to_fp16 = const()[name = string("obj_141_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_141_cast_fp16")]; + string var_2249_pad_type_0 = const()[name = string("op_2249_pad_type_0"), val = string("valid")]; + tensor var_2249_strides_0 = const()[name = string("op_2249_strides_0"), val = tensor([1, 1])]; + tensor var_2249_pad_0 = const()[name = string("op_2249_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2249_dilations_0 = const()[name = string("op_2249_dilations_0"), val = tensor([1, 1])]; + int32 var_2249_groups_0 = const()[name = string("op_2249_groups_0"), val = int32(1)]; + tensor layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119113920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119408896))))[name = string("layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119409024)))]; + tensor var_2249_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2249_dilations_0, groups = var_2249_groups_0, pad = var_2249_pad_0, pad_type = var_2249_pad_type_0, strides = var_2249_strides_0, weight = layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = string("op_2249_cast_fp16")]; + string var_2255_pad_type_0 = const()[name = string("op_2255_pad_type_0"), val = string("valid")]; + tensor var_2255_strides_0 = const()[name = string("op_2255_strides_0"), val = tensor([1, 1])]; + tensor var_2255_pad_0 = const()[name = string("op_2255_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2255_dilations_0 = const()[name = string("op_2255_dilations_0"), val = tensor([1, 1])]; + int32 var_2255_groups_0 = const()[name = string("op_2255_groups_0"), val = int32(1)]; + tensor layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119417088))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119410624))))[name = string("layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2255_cast_fp16 = conv(dilations = var_2255_dilations_0, groups = var_2255_groups_0, pad = var_2255_pad_0, pad_type = var_2255_pad_type_0, strides = var_2255_strides_0, weight = layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = string("op_2255_cast_fp16")]; + tensor query_31_cast_fp16 = add(x = var_2249_cast_fp16, y = var_2255_cast_fp16)[name = string("query_31_cast_fp16")]; + tensor var_2258 = const()[name = string("op_2258"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_31_cast_fp16 = reshape(shape = var_2258, x = query_31_cast_fp16)[name = string("mh_q_31_cast_fp16")]; + fp16 var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2261_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2260_to_fp16)[name = string("op_2261_cast_fp16")]; + tensor var_2262 = const()[name = string("op_2262"), val = tensor([1, 12, 64, -1])]; + tensor var_2263_cast_fp16 = reshape(shape = var_2262, x = obj_143_cast_fp16)[name = string("op_2263_cast_fp16")]; + bool mh_w_61_transpose_x_0 = const()[name = string("mh_w_61_transpose_x_0"), val = bool(true)]; + bool mh_w_61_transpose_y_0 = const()[name = string("mh_w_61_transpose_y_0"), val = bool(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_2261_cast_fp16, y = var_2263_cast_fp16)[name = string("mh_w_61_cast_fp16")]; + tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_63_cast_fp16")]; + tensor obj_149_cast_fp16 = softmax(axis = var_2085, x = mh_w_63_cast_fp16)[name = string("obj_149_cast_fp16")]; + tensor var_2272 = const()[name = string("op_2272"), val = tensor([1, 12, 64, -1])]; + tensor var_2273_cast_fp16 = reshape(shape = var_2272, x = obj_145_cast_fp16)[name = string("op_2273_cast_fp16")]; + bool attn_31_transpose_x_0 = const()[name = string("attn_31_transpose_x_0"), val = bool(false)]; + bool attn_31_transpose_y_0 = const()[name = string("attn_31_transpose_y_0"), val = bool(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2273_cast_fp16, y = obj_149_cast_fp16)[name = string("attn_31_cast_fp16")]; + tensor var_2276 = const()[name = string("op_2276"), val = tensor([1, 768, 1, -1])]; + tensor input_73_cast_fp16 = reshape(shape = var_2276, x = attn_31_cast_fp16)[name = string("input_73_cast_fp16")]; + string var_2286_pad_type_0 = const()[name = string("op_2286_pad_type_0"), val = string("valid")]; + tensor var_2286_strides_0 = const()[name = string("op_2286_strides_0"), val = tensor([1, 1])]; + tensor var_2286_pad_0 = const()[name = string("op_2286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2286_dilations_0 = const()[name = string("op_2286_dilations_0"), val = tensor([1, 1])]; + int32 var_2286_groups_0 = const()[name = string("op_2286_groups_0"), val = int32(1)]; + tensor layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119490880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119785856))))[name = string("layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119785984)))]; + tensor var_2286_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2286_dilations_0, groups = var_2286_groups_0, pad = var_2286_pad_0, pad_type = var_2286_pad_type_0, strides = var_2286_strides_0, weight = layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_2286_cast_fp16")]; + string var_2292_pad_type_0 = const()[name = string("op_2292_pad_type_0"), val = string("valid")]; + tensor var_2292_strides_0 = const()[name = string("op_2292_strides_0"), val = tensor([1, 1])]; + tensor var_2292_pad_0 = const()[name = string("op_2292_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2292_dilations_0 = const()[name = string("op_2292_dilations_0"), val = tensor([1, 1])]; + int32 var_2292_groups_0 = const()[name = string("op_2292_groups_0"), val = int32(1)]; + tensor layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119794240))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119787584))))[name = string("layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2292_cast_fp16 = conv(dilations = var_2292_dilations_0, groups = var_2292_groups_0, pad = var_2292_pad_0, pad_type = var_2292_pad_type_0, strides = var_2292_strides_0, weight = layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_2292_cast_fp16")]; + tensor obj_147_cast_fp16 = add(x = var_2286_cast_fp16, y = var_2292_cast_fp16)[name = string("obj_147_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_147_cast_fp16)[name = string("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor([1])]; + fp16 var_2303_to_fp16 = const()[name = string("op_2303_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2303_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119868032)))]; + tensor input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119869632)))]; + fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_75_cast_fp16")]; + string var_2321_pad_type_0 = const()[name = string("op_2321_pad_type_0"), val = string("valid")]; + tensor var_2321_strides_0 = const()[name = string("op_2321_strides_0"), val = tensor([1, 1])]; + tensor var_2321_pad_0 = const()[name = string("op_2321_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2321_dilations_0 = const()[name = string("op_2321_dilations_0"), val = tensor([1, 1])]; + int32 var_2321_groups_0 = const()[name = string("op_2321_groups_0"), val = int32(1)]; + tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119871232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121050944))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121051072)))]; + tensor var_2321_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_2321_dilations_0, groups = var_2321_groups_0, pad = var_2321_pad_0, pad_type = var_2321_pad_type_0, strides = var_2321_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_2321_cast_fp16")]; + string var_2327_pad_type_0 = const()[name = string("op_2327_pad_type_0"), val = string("valid")]; + tensor var_2327_strides_0 = const()[name = string("op_2327_strides_0"), val = tensor([1, 1])]; + tensor var_2327_pad_0 = const()[name = string("op_2327_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2327_dilations_0 = const()[name = string("op_2327_dilations_0"), val = tensor([1, 1])]; + int32 var_2327_groups_0 = const()[name = string("op_2327_groups_0"), val = int32(1)]; + tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121076224))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121057280))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2327_cast_fp16 = conv(dilations = var_2327_dilations_0, groups = var_2327_groups_0, pad = var_2327_pad_0, pad_type = var_2327_pad_type_0, strides = var_2327_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_2327_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = var_2321_cast_fp16, y = var_2327_cast_fp16)[name = string("input_77_cast_fp16")]; + string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")]; + string var_2338_pad_type_0 = const()[name = string("op_2338_pad_type_0"), val = string("valid")]; + tensor var_2338_strides_0 = const()[name = string("op_2338_strides_0"), val = tensor([1, 1])]; + tensor var_2338_pad_0 = const()[name = string("op_2338_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2338_dilations_0 = const()[name = string("op_2338_dilations_0"), val = tensor([1, 1])]; + int32 var_2338_groups_0 = const()[name = string("op_2338_groups_0"), val = int32(1)]; + tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121371200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122550912))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122551040)))]; + tensor var_2338_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_2338_dilations_0, groups = var_2338_groups_0, pad = var_2338_pad_0, pad_type = var_2338_pad_type_0, strides = var_2338_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_2338_cast_fp16")]; + string var_2344_pad_type_0 = const()[name = string("op_2344_pad_type_0"), val = string("valid")]; + tensor var_2344_strides_0 = const()[name = string("op_2344_strides_0"), val = tensor([1, 1])]; + tensor var_2344_pad_0 = const()[name = string("op_2344_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2344_dilations_0 = const()[name = string("op_2344_dilations_0"), val = tensor([1, 1])]; + int32 var_2344_groups_0 = const()[name = string("op_2344_groups_0"), val = int32(1)]; + tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122576704))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122552640))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2344_cast_fp16 = conv(dilations = var_2344_dilations_0, groups = var_2344_groups_0, pad = var_2344_pad_0, pad_type = var_2344_pad_type_0, strides = var_2344_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_2344_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = add(x = var_2338_cast_fp16, y = var_2344_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_49_cast_fp16")]; + tensor obj_161_begin_0 = const()[name = string("obj_161_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor obj_161_end_0 = const()[name = string("obj_161_end_0"), val = tensor([9, 768, 1, 1536])]; + tensor obj_161_end_mask_0 = const()[name = string("obj_161_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_161_cast_fp16 = slice_by_index(begin = obj_161_begin_0, end = obj_161_end_0, end_mask = obj_161_end_mask_0, x = read_state_2)[name = string("obj_161_cast_fp16")]; + tensor obj_163_begin_0 = const()[name = string("obj_163_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor obj_163_end_0 = const()[name = string("obj_163_end_0"), val = tensor([9, 768, 1, 1536])]; + tensor obj_163_end_mask_0 = const()[name = string("obj_163_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_163_cast_fp16 = slice_by_index(begin = obj_163_begin_0, end = obj_163_end_0, end_mask = obj_163_end_mask_0, x = read_state_3)[name = string("obj_163_cast_fp16")]; + int32 var_2366 = const()[name = string("op_2366"), val = int32(3)]; + tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; + fp16 var_2391_to_fp16 = const()[name = string("op_2391_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2391_to_fp16, x = inputs_49_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor obj_151_gamma_0_to_fp16 = const()[name = string("obj_151_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122871680)))]; + tensor obj_151_beta_0_to_fp16 = const()[name = string("obj_151_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122873280)))]; + fp16 obj_151_epsilon_0_to_fp16 = const()[name = string("obj_151_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_151_cast_fp16 = batch_norm(beta = obj_151_beta_0_to_fp16, epsilon = obj_151_epsilon_0_to_fp16, gamma = obj_151_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_49_cast_fp16)[name = string("obj_151_cast_fp16")]; + string var_2413_pad_type_0 = const()[name = string("op_2413_pad_type_0"), val = string("valid")]; + tensor var_2413_strides_0 = const()[name = string("op_2413_strides_0"), val = tensor([1, 1])]; + tensor var_2413_pad_0 = const()[name = string("op_2413_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2413_dilations_0 = const()[name = string("op_2413_dilations_0"), val = tensor([1, 1])]; + int32 var_2413_groups_0 = const()[name = string("op_2413_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122874880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123169856))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123169984)))]; + tensor var_2413_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2413_dilations_0, groups = var_2413_groups_0, pad = var_2413_pad_0, pad_type = var_2413_pad_type_0, strides = var_2413_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2413_cast_fp16")]; + string var_2419_pad_type_0 = const()[name = string("op_2419_pad_type_0"), val = string("valid")]; + tensor var_2419_strides_0 = const()[name = string("op_2419_strides_0"), val = tensor([1, 1])]; + tensor var_2419_pad_0 = const()[name = string("op_2419_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2419_dilations_0 = const()[name = string("op_2419_dilations_0"), val = tensor([1, 1])]; + int32 var_2419_groups_0 = const()[name = string("op_2419_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123176896))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123171584))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2419_cast_fp16 = conv(dilations = var_2419_dilations_0, groups = var_2419_groups_0, pad = var_2419_pad_0, pad_type = var_2419_pad_type_0, strides = var_2419_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2419_cast_fp16")]; + tensor query_33_cast_fp16 = add(x = var_2413_cast_fp16, y = var_2419_cast_fp16)[name = string("query_33_cast_fp16")]; + string var_2428_pad_type_0 = const()[name = string("op_2428_pad_type_0"), val = string("valid")]; + tensor var_2428_strides_0 = const()[name = string("op_2428_strides_0"), val = tensor([1, 1])]; + tensor var_2428_pad_0 = const()[name = string("op_2428_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2428_dilations_0 = const()[name = string("op_2428_dilations_0"), val = tensor([1, 1])]; + int32 var_2428_groups_0 = const()[name = string("op_2428_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123250688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123545664))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2428_cast_fp16 = conv(dilations = var_2428_dilations_0, groups = var_2428_groups_0, pad = var_2428_pad_0, pad_type = var_2428_pad_type_0, strides = var_2428_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2428_cast_fp16")]; + string var_2434_pad_type_0 = const()[name = string("op_2434_pad_type_0"), val = string("valid")]; + tensor var_2434_strides_0 = const()[name = string("op_2434_strides_0"), val = tensor([1, 1])]; + tensor var_2434_pad_0 = const()[name = string("op_2434_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2434_dilations_0 = const()[name = string("op_2434_dilations_0"), val = tensor([1, 1])]; + int32 var_2434_groups_0 = const()[name = string("op_2434_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123551040))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123545792))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2434_cast_fp16 = conv(dilations = var_2434_dilations_0, groups = var_2434_groups_0, pad = var_2434_pad_0, pad_type = var_2434_pad_type_0, strides = var_2434_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2434_cast_fp16")]; + tensor current_key_17_cast_fp16 = add(x = var_2428_cast_fp16, y = var_2434_cast_fp16)[name = string("current_key_17_cast_fp16")]; + string var_2444_pad_type_0 = const()[name = string("op_2444_pad_type_0"), val = string("valid")]; + tensor var_2444_strides_0 = const()[name = string("op_2444_strides_0"), val = tensor([1, 1])]; + tensor var_2444_pad_0 = const()[name = string("op_2444_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2444_dilations_0 = const()[name = string("op_2444_dilations_0"), val = tensor([1, 1])]; + int32 var_2444_groups_0 = const()[name = string("op_2444_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123624832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123919808))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123919936)))]; + tensor var_2444_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2444_dilations_0, groups = var_2444_groups_0, pad = var_2444_pad_0, pad_type = var_2444_pad_type_0, strides = var_2444_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2444_cast_fp16")]; + string var_2450_pad_type_0 = const()[name = string("op_2450_pad_type_0"), val = string("valid")]; + tensor var_2450_strides_0 = const()[name = string("op_2450_strides_0"), val = tensor([1, 1])]; + tensor var_2450_pad_0 = const()[name = string("op_2450_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2450_dilations_0 = const()[name = string("op_2450_dilations_0"), val = tensor([1, 1])]; + int32 var_2450_groups_0 = const()[name = string("op_2450_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123926656))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123921536))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2450_cast_fp16 = conv(dilations = var_2450_dilations_0, groups = var_2450_groups_0, pad = var_2450_pad_0, pad_type = var_2450_pad_type_0, strides = var_2450_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2450_cast_fp16")]; + tensor current_value_17_cast_fp16 = add(x = var_2444_cast_fp16, y = var_2450_cast_fp16)[name = string("current_value_17_cast_fp16")]; + tensor var_2456_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2456_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_71_cast_fp16_8, y = var_2456_cast_fp16)[name = string("key_17_cast_fp16")]; + tensor var_2458_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2458_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_86_cast_fp16_8, y = var_2458_cast_fp16)[name = string("value_17_cast_fp16")]; + tensor var_2461 = const()[name = string("op_2461"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_33_cast_fp16 = reshape(shape = var_2461, x = query_33_cast_fp16)[name = string("mh_q_33_cast_fp16")]; + fp16 var_2463_to_fp16 = const()[name = string("op_2463_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2464_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2463_to_fp16)[name = string("op_2464_cast_fp16")]; + tensor var_2465 = const()[name = string("op_2465"), val = tensor([1, 12, 64, -1])]; + tensor var_2466_cast_fp16 = reshape(shape = var_2465, x = key_17_cast_fp16)[name = string("op_2466_cast_fp16")]; + bool mh_w_65_transpose_x_0 = const()[name = string("mh_w_65_transpose_x_0"), val = bool(true)]; + bool mh_w_65_transpose_y_0 = const()[name = string("mh_w_65_transpose_y_0"), val = bool(false)]; + tensor mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_2464_cast_fp16, y = var_2466_cast_fp16)[name = string("mh_w_65_cast_fp16")]; + tensor mh_w_67_cast_fp16 = add(x = mh_w_65_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_67_cast_fp16")]; + tensor var_2474_cast_fp16 = softmax(axis = var_2366, x = mh_w_67_cast_fp16)[name = string("op_2474_cast_fp16")]; + tensor var_2475 = const()[name = string("op_2475"), val = tensor([1, 12, 64, -1])]; + tensor var_2476_cast_fp16 = reshape(shape = var_2475, x = value_17_cast_fp16)[name = string("op_2476_cast_fp16")]; + bool attn_33_transpose_x_0 = const()[name = string("attn_33_transpose_x_0"), val = bool(false)]; + bool attn_33_transpose_y_0 = const()[name = string("attn_33_transpose_y_0"), val = bool(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2476_cast_fp16, y = var_2474_cast_fp16)[name = string("attn_33_cast_fp16")]; + tensor var_2479 = const()[name = string("op_2479"), val = tensor([1, 768, 1, -1])]; + tensor input_81_cast_fp16 = reshape(shape = var_2479, x = attn_33_cast_fp16)[name = string("input_81_cast_fp16")]; + string var_2489_pad_type_0 = const()[name = string("op_2489_pad_type_0"), val = string("valid")]; + tensor var_2489_strides_0 = const()[name = string("op_2489_strides_0"), val = tensor([1, 1])]; + tensor var_2489_pad_0 = const()[name = string("op_2489_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2489_dilations_0 = const()[name = string("op_2489_dilations_0"), val = tensor([1, 1])]; + int32 var_2489_groups_0 = const()[name = string("op_2489_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124000448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124295424))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124295552)))]; + tensor var_2489_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2489_dilations_0, groups = var_2489_groups_0, pad = var_2489_pad_0, pad_type = var_2489_pad_type_0, strides = var_2489_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2489_cast_fp16")]; + string var_2495_pad_type_0 = const()[name = string("op_2495_pad_type_0"), val = string("valid")]; + tensor var_2495_strides_0 = const()[name = string("op_2495_strides_0"), val = tensor([1, 1])]; + tensor var_2495_pad_0 = const()[name = string("op_2495_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2495_dilations_0 = const()[name = string("op_2495_dilations_0"), val = tensor([1, 1])]; + int32 var_2495_groups_0 = const()[name = string("op_2495_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124303104))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124297152))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2495_cast_fp16 = conv(dilations = var_2495_dilations_0, groups = var_2495_groups_0, pad = var_2495_pad_0, pad_type = var_2495_pad_type_0, strides = var_2495_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2495_cast_fp16")]; + tensor obj_157_cast_fp16 = add(x = var_2489_cast_fp16, y = var_2495_cast_fp16)[name = string("obj_157_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_157_cast_fp16)[name = string("inputs_51_cast_fp16")]; + tensor out_51_axes_0 = const()[name = string("out_51_axes_0"), val = tensor([1])]; + fp16 var_2510_to_fp16 = const()[name = string("op_2510_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2510_to_fp16, x = inputs_51_cast_fp16)[name = string("out_51_cast_fp16")]; + tensor obj_159_gamma_0_to_fp16 = const()[name = string("obj_159_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124376896)))]; + tensor obj_159_beta_0_to_fp16 = const()[name = string("obj_159_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124378496)))]; + fp16 obj_159_epsilon_0_to_fp16 = const()[name = string("obj_159_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_159_cast_fp16 = batch_norm(beta = obj_159_beta_0_to_fp16, epsilon = obj_159_epsilon_0_to_fp16, gamma = obj_159_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_51_cast_fp16)[name = string("obj_159_cast_fp16")]; + string var_2530_pad_type_0 = const()[name = string("op_2530_pad_type_0"), val = string("valid")]; + tensor var_2530_strides_0 = const()[name = string("op_2530_strides_0"), val = tensor([1, 1])]; + tensor var_2530_pad_0 = const()[name = string("op_2530_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2530_dilations_0 = const()[name = string("op_2530_dilations_0"), val = tensor([1, 1])]; + int32 var_2530_groups_0 = const()[name = string("op_2530_groups_0"), val = int32(1)]; + tensor layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124380096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124675072))))[name = string("layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124675200)))]; + tensor var_2530_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2530_dilations_0, groups = var_2530_groups_0, pad = var_2530_pad_0, pad_type = var_2530_pad_type_0, strides = var_2530_strides_0, weight = layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_159_cast_fp16)[name = string("op_2530_cast_fp16")]; + string var_2536_pad_type_0 = const()[name = string("op_2536_pad_type_0"), val = string("valid")]; + tensor var_2536_strides_0 = const()[name = string("op_2536_strides_0"), val = tensor([1, 1])]; + tensor var_2536_pad_0 = const()[name = string("op_2536_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2536_dilations_0 = const()[name = string("op_2536_dilations_0"), val = tensor([1, 1])]; + int32 var_2536_groups_0 = const()[name = string("op_2536_groups_0"), val = int32(1)]; + tensor layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124682304))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124676800))))[name = string("layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2536_cast_fp16 = conv(dilations = var_2536_dilations_0, groups = var_2536_groups_0, pad = var_2536_pad_0, pad_type = var_2536_pad_type_0, strides = var_2536_strides_0, weight = layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_159_cast_fp16)[name = string("op_2536_cast_fp16")]; + tensor query_35_cast_fp16 = add(x = var_2530_cast_fp16, y = var_2536_cast_fp16)[name = string("query_35_cast_fp16")]; + tensor var_2539 = const()[name = string("op_2539"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_35_cast_fp16 = reshape(shape = var_2539, x = query_35_cast_fp16)[name = string("mh_q_35_cast_fp16")]; + fp16 var_2541_to_fp16 = const()[name = string("op_2541_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2542_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_2541_to_fp16)[name = string("op_2542_cast_fp16")]; + tensor var_2543 = const()[name = string("op_2543"), val = tensor([1, 12, 64, -1])]; + tensor var_2544_cast_fp16 = reshape(shape = var_2543, x = obj_161_cast_fp16)[name = string("op_2544_cast_fp16")]; + bool mh_w_69_transpose_x_0 = const()[name = string("mh_w_69_transpose_x_0"), val = bool(true)]; + bool mh_w_69_transpose_y_0 = const()[name = string("mh_w_69_transpose_y_0"), val = bool(false)]; + tensor mh_w_69_cast_fp16 = matmul(transpose_x = mh_w_69_transpose_x_0, transpose_y = mh_w_69_transpose_y_0, x = var_2542_cast_fp16, y = var_2544_cast_fp16)[name = string("mh_w_69_cast_fp16")]; + tensor mh_w_71_cast_fp16 = add(x = mh_w_69_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_71_cast_fp16")]; + tensor obj_167_cast_fp16 = softmax(axis = var_2366, x = mh_w_71_cast_fp16)[name = string("obj_167_cast_fp16")]; + tensor var_2553 = const()[name = string("op_2553"), val = tensor([1, 12, 64, -1])]; + tensor var_2554_cast_fp16 = reshape(shape = var_2553, x = obj_163_cast_fp16)[name = string("op_2554_cast_fp16")]; + bool attn_35_transpose_x_0 = const()[name = string("attn_35_transpose_x_0"), val = bool(false)]; + bool attn_35_transpose_y_0 = const()[name = string("attn_35_transpose_y_0"), val = bool(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2554_cast_fp16, y = obj_167_cast_fp16)[name = string("attn_35_cast_fp16")]; + tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 768, 1, -1])]; + tensor input_83_cast_fp16 = reshape(shape = var_2557, x = attn_35_cast_fp16)[name = string("input_83_cast_fp16")]; + string var_2567_pad_type_0 = const()[name = string("op_2567_pad_type_0"), val = string("valid")]; + tensor var_2567_strides_0 = const()[name = string("op_2567_strides_0"), val = tensor([1, 1])]; + tensor var_2567_pad_0 = const()[name = string("op_2567_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2567_dilations_0 = const()[name = string("op_2567_dilations_0"), val = tensor([1, 1])]; + int32 var_2567_groups_0 = const()[name = string("op_2567_groups_0"), val = int32(1)]; + tensor layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124756096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125051072))))[name = string("layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125051200)))]; + tensor var_2567_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2567_dilations_0, groups = var_2567_groups_0, pad = var_2567_pad_0, pad_type = var_2567_pad_type_0, strides = var_2567_strides_0, weight = layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2567_cast_fp16")]; + string var_2573_pad_type_0 = const()[name = string("op_2573_pad_type_0"), val = string("valid")]; + tensor var_2573_strides_0 = const()[name = string("op_2573_strides_0"), val = tensor([1, 1])]; + tensor var_2573_pad_0 = const()[name = string("op_2573_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2573_dilations_0 = const()[name = string("op_2573_dilations_0"), val = tensor([1, 1])]; + int32 var_2573_groups_0 = const()[name = string("op_2573_groups_0"), val = int32(1)]; + tensor layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125057536))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125052800))))[name = string("layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2573_cast_fp16 = conv(dilations = var_2573_dilations_0, groups = var_2573_groups_0, pad = var_2573_pad_0, pad_type = var_2573_pad_type_0, strides = var_2573_strides_0, weight = layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2573_cast_fp16")]; + tensor obj_165_cast_fp16 = add(x = var_2567_cast_fp16, y = var_2573_cast_fp16)[name = string("obj_165_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_165_cast_fp16)[name = string("inputs_53_cast_fp16")]; + tensor out_53_axes_0 = const()[name = string("out_53_axes_0"), val = tensor([1])]; + fp16 var_2587_to_fp16 = const()[name = string("op_2587_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2587_to_fp16, x = inputs_53_cast_fp16)[name = string("out_53_cast_fp16")]; + tensor input_85_gamma_0_to_fp16 = const()[name = string("input_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125131328)))]; + tensor input_85_beta_0_to_fp16 = const()[name = string("input_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125132928)))]; + fp16 input_85_epsilon_0_to_fp16 = const()[name = string("input_85_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_53_cast_fp16)[name = string("input_85_cast_fp16")]; + string var_2605_pad_type_0 = const()[name = string("op_2605_pad_type_0"), val = string("valid")]; + tensor var_2605_strides_0 = const()[name = string("op_2605_strides_0"), val = tensor([1, 1])]; + tensor var_2605_pad_0 = const()[name = string("op_2605_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2605_dilations_0 = const()[name = string("op_2605_dilations_0"), val = tensor([1, 1])]; + int32 var_2605_groups_0 = const()[name = string("op_2605_groups_0"), val = int32(1)]; + tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125134528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126314240))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126314368)))]; + tensor var_2605_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_2605_dilations_0, groups = var_2605_groups_0, pad = var_2605_pad_0, pad_type = var_2605_pad_type_0, strides = var_2605_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("op_2605_cast_fp16")]; + string var_2611_pad_type_0 = const()[name = string("op_2611_pad_type_0"), val = string("valid")]; + tensor var_2611_strides_0 = const()[name = string("op_2611_strides_0"), val = tensor([1, 1])]; + tensor var_2611_pad_0 = const()[name = string("op_2611_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2611_dilations_0 = const()[name = string("op_2611_dilations_0"), val = tensor([1, 1])]; + int32 var_2611_groups_0 = const()[name = string("op_2611_groups_0"), val = int32(1)]; + tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126339264))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126320576))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2611_cast_fp16 = conv(dilations = var_2611_dilations_0, groups = var_2611_groups_0, pad = var_2611_pad_0, pad_type = var_2611_pad_type_0, strides = var_2611_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_85_cast_fp16)[name = string("op_2611_cast_fp16")]; + tensor input_87_cast_fp16 = add(x = var_2605_cast_fp16, y = var_2611_cast_fp16)[name = string("input_87_cast_fp16")]; + string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("EXACT")]; + tensor input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = string("input_89_cast_fp16")]; + string var_2622_pad_type_0 = const()[name = string("op_2622_pad_type_0"), val = string("valid")]; + tensor var_2622_strides_0 = const()[name = string("op_2622_strides_0"), val = tensor([1, 1])]; + tensor var_2622_pad_0 = const()[name = string("op_2622_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2622_dilations_0 = const()[name = string("op_2622_dilations_0"), val = tensor([1, 1])]; + int32 var_2622_groups_0 = const()[name = string("op_2622_groups_0"), val = int32(1)]; + tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126634240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127813952))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127814080)))]; + tensor var_2622_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_2622_dilations_0, groups = var_2622_groups_0, pad = var_2622_pad_0, pad_type = var_2622_pad_type_0, strides = var_2622_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2622_cast_fp16")]; + string var_2628_pad_type_0 = const()[name = string("op_2628_pad_type_0"), val = string("valid")]; + tensor var_2628_strides_0 = const()[name = string("op_2628_strides_0"), val = tensor([1, 1])]; + tensor var_2628_pad_0 = const()[name = string("op_2628_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2628_dilations_0 = const()[name = string("op_2628_dilations_0"), val = tensor([1, 1])]; + int32 var_2628_groups_0 = const()[name = string("op_2628_groups_0"), val = int32(1)]; + tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127840320))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127815680))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2628_cast_fp16 = conv(dilations = var_2628_dilations_0, groups = var_2628_groups_0, pad = var_2628_pad_0, pad_type = var_2628_pad_type_0, strides = var_2628_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2628_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = var_2622_cast_fp16, y = var_2628_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_55_cast_fp16")]; + tensor obj_179_begin_0 = const()[name = string("obj_179_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor obj_179_end_0 = const()[name = string("obj_179_end_0"), val = tensor([10, 768, 1, 1536])]; + tensor obj_179_end_mask_0 = const()[name = string("obj_179_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_179_cast_fp16 = slice_by_index(begin = obj_179_begin_0, end = obj_179_end_0, end_mask = obj_179_end_mask_0, x = read_state_2)[name = string("obj_179_cast_fp16")]; + tensor obj_181_begin_0 = const()[name = string("obj_181_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor obj_181_end_0 = const()[name = string("obj_181_end_0"), val = tensor([10, 768, 1, 1536])]; + tensor obj_181_end_mask_0 = const()[name = string("obj_181_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_181_cast_fp16 = slice_by_index(begin = obj_181_begin_0, end = obj_181_end_0, end_mask = obj_181_end_mask_0, x = read_state_3)[name = string("obj_181_cast_fp16")]; + int32 var_2651 = const()[name = string("op_2651"), val = int32(3)]; + tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; + fp16 var_2676_to_fp16 = const()[name = string("op_2676_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2676_to_fp16, x = inputs_55_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor obj_169_gamma_0_to_fp16 = const()[name = string("obj_169_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128135296)))]; + tensor obj_169_beta_0_to_fp16 = const()[name = string("obj_169_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128136896)))]; + fp16 obj_169_epsilon_0_to_fp16 = const()[name = string("obj_169_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_169_cast_fp16 = batch_norm(beta = obj_169_beta_0_to_fp16, epsilon = obj_169_epsilon_0_to_fp16, gamma = obj_169_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_55_cast_fp16)[name = string("obj_169_cast_fp16")]; + string var_2698_pad_type_0 = const()[name = string("op_2698_pad_type_0"), val = string("valid")]; + tensor var_2698_strides_0 = const()[name = string("op_2698_strides_0"), val = tensor([1, 1])]; + tensor var_2698_pad_0 = const()[name = string("op_2698_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2698_dilations_0 = const()[name = string("op_2698_dilations_0"), val = tensor([1, 1])]; + int32 var_2698_groups_0 = const()[name = string("op_2698_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128138496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128433472))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128433600)))]; + tensor var_2698_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2698_dilations_0, groups = var_2698_groups_0, pad = var_2698_pad_0, pad_type = var_2698_pad_type_0, strides = var_2698_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2698_cast_fp16")]; + string var_2704_pad_type_0 = const()[name = string("op_2704_pad_type_0"), val = string("valid")]; + tensor var_2704_strides_0 = const()[name = string("op_2704_strides_0"), val = tensor([1, 1])]; + tensor var_2704_pad_0 = const()[name = string("op_2704_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2704_dilations_0 = const()[name = string("op_2704_dilations_0"), val = tensor([1, 1])]; + int32 var_2704_groups_0 = const()[name = string("op_2704_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128440768))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128435200))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2704_cast_fp16 = conv(dilations = var_2704_dilations_0, groups = var_2704_groups_0, pad = var_2704_pad_0, pad_type = var_2704_pad_type_0, strides = var_2704_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2704_cast_fp16")]; + tensor query_37_cast_fp16 = add(x = var_2698_cast_fp16, y = var_2704_cast_fp16)[name = string("query_37_cast_fp16")]; + string var_2713_pad_type_0 = const()[name = string("op_2713_pad_type_0"), val = string("valid")]; + tensor var_2713_strides_0 = const()[name = string("op_2713_strides_0"), val = tensor([1, 1])]; + tensor var_2713_pad_0 = const()[name = string("op_2713_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2713_dilations_0 = const()[name = string("op_2713_dilations_0"), val = tensor([1, 1])]; + int32 var_2713_groups_0 = const()[name = string("op_2713_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128514560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128809536))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2713_cast_fp16 = conv(dilations = var_2713_dilations_0, groups = var_2713_groups_0, pad = var_2713_pad_0, pad_type = var_2713_pad_type_0, strides = var_2713_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2713_cast_fp16")]; + string var_2719_pad_type_0 = const()[name = string("op_2719_pad_type_0"), val = string("valid")]; + tensor var_2719_strides_0 = const()[name = string("op_2719_strides_0"), val = tensor([1, 1])]; + tensor var_2719_pad_0 = const()[name = string("op_2719_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2719_dilations_0 = const()[name = string("op_2719_dilations_0"), val = tensor([1, 1])]; + int32 var_2719_groups_0 = const()[name = string("op_2719_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128815488))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128809664))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2719_cast_fp16 = conv(dilations = var_2719_dilations_0, groups = var_2719_groups_0, pad = var_2719_pad_0, pad_type = var_2719_pad_type_0, strides = var_2719_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2719_cast_fp16")]; + tensor current_key_19_cast_fp16 = add(x = var_2713_cast_fp16, y = var_2719_cast_fp16)[name = string("current_key_19_cast_fp16")]; + string var_2729_pad_type_0 = const()[name = string("op_2729_pad_type_0"), val = string("valid")]; + tensor var_2729_strides_0 = const()[name = string("op_2729_strides_0"), val = tensor([1, 1])]; + tensor var_2729_pad_0 = const()[name = string("op_2729_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2729_dilations_0 = const()[name = string("op_2729_dilations_0"), val = tensor([1, 1])]; + int32 var_2729_groups_0 = const()[name = string("op_2729_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128889280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129184256))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129184384)))]; + tensor var_2729_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2729_dilations_0, groups = var_2729_groups_0, pad = var_2729_pad_0, pad_type = var_2729_pad_type_0, strides = var_2729_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2729_cast_fp16")]; + string var_2735_pad_type_0 = const()[name = string("op_2735_pad_type_0"), val = string("valid")]; + tensor var_2735_strides_0 = const()[name = string("op_2735_strides_0"), val = tensor([1, 1])]; + tensor var_2735_pad_0 = const()[name = string("op_2735_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2735_dilations_0 = const()[name = string("op_2735_dilations_0"), val = tensor([1, 1])]; + int32 var_2735_groups_0 = const()[name = string("op_2735_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129193664))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129185984))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2735_cast_fp16 = conv(dilations = var_2735_dilations_0, groups = var_2735_groups_0, pad = var_2735_pad_0, pad_type = var_2735_pad_type_0, strides = var_2735_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2735_cast_fp16")]; + tensor current_value_19_cast_fp16 = add(x = var_2729_cast_fp16, y = var_2735_cast_fp16)[name = string("current_value_19_cast_fp16")]; + tensor var_2741_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2741_cast_fp16")]; + tensor key_19_cast_fp16 = add(x = var_71_cast_fp16_9, y = var_2741_cast_fp16)[name = string("key_19_cast_fp16")]; + tensor var_2743_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2743_cast_fp16")]; + tensor value_19_cast_fp16 = add(x = var_86_cast_fp16_9, y = var_2743_cast_fp16)[name = string("value_19_cast_fp16")]; + tensor var_2746 = const()[name = string("op_2746"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_37_cast_fp16 = reshape(shape = var_2746, x = query_37_cast_fp16)[name = string("mh_q_37_cast_fp16")]; + fp16 var_2748_to_fp16 = const()[name = string("op_2748_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2749_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2748_to_fp16)[name = string("op_2749_cast_fp16")]; + tensor var_2750 = const()[name = string("op_2750"), val = tensor([1, 12, 64, -1])]; + tensor var_2751_cast_fp16 = reshape(shape = var_2750, x = key_19_cast_fp16)[name = string("op_2751_cast_fp16")]; + bool mh_w_73_transpose_x_0 = const()[name = string("mh_w_73_transpose_x_0"), val = bool(true)]; + bool mh_w_73_transpose_y_0 = const()[name = string("mh_w_73_transpose_y_0"), val = bool(false)]; + tensor mh_w_73_cast_fp16 = matmul(transpose_x = mh_w_73_transpose_x_0, transpose_y = mh_w_73_transpose_y_0, x = var_2749_cast_fp16, y = var_2751_cast_fp16)[name = string("mh_w_73_cast_fp16")]; + tensor mh_w_75_cast_fp16 = add(x = mh_w_73_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_75_cast_fp16")]; + tensor var_2759_cast_fp16 = softmax(axis = var_2651, x = mh_w_75_cast_fp16)[name = string("op_2759_cast_fp16")]; + tensor var_2760 = const()[name = string("op_2760"), val = tensor([1, 12, 64, -1])]; + tensor var_2761_cast_fp16 = reshape(shape = var_2760, x = value_19_cast_fp16)[name = string("op_2761_cast_fp16")]; + bool attn_37_transpose_x_0 = const()[name = string("attn_37_transpose_x_0"), val = bool(false)]; + bool attn_37_transpose_y_0 = const()[name = string("attn_37_transpose_y_0"), val = bool(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2761_cast_fp16, y = var_2759_cast_fp16)[name = string("attn_37_cast_fp16")]; + tensor var_2764 = const()[name = string("op_2764"), val = tensor([1, 768, 1, -1])]; + tensor input_91_cast_fp16 = reshape(shape = var_2764, x = attn_37_cast_fp16)[name = string("input_91_cast_fp16")]; + string var_2774_pad_type_0 = const()[name = string("op_2774_pad_type_0"), val = string("valid")]; + tensor var_2774_strides_0 = const()[name = string("op_2774_strides_0"), val = tensor([1, 1])]; + tensor var_2774_pad_0 = const()[name = string("op_2774_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2774_dilations_0 = const()[name = string("op_2774_dilations_0"), val = tensor([1, 1])]; + int32 var_2774_groups_0 = const()[name = string("op_2774_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129267456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129562432))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129562560)))]; + tensor var_2774_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2774_dilations_0, groups = var_2774_groups_0, pad = var_2774_pad_0, pad_type = var_2774_pad_type_0, strides = var_2774_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2774_cast_fp16")]; + string var_2780_pad_type_0 = const()[name = string("op_2780_pad_type_0"), val = string("valid")]; + tensor var_2780_strides_0 = const()[name = string("op_2780_strides_0"), val = tensor([1, 1])]; + tensor var_2780_pad_0 = const()[name = string("op_2780_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2780_dilations_0 = const()[name = string("op_2780_dilations_0"), val = tensor([1, 1])]; + int32 var_2780_groups_0 = const()[name = string("op_2780_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129571904))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129564160))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2780_cast_fp16 = conv(dilations = var_2780_dilations_0, groups = var_2780_groups_0, pad = var_2780_pad_0, pad_type = var_2780_pad_type_0, strides = var_2780_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2780_cast_fp16")]; + tensor obj_175_cast_fp16 = add(x = var_2774_cast_fp16, y = var_2780_cast_fp16)[name = string("obj_175_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_175_cast_fp16)[name = string("inputs_57_cast_fp16")]; + tensor out_57_axes_0 = const()[name = string("out_57_axes_0"), val = tensor([1])]; + fp16 var_2795_to_fp16 = const()[name = string("op_2795_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_2795_to_fp16, x = inputs_57_cast_fp16)[name = string("out_57_cast_fp16")]; + tensor obj_177_gamma_0_to_fp16 = const()[name = string("obj_177_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129645696)))]; + tensor obj_177_beta_0_to_fp16 = const()[name = string("obj_177_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129647296)))]; + fp16 obj_177_epsilon_0_to_fp16 = const()[name = string("obj_177_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_177_cast_fp16 = batch_norm(beta = obj_177_beta_0_to_fp16, epsilon = obj_177_epsilon_0_to_fp16, gamma = obj_177_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_57_cast_fp16)[name = string("obj_177_cast_fp16")]; + string var_2815_pad_type_0 = const()[name = string("op_2815_pad_type_0"), val = string("valid")]; + tensor var_2815_strides_0 = const()[name = string("op_2815_strides_0"), val = tensor([1, 1])]; + tensor var_2815_pad_0 = const()[name = string("op_2815_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2815_dilations_0 = const()[name = string("op_2815_dilations_0"), val = tensor([1, 1])]; + int32 var_2815_groups_0 = const()[name = string("op_2815_groups_0"), val = int32(1)]; + tensor layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129648896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129943872))))[name = string("layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129944000)))]; + tensor var_2815_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2815_dilations_0, groups = var_2815_groups_0, pad = var_2815_pad_0, pad_type = var_2815_pad_type_0, strides = var_2815_strides_0, weight = layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_177_cast_fp16)[name = string("op_2815_cast_fp16")]; + string var_2821_pad_type_0 = const()[name = string("op_2821_pad_type_0"), val = string("valid")]; + tensor var_2821_strides_0 = const()[name = string("op_2821_strides_0"), val = tensor([1, 1])]; + tensor var_2821_pad_0 = const()[name = string("op_2821_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2821_dilations_0 = const()[name = string("op_2821_dilations_0"), val = tensor([1, 1])]; + int32 var_2821_groups_0 = const()[name = string("op_2821_groups_0"), val = int32(1)]; + tensor layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129950784))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129945600))))[name = string("layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2821_cast_fp16 = conv(dilations = var_2821_dilations_0, groups = var_2821_groups_0, pad = var_2821_pad_0, pad_type = var_2821_pad_type_0, strides = var_2821_strides_0, weight = layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_177_cast_fp16)[name = string("op_2821_cast_fp16")]; + tensor query_39_cast_fp16 = add(x = var_2815_cast_fp16, y = var_2821_cast_fp16)[name = string("query_39_cast_fp16")]; + tensor var_2824 = const()[name = string("op_2824"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_39_cast_fp16 = reshape(shape = var_2824, x = query_39_cast_fp16)[name = string("mh_q_39_cast_fp16")]; + fp16 var_2826_to_fp16 = const()[name = string("op_2826_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2827_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_2826_to_fp16)[name = string("op_2827_cast_fp16")]; + tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 12, 64, -1])]; + tensor var_2829_cast_fp16 = reshape(shape = var_2828, x = obj_179_cast_fp16)[name = string("op_2829_cast_fp16")]; + bool mh_w_77_transpose_x_0 = const()[name = string("mh_w_77_transpose_x_0"), val = bool(true)]; + bool mh_w_77_transpose_y_0 = const()[name = string("mh_w_77_transpose_y_0"), val = bool(false)]; + tensor mh_w_77_cast_fp16 = matmul(transpose_x = mh_w_77_transpose_x_0, transpose_y = mh_w_77_transpose_y_0, x = var_2827_cast_fp16, y = var_2829_cast_fp16)[name = string("mh_w_77_cast_fp16")]; + tensor mh_w_79_cast_fp16 = add(x = mh_w_77_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_79_cast_fp16")]; + tensor obj_185_cast_fp16 = softmax(axis = var_2651, x = mh_w_79_cast_fp16)[name = string("obj_185_cast_fp16")]; + tensor var_2838 = const()[name = string("op_2838"), val = tensor([1, 12, 64, -1])]; + tensor var_2839_cast_fp16 = reshape(shape = var_2838, x = obj_181_cast_fp16)[name = string("op_2839_cast_fp16")]; + bool attn_39_transpose_x_0 = const()[name = string("attn_39_transpose_x_0"), val = bool(false)]; + bool attn_39_transpose_y_0 = const()[name = string("attn_39_transpose_y_0"), val = bool(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2839_cast_fp16, y = obj_185_cast_fp16)[name = string("attn_39_cast_fp16")]; + tensor var_2842 = const()[name = string("op_2842"), val = tensor([1, 768, 1, -1])]; + tensor input_93_cast_fp16 = reshape(shape = var_2842, x = attn_39_cast_fp16)[name = string("input_93_cast_fp16")]; + string var_2852_pad_type_0 = const()[name = string("op_2852_pad_type_0"), val = string("valid")]; + tensor var_2852_strides_0 = const()[name = string("op_2852_strides_0"), val = tensor([1, 1])]; + tensor var_2852_pad_0 = const()[name = string("op_2852_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2852_dilations_0 = const()[name = string("op_2852_dilations_0"), val = tensor([1, 1])]; + int32 var_2852_groups_0 = const()[name = string("op_2852_groups_0"), val = int32(1)]; + tensor layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130024576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130319552))))[name = string("layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130319680)))]; + tensor var_2852_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2852_dilations_0, groups = var_2852_groups_0, pad = var_2852_pad_0, pad_type = var_2852_pad_type_0, strides = var_2852_strides_0, weight = layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("op_2852_cast_fp16")]; + string var_2858_pad_type_0 = const()[name = string("op_2858_pad_type_0"), val = string("valid")]; + tensor var_2858_strides_0 = const()[name = string("op_2858_strides_0"), val = tensor([1, 1])]; + tensor var_2858_pad_0 = const()[name = string("op_2858_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2858_dilations_0 = const()[name = string("op_2858_dilations_0"), val = tensor([1, 1])]; + int32 var_2858_groups_0 = const()[name = string("op_2858_groups_0"), val = int32(1)]; + tensor layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130326784))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130321280))))[name = string("layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2858_cast_fp16 = conv(dilations = var_2858_dilations_0, groups = var_2858_groups_0, pad = var_2858_pad_0, pad_type = var_2858_pad_type_0, strides = var_2858_strides_0, weight = layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_93_cast_fp16)[name = string("op_2858_cast_fp16")]; + tensor obj_183_cast_fp16 = add(x = var_2852_cast_fp16, y = var_2858_cast_fp16)[name = string("obj_183_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_183_cast_fp16)[name = string("inputs_59_cast_fp16")]; + tensor out_59_axes_0 = const()[name = string("out_59_axes_0"), val = tensor([1])]; + fp16 var_2872_to_fp16 = const()[name = string("op_2872_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_2872_to_fp16, x = inputs_59_cast_fp16)[name = string("out_59_cast_fp16")]; + tensor input_95_gamma_0_to_fp16 = const()[name = string("input_95_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130400576)))]; + tensor input_95_beta_0_to_fp16 = const()[name = string("input_95_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130402176)))]; + fp16 input_95_epsilon_0_to_fp16 = const()[name = string("input_95_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_59_cast_fp16)[name = string("input_95_cast_fp16")]; + string var_2890_pad_type_0 = const()[name = string("op_2890_pad_type_0"), val = string("valid")]; + tensor var_2890_strides_0 = const()[name = string("op_2890_strides_0"), val = tensor([1, 1])]; + tensor var_2890_pad_0 = const()[name = string("op_2890_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2890_dilations_0 = const()[name = string("op_2890_dilations_0"), val = tensor([1, 1])]; + int32 var_2890_groups_0 = const()[name = string("op_2890_groups_0"), val = int32(1)]; + tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130403776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131583488))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131583616)))]; + tensor var_2890_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_2890_dilations_0, groups = var_2890_groups_0, pad = var_2890_pad_0, pad_type = var_2890_pad_type_0, strides = var_2890_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2890_cast_fp16")]; + string var_2896_pad_type_0 = const()[name = string("op_2896_pad_type_0"), val = string("valid")]; + tensor var_2896_strides_0 = const()[name = string("op_2896_strides_0"), val = tensor([1, 1])]; + tensor var_2896_pad_0 = const()[name = string("op_2896_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2896_dilations_0 = const()[name = string("op_2896_dilations_0"), val = tensor([1, 1])]; + int32 var_2896_groups_0 = const()[name = string("op_2896_groups_0"), val = int32(1)]; + tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131606976))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131589824))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2896_cast_fp16 = conv(dilations = var_2896_dilations_0, groups = var_2896_groups_0, pad = var_2896_pad_0, pad_type = var_2896_pad_type_0, strides = var_2896_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2896_cast_fp16")]; + tensor input_97_cast_fp16 = add(x = var_2890_cast_fp16, y = var_2896_cast_fp16)[name = string("input_97_cast_fp16")]; + string input_99_mode_0 = const()[name = string("input_99_mode_0"), val = string("EXACT")]; + tensor input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = string("input_99_cast_fp16")]; + string var_2907_pad_type_0 = const()[name = string("op_2907_pad_type_0"), val = string("valid")]; + tensor var_2907_strides_0 = const()[name = string("op_2907_strides_0"), val = tensor([1, 1])]; + tensor var_2907_pad_0 = const()[name = string("op_2907_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2907_dilations_0 = const()[name = string("op_2907_dilations_0"), val = tensor([1, 1])]; + int32 var_2907_groups_0 = const()[name = string("op_2907_groups_0"), val = int32(1)]; + tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131901952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133081664))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133081792)))]; + tensor var_2907_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_2907_dilations_0, groups = var_2907_groups_0, pad = var_2907_pad_0, pad_type = var_2907_pad_type_0, strides = var_2907_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("op_2907_cast_fp16")]; + string var_2913_pad_type_0 = const()[name = string("op_2913_pad_type_0"), val = string("valid")]; + tensor var_2913_strides_0 = const()[name = string("op_2913_strides_0"), val = tensor([1, 1])]; + tensor var_2913_pad_0 = const()[name = string("op_2913_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2913_dilations_0 = const()[name = string("op_2913_dilations_0"), val = tensor([1, 1])]; + int32 var_2913_groups_0 = const()[name = string("op_2913_groups_0"), val = int32(1)]; + tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133107072))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133083392))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2913_cast_fp16 = conv(dilations = var_2913_dilations_0, groups = var_2913_groups_0, pad = var_2913_pad_0, pad_type = var_2913_pad_type_0, strides = var_2913_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = string("op_2913_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = var_2907_cast_fp16, y = var_2913_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_61_cast_fp16")]; + tensor obj_197_begin_0 = const()[name = string("obj_197_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor obj_197_end_0 = const()[name = string("obj_197_end_0"), val = tensor([11, 768, 1, 1536])]; + tensor obj_197_end_mask_0 = const()[name = string("obj_197_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_197_cast_fp16 = slice_by_index(begin = obj_197_begin_0, end = obj_197_end_0, end_mask = obj_197_end_mask_0, x = read_state_2)[name = string("obj_197_cast_fp16")]; + tensor obj_199_begin_0 = const()[name = string("obj_199_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor obj_199_end_0 = const()[name = string("obj_199_end_0"), val = tensor([11, 768, 1, 1536])]; + tensor obj_199_end_mask_0 = const()[name = string("obj_199_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_199_cast_fp16 = slice_by_index(begin = obj_199_begin_0, end = obj_199_end_0, end_mask = obj_199_end_mask_0, x = read_state_3)[name = string("obj_199_cast_fp16")]; + int32 var_2936 = const()[name = string("op_2936"), val = int32(3)]; + tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; + fp16 var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_2961_to_fp16, x = inputs_61_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor obj_187_gamma_0_to_fp16 = const()[name = string("obj_187_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133402048)))]; + tensor obj_187_beta_0_to_fp16 = const()[name = string("obj_187_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133403648)))]; + fp16 obj_187_epsilon_0_to_fp16 = const()[name = string("obj_187_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_187_cast_fp16 = batch_norm(beta = obj_187_beta_0_to_fp16, epsilon = obj_187_epsilon_0_to_fp16, gamma = obj_187_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_61_cast_fp16)[name = string("obj_187_cast_fp16")]; + string var_2983_pad_type_0 = const()[name = string("op_2983_pad_type_0"), val = string("valid")]; + tensor var_2983_strides_0 = const()[name = string("op_2983_strides_0"), val = tensor([1, 1])]; + tensor var_2983_pad_0 = const()[name = string("op_2983_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2983_dilations_0 = const()[name = string("op_2983_dilations_0"), val = tensor([1, 1])]; + int32 var_2983_groups_0 = const()[name = string("op_2983_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133405248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133700224))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133700352)))]; + tensor var_2983_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2983_dilations_0, groups = var_2983_groups_0, pad = var_2983_pad_0, pad_type = var_2983_pad_type_0, strides = var_2983_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_2983_cast_fp16")]; + string var_2989_pad_type_0 = const()[name = string("op_2989_pad_type_0"), val = string("valid")]; + tensor var_2989_strides_0 = const()[name = string("op_2989_strides_0"), val = tensor([1, 1])]; + tensor var_2989_pad_0 = const()[name = string("op_2989_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2989_dilations_0 = const()[name = string("op_2989_dilations_0"), val = tensor([1, 1])]; + int32 var_2989_groups_0 = const()[name = string("op_2989_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133706944))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133701952))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2989_cast_fp16 = conv(dilations = var_2989_dilations_0, groups = var_2989_groups_0, pad = var_2989_pad_0, pad_type = var_2989_pad_type_0, strides = var_2989_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_2989_cast_fp16")]; + tensor query_41_cast_fp16 = add(x = var_2983_cast_fp16, y = var_2989_cast_fp16)[name = string("query_41_cast_fp16")]; + string var_2998_pad_type_0 = const()[name = string("op_2998_pad_type_0"), val = string("valid")]; + tensor var_2998_strides_0 = const()[name = string("op_2998_strides_0"), val = tensor([1, 1])]; + tensor var_2998_pad_0 = const()[name = string("op_2998_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2998_dilations_0 = const()[name = string("op_2998_dilations_0"), val = tensor([1, 1])]; + int32 var_2998_groups_0 = const()[name = string("op_2998_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133780736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134075712))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2998_cast_fp16 = conv(dilations = var_2998_dilations_0, groups = var_2998_groups_0, pad = var_2998_pad_0, pad_type = var_2998_pad_type_0, strides = var_2998_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_2998_cast_fp16")]; + string var_3004_pad_type_0 = const()[name = string("op_3004_pad_type_0"), val = string("valid")]; + tensor var_3004_strides_0 = const()[name = string("op_3004_strides_0"), val = tensor([1, 1])]; + tensor var_3004_pad_0 = const()[name = string("op_3004_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3004_dilations_0 = const()[name = string("op_3004_dilations_0"), val = tensor([1, 1])]; + int32 var_3004_groups_0 = const()[name = string("op_3004_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134081280))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134075840))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3004_cast_fp16 = conv(dilations = var_3004_dilations_0, groups = var_3004_groups_0, pad = var_3004_pad_0, pad_type = var_3004_pad_type_0, strides = var_3004_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3004_cast_fp16")]; + tensor current_key_21_cast_fp16 = add(x = var_2998_cast_fp16, y = var_3004_cast_fp16)[name = string("current_key_21_cast_fp16")]; + string var_3014_pad_type_0 = const()[name = string("op_3014_pad_type_0"), val = string("valid")]; + tensor var_3014_strides_0 = const()[name = string("op_3014_strides_0"), val = tensor([1, 1])]; + tensor var_3014_pad_0 = const()[name = string("op_3014_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3014_dilations_0 = const()[name = string("op_3014_dilations_0"), val = tensor([1, 1])]; + int32 var_3014_groups_0 = const()[name = string("op_3014_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134155072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134450048))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134450176)))]; + tensor var_3014_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3014_dilations_0, groups = var_3014_groups_0, pad = var_3014_pad_0, pad_type = var_3014_pad_type_0, strides = var_3014_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_3014_cast_fp16")]; + string var_3020_pad_type_0 = const()[name = string("op_3020_pad_type_0"), val = string("valid")]; + tensor var_3020_strides_0 = const()[name = string("op_3020_strides_0"), val = tensor([1, 1])]; + tensor var_3020_pad_0 = const()[name = string("op_3020_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3020_dilations_0 = const()[name = string("op_3020_dilations_0"), val = tensor([1, 1])]; + int32 var_3020_groups_0 = const()[name = string("op_3020_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134458560))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134451776))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3020_cast_fp16 = conv(dilations = var_3020_dilations_0, groups = var_3020_groups_0, pad = var_3020_pad_0, pad_type = var_3020_pad_type_0, strides = var_3020_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3020_cast_fp16")]; + tensor current_value_21_cast_fp16 = add(x = var_3014_cast_fp16, y = var_3020_cast_fp16)[name = string("current_value_21_cast_fp16")]; + tensor var_3026_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3026_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_71_cast_fp16_10, y = var_3026_cast_fp16)[name = string("key_21_cast_fp16")]; + tensor var_3028_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3028_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_86_cast_fp16_10, y = var_3028_cast_fp16)[name = string("value_21_cast_fp16")]; + tensor var_3031 = const()[name = string("op_3031"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_41_cast_fp16 = reshape(shape = var_3031, x = query_41_cast_fp16)[name = string("mh_q_41_cast_fp16")]; + fp16 var_3033_to_fp16 = const()[name = string("op_3033_to_fp16"), val = fp16(0x1p-3)]; + tensor var_3034_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3033_to_fp16)[name = string("op_3034_cast_fp16")]; + tensor var_3035 = const()[name = string("op_3035"), val = tensor([1, 12, 64, -1])]; + tensor var_3036_cast_fp16 = reshape(shape = var_3035, x = key_21_cast_fp16)[name = string("op_3036_cast_fp16")]; + bool mh_w_81_transpose_x_0 = const()[name = string("mh_w_81_transpose_x_0"), val = bool(true)]; + bool mh_w_81_transpose_y_0 = const()[name = string("mh_w_81_transpose_y_0"), val = bool(false)]; + tensor mh_w_81_cast_fp16 = matmul(transpose_x = mh_w_81_transpose_x_0, transpose_y = mh_w_81_transpose_y_0, x = var_3034_cast_fp16, y = var_3036_cast_fp16)[name = string("mh_w_81_cast_fp16")]; + tensor mh_w_83_cast_fp16 = add(x = mh_w_81_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_83_cast_fp16")]; + tensor var_3044_cast_fp16 = softmax(axis = var_2936, x = mh_w_83_cast_fp16)[name = string("op_3044_cast_fp16")]; + tensor var_3045 = const()[name = string("op_3045"), val = tensor([1, 12, 64, -1])]; + tensor var_3046_cast_fp16 = reshape(shape = var_3045, x = value_21_cast_fp16)[name = string("op_3046_cast_fp16")]; + bool attn_41_transpose_x_0 = const()[name = string("attn_41_transpose_x_0"), val = bool(false)]; + bool attn_41_transpose_y_0 = const()[name = string("attn_41_transpose_y_0"), val = bool(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3046_cast_fp16, y = var_3044_cast_fp16)[name = string("attn_41_cast_fp16")]; + tensor var_3049 = const()[name = string("op_3049"), val = tensor([1, 768, 1, -1])]; + tensor input_101_cast_fp16 = reshape(shape = var_3049, x = attn_41_cast_fp16)[name = string("input_101_cast_fp16")]; + string var_3059_pad_type_0 = const()[name = string("op_3059_pad_type_0"), val = string("valid")]; + tensor var_3059_strides_0 = const()[name = string("op_3059_strides_0"), val = tensor([1, 1])]; + tensor var_3059_pad_0 = const()[name = string("op_3059_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3059_dilations_0 = const()[name = string("op_3059_dilations_0"), val = tensor([1, 1])]; + int32 var_3059_groups_0 = const()[name = string("op_3059_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134532352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134827328))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134827456)))]; + tensor var_3059_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3059_dilations_0, groups = var_3059_groups_0, pad = var_3059_pad_0, pad_type = var_3059_pad_type_0, strides = var_3059_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("op_3059_cast_fp16")]; + string var_3065_pad_type_0 = const()[name = string("op_3065_pad_type_0"), val = string("valid")]; + tensor var_3065_strides_0 = const()[name = string("op_3065_strides_0"), val = tensor([1, 1])]; + tensor var_3065_pad_0 = const()[name = string("op_3065_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3065_dilations_0 = const()[name = string("op_3065_dilations_0"), val = tensor([1, 1])]; + int32 var_3065_groups_0 = const()[name = string("op_3065_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134835904))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134829056))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3065_cast_fp16 = conv(dilations = var_3065_dilations_0, groups = var_3065_groups_0, pad = var_3065_pad_0, pad_type = var_3065_pad_type_0, strides = var_3065_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_101_cast_fp16)[name = string("op_3065_cast_fp16")]; + tensor obj_193_cast_fp16 = add(x = var_3059_cast_fp16, y = var_3065_cast_fp16)[name = string("obj_193_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_193_cast_fp16)[name = string("inputs_63_cast_fp16")]; + tensor out_63_axes_0 = const()[name = string("out_63_axes_0"), val = tensor([1])]; + fp16 var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_3080_to_fp16, x = inputs_63_cast_fp16)[name = string("out_63_cast_fp16")]; + tensor obj_195_gamma_0_to_fp16 = const()[name = string("obj_195_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134909696)))]; + tensor obj_195_beta_0_to_fp16 = const()[name = string("obj_195_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134911296)))]; + fp16 obj_195_epsilon_0_to_fp16 = const()[name = string("obj_195_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_195_cast_fp16 = batch_norm(beta = obj_195_beta_0_to_fp16, epsilon = obj_195_epsilon_0_to_fp16, gamma = obj_195_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_63_cast_fp16)[name = string("obj_195_cast_fp16")]; + string var_3100_pad_type_0 = const()[name = string("op_3100_pad_type_0"), val = string("valid")]; + tensor var_3100_strides_0 = const()[name = string("op_3100_strides_0"), val = tensor([1, 1])]; + tensor var_3100_pad_0 = const()[name = string("op_3100_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3100_dilations_0 = const()[name = string("op_3100_dilations_0"), val = tensor([1, 1])]; + int32 var_3100_groups_0 = const()[name = string("op_3100_groups_0"), val = int32(1)]; + tensor layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134912896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135207872))))[name = string("layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135208000)))]; + tensor var_3100_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3100_dilations_0, groups = var_3100_groups_0, pad = var_3100_pad_0, pad_type = var_3100_pad_type_0, strides = var_3100_strides_0, weight = layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_195_cast_fp16)[name = string("op_3100_cast_fp16")]; + string var_3106_pad_type_0 = const()[name = string("op_3106_pad_type_0"), val = string("valid")]; + tensor var_3106_strides_0 = const()[name = string("op_3106_strides_0"), val = tensor([1, 1])]; + tensor var_3106_pad_0 = const()[name = string("op_3106_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3106_dilations_0 = const()[name = string("op_3106_dilations_0"), val = tensor([1, 1])]; + int32 var_3106_groups_0 = const()[name = string("op_3106_groups_0"), val = int32(1)]; + tensor layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135214912))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135209600))))[name = string("layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3106_cast_fp16 = conv(dilations = var_3106_dilations_0, groups = var_3106_groups_0, pad = var_3106_pad_0, pad_type = var_3106_pad_type_0, strides = var_3106_strides_0, weight = layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_195_cast_fp16)[name = string("op_3106_cast_fp16")]; + tensor query_43_cast_fp16 = add(x = var_3100_cast_fp16, y = var_3106_cast_fp16)[name = string("query_43_cast_fp16")]; + tensor var_3109 = const()[name = string("op_3109"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_43_cast_fp16 = reshape(shape = var_3109, x = query_43_cast_fp16)[name = string("mh_q_43_cast_fp16")]; + fp16 var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = fp16(0x1p-3)]; + tensor var_3112_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3111_to_fp16)[name = string("op_3112_cast_fp16")]; + tensor var_3113 = const()[name = string("op_3113"), val = tensor([1, 12, 64, -1])]; + tensor var_3114_cast_fp16 = reshape(shape = var_3113, x = obj_197_cast_fp16)[name = string("op_3114_cast_fp16")]; + bool mh_w_85_transpose_x_0 = const()[name = string("mh_w_85_transpose_x_0"), val = bool(true)]; + bool mh_w_85_transpose_y_0 = const()[name = string("mh_w_85_transpose_y_0"), val = bool(false)]; + tensor mh_w_85_cast_fp16 = matmul(transpose_x = mh_w_85_transpose_x_0, transpose_y = mh_w_85_transpose_y_0, x = var_3112_cast_fp16, y = var_3114_cast_fp16)[name = string("mh_w_85_cast_fp16")]; + tensor mh_w_87_cast_fp16 = add(x = mh_w_85_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_87_cast_fp16")]; + tensor obj_203_cast_fp16 = softmax(axis = var_2936, x = mh_w_87_cast_fp16)[name = string("obj_203_cast_fp16")]; + tensor var_3123 = const()[name = string("op_3123"), val = tensor([1, 12, 64, -1])]; + tensor var_3124_cast_fp16 = reshape(shape = var_3123, x = obj_199_cast_fp16)[name = string("op_3124_cast_fp16")]; + bool attn_43_transpose_x_0 = const()[name = string("attn_43_transpose_x_0"), val = bool(false)]; + bool attn_43_transpose_y_0 = const()[name = string("attn_43_transpose_y_0"), val = bool(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3124_cast_fp16, y = obj_203_cast_fp16)[name = string("attn_43_cast_fp16")]; + tensor var_3127 = const()[name = string("op_3127"), val = tensor([1, 768, 1, -1])]; + tensor input_103_cast_fp16 = reshape(shape = var_3127, x = attn_43_cast_fp16)[name = string("input_103_cast_fp16")]; + string var_3137_pad_type_0 = const()[name = string("op_3137_pad_type_0"), val = string("valid")]; + tensor var_3137_strides_0 = const()[name = string("op_3137_strides_0"), val = tensor([1, 1])]; + tensor var_3137_pad_0 = const()[name = string("op_3137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3137_dilations_0 = const()[name = string("op_3137_dilations_0"), val = tensor([1, 1])]; + int32 var_3137_groups_0 = const()[name = string("op_3137_groups_0"), val = int32(1)]; + tensor layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135288704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135583680))))[name = string("layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135583808)))]; + tensor var_3137_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3137_dilations_0, groups = var_3137_groups_0, pad = var_3137_pad_0, pad_type = var_3137_pad_type_0, strides = var_3137_strides_0, weight = layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("op_3137_cast_fp16")]; + string var_3143_pad_type_0 = const()[name = string("op_3143_pad_type_0"), val = string("valid")]; + tensor var_3143_strides_0 = const()[name = string("op_3143_strides_0"), val = tensor([1, 1])]; + tensor var_3143_pad_0 = const()[name = string("op_3143_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3143_dilations_0 = const()[name = string("op_3143_dilations_0"), val = tensor([1, 1])]; + int32 var_3143_groups_0 = const()[name = string("op_3143_groups_0"), val = int32(1)]; + tensor layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135592064))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135585408))))[name = string("layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3143_cast_fp16 = conv(dilations = var_3143_dilations_0, groups = var_3143_groups_0, pad = var_3143_pad_0, pad_type = var_3143_pad_type_0, strides = var_3143_strides_0, weight = layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = string("op_3143_cast_fp16")]; + tensor obj_201_cast_fp16 = add(x = var_3137_cast_fp16, y = var_3143_cast_fp16)[name = string("obj_201_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_201_cast_fp16)[name = string("inputs_65_cast_fp16")]; + tensor out_65_axes_0 = const()[name = string("out_65_axes_0"), val = tensor([1])]; + fp16 var_3157_to_fp16 = const()[name = string("op_3157_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3157_to_fp16, x = inputs_65_cast_fp16)[name = string("out_65_cast_fp16")]; + tensor input_105_gamma_0_to_fp16 = const()[name = string("input_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135665856)))]; + tensor input_105_beta_0_to_fp16 = const()[name = string("input_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135667456)))]; + fp16 input_105_epsilon_0_to_fp16 = const()[name = string("input_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_65_cast_fp16)[name = string("input_105_cast_fp16")]; + string var_3175_pad_type_0 = const()[name = string("op_3175_pad_type_0"), val = string("valid")]; + tensor var_3175_strides_0 = const()[name = string("op_3175_strides_0"), val = tensor([1, 1])]; + tensor var_3175_pad_0 = const()[name = string("op_3175_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3175_dilations_0 = const()[name = string("op_3175_dilations_0"), val = tensor([1, 1])]; + int32 var_3175_groups_0 = const()[name = string("op_3175_groups_0"), val = int32(1)]; + tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135669056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136848768))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136848896)))]; + tensor var_3175_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_3175_dilations_0, groups = var_3175_groups_0, pad = var_3175_pad_0, pad_type = var_3175_pad_type_0, strides = var_3175_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("op_3175_cast_fp16")]; + string var_3181_pad_type_0 = const()[name = string("op_3181_pad_type_0"), val = string("valid")]; + tensor var_3181_strides_0 = const()[name = string("op_3181_strides_0"), val = tensor([1, 1])]; + tensor var_3181_pad_0 = const()[name = string("op_3181_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3181_dilations_0 = const()[name = string("op_3181_dilations_0"), val = tensor([1, 1])]; + int32 var_3181_groups_0 = const()[name = string("op_3181_groups_0"), val = int32(1)]; + tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136876736))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136855104))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3181_cast_fp16 = conv(dilations = var_3181_dilations_0, groups = var_3181_groups_0, pad = var_3181_pad_0, pad_type = var_3181_pad_type_0, strides = var_3181_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = string("op_3181_cast_fp16")]; + tensor input_107_cast_fp16 = add(x = var_3175_cast_fp16, y = var_3181_cast_fp16)[name = string("input_107_cast_fp16")]; + string input_109_mode_0 = const()[name = string("input_109_mode_0"), val = string("EXACT")]; + tensor input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")]; + string var_3192_pad_type_0 = const()[name = string("op_3192_pad_type_0"), val = string("valid")]; + tensor var_3192_strides_0 = const()[name = string("op_3192_strides_0"), val = tensor([1, 1])]; + tensor var_3192_pad_0 = const()[name = string("op_3192_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3192_dilations_0 = const()[name = string("op_3192_dilations_0"), val = tensor([1, 1])]; + int32 var_3192_groups_0 = const()[name = string("op_3192_groups_0"), val = int32(1)]; + tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137171712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138351424))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138351552)))]; + tensor var_3192_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_3192_dilations_0, groups = var_3192_groups_0, pad = var_3192_pad_0, pad_type = var_3192_pad_type_0, strides = var_3192_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("op_3192_cast_fp16")]; + string var_3198_pad_type_0 = const()[name = string("op_3198_pad_type_0"), val = string("valid")]; + tensor var_3198_strides_0 = const()[name = string("op_3198_strides_0"), val = tensor([1, 1])]; + tensor var_3198_pad_0 = const()[name = string("op_3198_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3198_dilations_0 = const()[name = string("op_3198_dilations_0"), val = tensor([1, 1])]; + int32 var_3198_groups_0 = const()[name = string("op_3198_groups_0"), val = int32(1)]; + tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138378752))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138353152))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3198_cast_fp16 = conv(dilations = var_3198_dilations_0, groups = var_3198_groups_0, pad = var_3198_pad_0, pad_type = var_3198_pad_type_0, strides = var_3198_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_109_cast_fp16)[name = string("op_3198_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = add(x = var_3192_cast_fp16, y = var_3198_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_67_cast_fp16")]; + tensor obj_215_begin_0 = const()[name = string("obj_215_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor obj_215_end_0 = const()[name = string("obj_215_end_0"), val = tensor([12, 768, 1, 1536])]; + tensor obj_215_end_mask_0 = const()[name = string("obj_215_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_215_cast_fp16 = slice_by_index(begin = obj_215_begin_0, end = obj_215_end_0, end_mask = obj_215_end_mask_0, x = read_state_2)[name = string("obj_215_cast_fp16")]; + tensor obj_217_begin_0 = const()[name = string("obj_217_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor obj_217_end_0 = const()[name = string("obj_217_end_0"), val = tensor([12, 768, 1, 1536])]; + tensor obj_217_end_mask_0 = const()[name = string("obj_217_end_mask_0"), val = tensor([false, true, true, true])]; + tensor obj_217_cast_fp16 = slice_by_index(begin = obj_217_begin_0, end = obj_217_end_0, end_mask = obj_217_end_mask_0, x = read_state_3)[name = string("obj_217_cast_fp16")]; + int32 var_3221 = const()[name = string("op_3221"), val = int32(3)]; + tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; + fp16 var_3246_to_fp16 = const()[name = string("op_3246_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3246_to_fp16, x = inputs_67_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor obj_205_gamma_0_to_fp16 = const()[name = string("obj_205_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138673728)))]; + tensor obj_205_beta_0_to_fp16 = const()[name = string("obj_205_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138675328)))]; + fp16 obj_205_epsilon_0_to_fp16 = const()[name = string("obj_205_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_205_cast_fp16 = batch_norm(beta = obj_205_beta_0_to_fp16, epsilon = obj_205_epsilon_0_to_fp16, gamma = obj_205_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_67_cast_fp16)[name = string("obj_205_cast_fp16")]; + string var_3268_pad_type_0 = const()[name = string("op_3268_pad_type_0"), val = string("valid")]; + tensor var_3268_strides_0 = const()[name = string("op_3268_strides_0"), val = tensor([1, 1])]; + tensor var_3268_pad_0 = const()[name = string("op_3268_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3268_dilations_0 = const()[name = string("op_3268_dilations_0"), val = tensor([1, 1])]; + int32 var_3268_groups_0 = const()[name = string("op_3268_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138676928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138971904))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138972032)))]; + tensor var_3268_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3268_dilations_0, groups = var_3268_groups_0, pad = var_3268_pad_0, pad_type = var_3268_pad_type_0, strides = var_3268_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3268_cast_fp16")]; + string var_3274_pad_type_0 = const()[name = string("op_3274_pad_type_0"), val = string("valid")]; + tensor var_3274_strides_0 = const()[name = string("op_3274_strides_0"), val = tensor([1, 1])]; + tensor var_3274_pad_0 = const()[name = string("op_3274_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3274_dilations_0 = const()[name = string("op_3274_dilations_0"), val = tensor([1, 1])]; + int32 var_3274_groups_0 = const()[name = string("op_3274_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138978816))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138973632))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3274_cast_fp16 = conv(dilations = var_3274_dilations_0, groups = var_3274_groups_0, pad = var_3274_pad_0, pad_type = var_3274_pad_type_0, strides = var_3274_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3274_cast_fp16")]; + tensor query_45_cast_fp16 = add(x = var_3268_cast_fp16, y = var_3274_cast_fp16)[name = string("query_45_cast_fp16")]; + string var_3283_pad_type_0 = const()[name = string("op_3283_pad_type_0"), val = string("valid")]; + tensor var_3283_strides_0 = const()[name = string("op_3283_strides_0"), val = tensor([1, 1])]; + tensor var_3283_pad_0 = const()[name = string("op_3283_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3283_dilations_0 = const()[name = string("op_3283_dilations_0"), val = tensor([1, 1])]; + int32 var_3283_groups_0 = const()[name = string("op_3283_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139052608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139347584))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_3283_cast_fp16 = conv(dilations = var_3283_dilations_0, groups = var_3283_groups_0, pad = var_3283_pad_0, pad_type = var_3283_pad_type_0, strides = var_3283_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3283_cast_fp16")]; + string var_3289_pad_type_0 = const()[name = string("op_3289_pad_type_0"), val = string("valid")]; + tensor var_3289_strides_0 = const()[name = string("op_3289_strides_0"), val = tensor([1, 1])]; + tensor var_3289_pad_0 = const()[name = string("op_3289_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3289_dilations_0 = const()[name = string("op_3289_dilations_0"), val = tensor([1, 1])]; + int32 var_3289_groups_0 = const()[name = string("op_3289_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139352704))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139347712))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3289_cast_fp16 = conv(dilations = var_3289_dilations_0, groups = var_3289_groups_0, pad = var_3289_pad_0, pad_type = var_3289_pad_type_0, strides = var_3289_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3289_cast_fp16")]; + tensor current_key_cast_fp16 = add(x = var_3283_cast_fp16, y = var_3289_cast_fp16)[name = string("current_key_cast_fp16")]; + string var_3299_pad_type_0 = const()[name = string("op_3299_pad_type_0"), val = string("valid")]; + tensor var_3299_strides_0 = const()[name = string("op_3299_strides_0"), val = tensor([1, 1])]; + tensor var_3299_pad_0 = const()[name = string("op_3299_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3299_dilations_0 = const()[name = string("op_3299_dilations_0"), val = tensor([1, 1])]; + int32 var_3299_groups_0 = const()[name = string("op_3299_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139426496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139721472))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139721600)))]; + tensor var_3299_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3299_dilations_0, groups = var_3299_groups_0, pad = var_3299_pad_0, pad_type = var_3299_pad_type_0, strides = var_3299_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3299_cast_fp16")]; + string var_3305_pad_type_0 = const()[name = string("op_3305_pad_type_0"), val = string("valid")]; + tensor var_3305_strides_0 = const()[name = string("op_3305_strides_0"), val = tensor([1, 1])]; + tensor var_3305_pad_0 = const()[name = string("op_3305_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3305_dilations_0 = const()[name = string("op_3305_dilations_0"), val = tensor([1, 1])]; + int32 var_3305_groups_0 = const()[name = string("op_3305_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139729792))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139723200))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3305_cast_fp16 = conv(dilations = var_3305_dilations_0, groups = var_3305_groups_0, pad = var_3305_pad_0, pad_type = var_3305_pad_type_0, strides = var_3305_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3305_cast_fp16")]; + tensor current_value_cast_fp16 = add(x = var_3299_cast_fp16, y = var_3305_cast_fp16)[name = string("current_value_cast_fp16")]; + tensor var_3311_cast_fp16 = mul(x = current_key_cast_fp16, y = var_202_cast_fp16)[name = string("op_3311_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_71_cast_fp16_11, y = var_3311_cast_fp16)[name = string("key_cast_fp16")]; + tensor var_3313_cast_fp16 = mul(x = current_value_cast_fp16, y = var_202_cast_fp16)[name = string("op_3313_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_86_cast_fp16_11, y = var_3313_cast_fp16)[name = string("value_cast_fp16")]; + tensor var_3316 = const()[name = string("op_3316"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_45_cast_fp16 = reshape(shape = var_3316, x = query_45_cast_fp16)[name = string("mh_q_45_cast_fp16")]; + fp16 var_3318_to_fp16 = const()[name = string("op_3318_to_fp16"), val = fp16(0x1p-3)]; + tensor var_3319_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_3318_to_fp16)[name = string("op_3319_cast_fp16")]; + tensor var_3320 = const()[name = string("op_3320"), val = tensor([1, 12, 64, -1])]; + tensor var_3321_cast_fp16 = reshape(shape = var_3320, x = key_cast_fp16)[name = string("op_3321_cast_fp16")]; + bool mh_w_89_transpose_x_0 = const()[name = string("mh_w_89_transpose_x_0"), val = bool(true)]; + bool mh_w_89_transpose_y_0 = const()[name = string("mh_w_89_transpose_y_0"), val = bool(false)]; + tensor mh_w_89_cast_fp16 = matmul(transpose_x = mh_w_89_transpose_x_0, transpose_y = mh_w_89_transpose_y_0, x = var_3319_cast_fp16, y = var_3321_cast_fp16)[name = string("mh_w_89_cast_fp16")]; + tensor mh_w_91_cast_fp16 = add(x = mh_w_89_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_91_cast_fp16")]; + tensor var_3329_cast_fp16 = softmax(axis = var_3221, x = mh_w_91_cast_fp16)[name = string("op_3329_cast_fp16")]; + tensor var_3330 = const()[name = string("op_3330"), val = tensor([1, 12, 64, -1])]; + tensor var_3331_cast_fp16 = reshape(shape = var_3330, x = value_cast_fp16)[name = string("op_3331_cast_fp16")]; + bool attn_45_transpose_x_0 = const()[name = string("attn_45_transpose_x_0"), val = bool(false)]; + bool attn_45_transpose_y_0 = const()[name = string("attn_45_transpose_y_0"), val = bool(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_3331_cast_fp16, y = var_3329_cast_fp16)[name = string("attn_45_cast_fp16")]; + tensor var_3334 = const()[name = string("op_3334"), val = tensor([1, 768, 1, -1])]; + tensor input_111_cast_fp16 = reshape(shape = var_3334, x = attn_45_cast_fp16)[name = string("input_111_cast_fp16")]; + string var_3344_pad_type_0 = const()[name = string("op_3344_pad_type_0"), val = string("valid")]; + tensor var_3344_strides_0 = const()[name = string("op_3344_strides_0"), val = tensor([1, 1])]; + tensor var_3344_pad_0 = const()[name = string("op_3344_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3344_dilations_0 = const()[name = string("op_3344_dilations_0"), val = tensor([1, 1])]; + int32 var_3344_groups_0 = const()[name = string("op_3344_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139803584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140098560))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140098688)))]; + tensor var_3344_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3344_dilations_0, groups = var_3344_groups_0, pad = var_3344_pad_0, pad_type = var_3344_pad_type_0, strides = var_3344_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("op_3344_cast_fp16")]; + string var_3350_pad_type_0 = const()[name = string("op_3350_pad_type_0"), val = string("valid")]; + tensor var_3350_strides_0 = const()[name = string("op_3350_strides_0"), val = tensor([1, 1])]; + tensor var_3350_pad_0 = const()[name = string("op_3350_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3350_dilations_0 = const()[name = string("op_3350_dilations_0"), val = tensor([1, 1])]; + int32 var_3350_groups_0 = const()[name = string("op_3350_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140109696))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140100288))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3350_cast_fp16 = conv(dilations = var_3350_dilations_0, groups = var_3350_groups_0, pad = var_3350_pad_0, pad_type = var_3350_pad_type_0, strides = var_3350_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = string("op_3350_cast_fp16")]; + tensor obj_211_cast_fp16 = add(x = var_3344_cast_fp16, y = var_3350_cast_fp16)[name = string("obj_211_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_211_cast_fp16)[name = string("inputs_69_cast_fp16")]; + tensor out_69_axes_0 = const()[name = string("out_69_axes_0"), val = tensor([1])]; + fp16 var_3365_to_fp16 = const()[name = string("op_3365_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3365_to_fp16, x = inputs_69_cast_fp16)[name = string("out_69_cast_fp16")]; + tensor obj_213_gamma_0_to_fp16 = const()[name = string("obj_213_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140183488)))]; + tensor obj_213_beta_0_to_fp16 = const()[name = string("obj_213_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140185088)))]; + fp16 obj_213_epsilon_0_to_fp16 = const()[name = string("obj_213_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_213_cast_fp16 = batch_norm(beta = obj_213_beta_0_to_fp16, epsilon = obj_213_epsilon_0_to_fp16, gamma = obj_213_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_69_cast_fp16)[name = string("obj_213_cast_fp16")]; + string var_3385_pad_type_0 = const()[name = string("op_3385_pad_type_0"), val = string("valid")]; + tensor var_3385_strides_0 = const()[name = string("op_3385_strides_0"), val = tensor([1, 1])]; + tensor var_3385_pad_0 = const()[name = string("op_3385_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3385_dilations_0 = const()[name = string("op_3385_dilations_0"), val = tensor([1, 1])]; + int32 var_3385_groups_0 = const()[name = string("op_3385_groups_0"), val = int32(1)]; + tensor layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140186688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140481664))))[name = string("layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140481792)))]; + tensor var_3385_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3385_dilations_0, groups = var_3385_groups_0, pad = var_3385_pad_0, pad_type = var_3385_pad_type_0, strides = var_3385_strides_0, weight = layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_213_cast_fp16)[name = string("op_3385_cast_fp16")]; + string var_3391_pad_type_0 = const()[name = string("op_3391_pad_type_0"), val = string("valid")]; + tensor var_3391_strides_0 = const()[name = string("op_3391_strides_0"), val = tensor([1, 1])]; + tensor var_3391_pad_0 = const()[name = string("op_3391_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3391_dilations_0 = const()[name = string("op_3391_dilations_0"), val = tensor([1, 1])]; + int32 var_3391_groups_0 = const()[name = string("op_3391_groups_0"), val = int32(1)]; + tensor layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140489728))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140483392))))[name = string("layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3391_cast_fp16 = conv(dilations = var_3391_dilations_0, groups = var_3391_groups_0, pad = var_3391_pad_0, pad_type = var_3391_pad_type_0, strides = var_3391_strides_0, weight = layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_213_cast_fp16)[name = string("op_3391_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_3385_cast_fp16, y = var_3391_cast_fp16)[name = string("query_cast_fp16")]; + tensor var_3394 = const()[name = string("op_3394"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_cast_fp16 = reshape(shape = var_3394, x = query_cast_fp16)[name = string("mh_q_cast_fp16")]; + fp16 var_3396_to_fp16 = const()[name = string("op_3396_to_fp16"), val = fp16(0x1p-3)]; + tensor var_3397_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_3396_to_fp16)[name = string("op_3397_cast_fp16")]; + tensor var_3398 = const()[name = string("op_3398"), val = tensor([1, 12, 64, -1])]; + tensor var_3399_cast_fp16 = reshape(shape = var_3398, x = obj_215_cast_fp16)[name = string("op_3399_cast_fp16")]; + bool mh_w_93_transpose_x_0 = const()[name = string("mh_w_93_transpose_x_0"), val = bool(true)]; + bool mh_w_93_transpose_y_0 = const()[name = string("mh_w_93_transpose_y_0"), val = bool(false)]; + tensor mh_w_93_cast_fp16 = matmul(transpose_x = mh_w_93_transpose_x_0, transpose_y = mh_w_93_transpose_y_0, x = var_3397_cast_fp16, y = var_3399_cast_fp16)[name = string("mh_w_93_cast_fp16")]; + tensor mh_w_cast_fp16 = add(x = mh_w_93_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_cast_fp16")]; + tensor obj_221_cast_fp16 = softmax(axis = var_3221, x = mh_w_cast_fp16)[name = string("obj_221_cast_fp16")]; + tensor var_3408 = const()[name = string("op_3408"), val = tensor([1, 12, 64, -1])]; + tensor var_3409_cast_fp16 = reshape(shape = var_3408, x = obj_217_cast_fp16)[name = string("op_3409_cast_fp16")]; + bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; + bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_3409_cast_fp16, y = obj_221_cast_fp16)[name = string("attn_cast_fp16")]; + tensor var_3412 = const()[name = string("op_3412"), val = tensor([1, 768, 1, -1])]; + tensor input_113_cast_fp16 = reshape(shape = var_3412, x = attn_cast_fp16)[name = string("input_113_cast_fp16")]; + string var_3422_pad_type_0 = const()[name = string("op_3422_pad_type_0"), val = string("valid")]; + tensor var_3422_strides_0 = const()[name = string("op_3422_strides_0"), val = tensor([1, 1])]; + tensor var_3422_pad_0 = const()[name = string("op_3422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3422_dilations_0 = const()[name = string("op_3422_dilations_0"), val = tensor([1, 1])]; + int32 var_3422_groups_0 = const()[name = string("op_3422_groups_0"), val = int32(1)]; + tensor layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140563520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140858496))))[name = string("layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140858624)))]; + tensor var_3422_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3422_dilations_0, groups = var_3422_groups_0, pad = var_3422_pad_0, pad_type = var_3422_pad_type_0, strides = var_3422_strides_0, weight = layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("op_3422_cast_fp16")]; + string var_3428_pad_type_0 = const()[name = string("op_3428_pad_type_0"), val = string("valid")]; + tensor var_3428_strides_0 = const()[name = string("op_3428_strides_0"), val = tensor([1, 1])]; + tensor var_3428_pad_0 = const()[name = string("op_3428_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3428_dilations_0 = const()[name = string("op_3428_dilations_0"), val = tensor([1, 1])]; + int32 var_3428_groups_0 = const()[name = string("op_3428_groups_0"), val = int32(1)]; + tensor layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140871296))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140860224))))[name = string("layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3428_cast_fp16 = conv(dilations = var_3428_dilations_0, groups = var_3428_groups_0, pad = var_3428_pad_0, pad_type = var_3428_pad_type_0, strides = var_3428_strides_0, weight = layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = string("op_3428_cast_fp16")]; + tensor obj_219_cast_fp16 = add(x = var_3422_cast_fp16, y = var_3428_cast_fp16)[name = string("obj_219_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_219_cast_fp16)[name = string("inputs_71_cast_fp16")]; + tensor out_71_axes_0 = const()[name = string("out_71_axes_0"), val = tensor([1])]; + fp16 var_3439_to_fp16 = const()[name = string("op_3439_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3439_to_fp16, x = inputs_71_cast_fp16)[name = string("out_71_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = string("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140945088)))]; + tensor input_115_beta_0_to_fp16 = const()[name = string("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140946688)))]; + fp16 input_115_epsilon_0_to_fp16 = const()[name = string("input_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_71_cast_fp16)[name = string("input_115_cast_fp16")]; + string var_3457_pad_type_0 = const()[name = string("op_3457_pad_type_0"), val = string("valid")]; + tensor var_3457_strides_0 = const()[name = string("op_3457_strides_0"), val = tensor([1, 1])]; + tensor var_3457_pad_0 = const()[name = string("op_3457_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3457_dilations_0 = const()[name = string("op_3457_dilations_0"), val = tensor([1, 1])]; + int32 var_3457_groups_0 = const()[name = string("op_3457_groups_0"), val = int32(1)]; + tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140948288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142128000))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142128128)))]; + tensor var_3457_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_3457_dilations_0, groups = var_3457_groups_0, pad = var_3457_pad_0, pad_type = var_3457_pad_type_0, strides = var_3457_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("op_3457_cast_fp16")]; + string var_3463_pad_type_0 = const()[name = string("op_3463_pad_type_0"), val = string("valid")]; + tensor var_3463_strides_0 = const()[name = string("op_3463_strides_0"), val = tensor([1, 1])]; + tensor var_3463_pad_0 = const()[name = string("op_3463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3463_dilations_0 = const()[name = string("op_3463_dilations_0"), val = tensor([1, 1])]; + int32 var_3463_groups_0 = const()[name = string("op_3463_groups_0"), val = int32(1)]; + tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142163008))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142134336))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3463_cast_fp16 = conv(dilations = var_3463_dilations_0, groups = var_3463_groups_0, pad = var_3463_pad_0, pad_type = var_3463_pad_type_0, strides = var_3463_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = string("op_3463_cast_fp16")]; + tensor input_117_cast_fp16 = add(x = var_3457_cast_fp16, y = var_3463_cast_fp16)[name = string("input_117_cast_fp16")]; + string input_mode_0 = const()[name = string("input_mode_0"), val = string("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = string("input_cast_fp16")]; + string var_3474_pad_type_0 = const()[name = string("op_3474_pad_type_0"), val = string("valid")]; + tensor var_3474_strides_0 = const()[name = string("op_3474_strides_0"), val = tensor([1, 1])]; + tensor var_3474_pad_0 = const()[name = string("op_3474_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3474_dilations_0 = const()[name = string("op_3474_dilations_0"), val = tensor([1, 1])]; + int32 var_3474_groups_0 = const()[name = string("op_3474_groups_0"), val = int32(1)]; + tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142457984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143637696))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143637824)))]; + tensor var_3474_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_3474_dilations_0, groups = var_3474_groups_0, pad = var_3474_pad_0, pad_type = var_3474_pad_type_0, strides = var_3474_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("op_3474_cast_fp16")]; + string var_3480_pad_type_0 = const()[name = string("op_3480_pad_type_0"), val = string("valid")]; + tensor var_3480_strides_0 = const()[name = string("op_3480_strides_0"), val = tensor([1, 1])]; + tensor var_3480_pad_0 = const()[name = string("op_3480_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3480_dilations_0 = const()[name = string("op_3480_dilations_0"), val = tensor([1, 1])]; + int32 var_3480_groups_0 = const()[name = string("op_3480_groups_0"), val = int32(1)]; + tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143670336))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143639424))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_3480_cast_fp16 = conv(dilations = var_3480_dilations_0, groups = var_3480_groups_0, pad = var_3480_pad_0, pad_type = var_3480_pad_type_0, strides = var_3480_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = string("op_3480_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = var_3474_cast_fp16, y = var_3480_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = string("out_axes_0"), val = tensor([1])]; + fp16 var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3499_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")]; + tensor hidden_states_gamma_0_to_fp16 = const()[name = string("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143965312)))]; + tensor hidden_states_beta_0_to_fp16 = const()[name = string("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143966912)))]; + fp16 hidden_states_epsilon_0_to_fp16 = const()[name = string("hidden_states_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor var_3510_axes_0 = const()[name = string("op_3510_axes_0"), val = tensor([2])]; + tensor var_3510_cast_fp16 = squeeze(axes = var_3510_axes_0, x = hidden_states_cast_fp16)[name = string("op_3510_cast_fp16")]; + tensor var_3513_perm_0 = const()[name = string("op_3513_perm_0"), val = tensor([0, 2, 1])]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143968512)))]; + tensor var_3513_cast_fp16 = transpose(perm = var_3513_perm_0, x = var_3510_cast_fp16)[name = string("transpose_0")]; + tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_3513_cast_fp16)[name = string("linear_0_cast_fp16")]; + int32 var_3517 = const()[name = string("op_3517"), val = int32(1)]; + bool obj_225_interleave_0 = const()[name = string("obj_225_interleave_0"), val = bool(false)]; + tensor key_cache_updates = concat(axis = var_3517, interleave = obj_225_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = string("obj_225_cast_fp16")]; + int32 var_3520 = const()[name = string("op_3520"), val = int32(1)]; + bool obj_227_interleave_0 = const()[name = string("obj_227_interleave_0"), val = bool(false)]; + tensor value_cache_updates = concat(axis = var_3520, interleave = obj_227_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = string("obj_227_cast_fp16")]; + tensor var_3531_begin_0 = const()[name = string("op_3531_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3531_end_0 = const()[name = string("op_3531_end_0"), val = tensor([1, 4, 1, 1536])]; + tensor var_3531_end_mask_0 = const()[name = string("op_3531_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3531_cast_fp16 = slice_by_index(begin = var_3531_begin_0, end = var_3531_end_0, end_mask = var_3531_end_mask_0, x = obj_113_cast_fp16)[name = string("op_3531_cast_fp16")]; + tensor var_3534_begin_0 = const()[name = string("op_3534_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3534_end_0 = const()[name = string("op_3534_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3534_end_mask_0 = const()[name = string("op_3534_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3534_squeeze_mask_0 = const()[name = string("op_3534_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3534_cast_fp16 = slice_by_index(begin = var_3534_begin_0, end = var_3534_end_0, end_mask = var_3534_end_mask_0, squeeze_mask = var_3534_squeeze_mask_0, x = var_3531_cast_fp16)[name = string("op_3534_cast_fp16")]; + tensor var_3549_begin_0 = const()[name = string("op_3549_begin_0"), val = tensor([0, 9, 0, 0])]; + tensor var_3549_end_0 = const()[name = string("op_3549_end_0"), val = tensor([1, 10, 1, 1536])]; + tensor var_3549_end_mask_0 = const()[name = string("op_3549_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3549_cast_fp16 = slice_by_index(begin = var_3549_begin_0, end = var_3549_end_0, end_mask = var_3549_end_mask_0, x = obj_113_cast_fp16)[name = string("op_3549_cast_fp16")]; + tensor var_3552_begin_0 = const()[name = string("op_3552_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3552_end_0 = const()[name = string("op_3552_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3552_end_mask_0 = const()[name = string("op_3552_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3552_squeeze_mask_0 = const()[name = string("op_3552_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3552_cast_fp16 = slice_by_index(begin = var_3552_begin_0, end = var_3552_end_0, end_mask = var_3552_end_mask_0, squeeze_mask = var_3552_squeeze_mask_0, x = var_3549_cast_fp16)[name = string("op_3552_cast_fp16")]; + tensor var_3567_begin_0 = const()[name = string("op_3567_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3567_end_0 = const()[name = string("op_3567_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3567_end_mask_0 = const()[name = string("op_3567_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3567_cast_fp16 = slice_by_index(begin = var_3567_begin_0, end = var_3567_end_0, end_mask = var_3567_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3567_cast_fp16")]; + tensor var_3570_begin_0 = const()[name = string("op_3570_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3570_end_0 = const()[name = string("op_3570_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3570_end_mask_0 = const()[name = string("op_3570_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3570_squeeze_mask_0 = const()[name = string("op_3570_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3570_cast_fp16 = slice_by_index(begin = var_3570_begin_0, end = var_3570_end_0, end_mask = var_3570_end_mask_0, squeeze_mask = var_3570_squeeze_mask_0, x = var_3567_cast_fp16)[name = string("op_3570_cast_fp16")]; + tensor var_3585_begin_0 = const()[name = string("op_3585_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3585_end_0 = const()[name = string("op_3585_end_0"), val = tensor([1, 5, 1, 1536])]; + tensor var_3585_end_mask_0 = const()[name = string("op_3585_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3585_cast_fp16 = slice_by_index(begin = var_3585_begin_0, end = var_3585_end_0, end_mask = var_3585_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3585_cast_fp16")]; + tensor var_3588_begin_0 = const()[name = string("op_3588_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3588_end_0 = const()[name = string("op_3588_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3588_end_mask_0 = const()[name = string("op_3588_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3588_squeeze_mask_0 = const()[name = string("op_3588_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3588_cast_fp16 = slice_by_index(begin = var_3588_begin_0, end = var_3588_end_0, end_mask = var_3588_end_mask_0, squeeze_mask = var_3588_squeeze_mask_0, x = var_3585_cast_fp16)[name = string("op_3588_cast_fp16")]; + tensor var_3603_begin_0 = const()[name = string("op_3603_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3603_end_0 = const()[name = string("op_3603_end_0"), val = tensor([1, 8, 1, 1536])]; + tensor var_3603_end_mask_0 = const()[name = string("op_3603_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3603_cast_fp16 = slice_by_index(begin = var_3603_begin_0, end = var_3603_end_0, end_mask = var_3603_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3603_cast_fp16")]; + tensor var_3606_begin_0 = const()[name = string("op_3606_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3606_end_0 = const()[name = string("op_3606_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3606_end_mask_0 = const()[name = string("op_3606_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3606_squeeze_mask_0 = const()[name = string("op_3606_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3606_cast_fp16 = slice_by_index(begin = var_3606_begin_0, end = var_3606_end_0, end_mask = var_3606_end_mask_0, squeeze_mask = var_3606_squeeze_mask_0, x = var_3603_cast_fp16)[name = string("op_3606_cast_fp16")]; + tensor var_3621_begin_0 = const()[name = string("op_3621_begin_0"), val = tensor([0, 8, 0, 0])]; + tensor var_3621_end_0 = const()[name = string("op_3621_end_0"), val = tensor([1, 9, 1, 1536])]; + tensor var_3621_end_mask_0 = const()[name = string("op_3621_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3621_cast_fp16 = slice_by_index(begin = var_3621_begin_0, end = var_3621_end_0, end_mask = var_3621_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3621_cast_fp16")]; + tensor var_3624_begin_0 = const()[name = string("op_3624_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3624_end_0 = const()[name = string("op_3624_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3624_end_mask_0 = const()[name = string("op_3624_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3624_squeeze_mask_0 = const()[name = string("op_3624_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3624_cast_fp16 = slice_by_index(begin = var_3624_begin_0, end = var_3624_end_0, end_mask = var_3624_end_mask_0, squeeze_mask = var_3624_squeeze_mask_0, x = var_3621_cast_fp16)[name = string("op_3624_cast_fp16")]; + tensor var_3639_begin_0 = const()[name = string("op_3639_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3639_end_0 = const()[name = string("op_3639_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3639_end_mask_0 = const()[name = string("op_3639_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3639_cast_fp16 = slice_by_index(begin = var_3639_begin_0, end = var_3639_end_0, end_mask = var_3639_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3639_cast_fp16")]; + tensor var_3642_begin_0 = const()[name = string("op_3642_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3642_end_0 = const()[name = string("op_3642_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3642_end_mask_0 = const()[name = string("op_3642_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3642_squeeze_mask_0 = const()[name = string("op_3642_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3642_cast_fp16 = slice_by_index(begin = var_3642_begin_0, end = var_3642_end_0, end_mask = var_3642_end_mask_0, squeeze_mask = var_3642_squeeze_mask_0, x = var_3639_cast_fp16)[name = string("op_3642_cast_fp16")]; + tensor var_3657_begin_0 = const()[name = string("op_3657_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3657_end_0 = const()[name = string("op_3657_end_0"), val = tensor([1, 8, 1, 1536])]; + tensor var_3657_end_mask_0 = const()[name = string("op_3657_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3657_cast_fp16 = slice_by_index(begin = var_3657_begin_0, end = var_3657_end_0, end_mask = var_3657_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3657_cast_fp16")]; + tensor var_3660_begin_0 = const()[name = string("op_3660_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3660_end_0 = const()[name = string("op_3660_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3660_end_mask_0 = const()[name = string("op_3660_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3660_squeeze_mask_0 = const()[name = string("op_3660_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3660_cast_fp16 = slice_by_index(begin = var_3660_begin_0, end = var_3660_end_0, end_mask = var_3660_end_mask_0, squeeze_mask = var_3660_squeeze_mask_0, x = var_3657_cast_fp16)[name = string("op_3660_cast_fp16")]; + tensor var_3675_begin_0 = const()[name = string("op_3675_begin_0"), val = tensor([0, 9, 0, 0])]; + tensor var_3675_end_0 = const()[name = string("op_3675_end_0"), val = tensor([1, 10, 1, 1536])]; + tensor var_3675_end_mask_0 = const()[name = string("op_3675_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3675_cast_fp16 = slice_by_index(begin = var_3675_begin_0, end = var_3675_end_0, end_mask = var_3675_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3675_cast_fp16")]; + tensor var_3678_begin_0 = const()[name = string("op_3678_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3678_end_0 = const()[name = string("op_3678_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3678_end_mask_0 = const()[name = string("op_3678_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3678_squeeze_mask_0 = const()[name = string("op_3678_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3678_cast_fp16 = slice_by_index(begin = var_3678_begin_0, end = var_3678_end_0, end_mask = var_3678_end_mask_0, squeeze_mask = var_3678_squeeze_mask_0, x = var_3675_cast_fp16)[name = string("op_3678_cast_fp16")]; + tensor var_3693_begin_0 = const()[name = string("op_3693_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3693_end_0 = const()[name = string("op_3693_end_0"), val = tensor([1, 6, 1, 1536])]; + tensor var_3693_end_mask_0 = const()[name = string("op_3693_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3693_cast_fp16 = slice_by_index(begin = var_3693_begin_0, end = var_3693_end_0, end_mask = var_3693_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3693_cast_fp16")]; + tensor var_3696_begin_0 = const()[name = string("op_3696_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3696_end_0 = const()[name = string("op_3696_end_0"), val = tensor([1, 1, 1, 1536])]; + tensor var_3696_end_mask_0 = const()[name = string("op_3696_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3696_squeeze_mask_0 = const()[name = string("op_3696_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3696_cast_fp16 = slice_by_index(begin = var_3696_begin_0, end = var_3696_end_0, end_mask = var_3696_end_mask_0, squeeze_mask = var_3696_squeeze_mask_0, x = var_3693_cast_fp16)[name = string("op_3696_cast_fp16")]; + int32 var_3703 = const()[name = string("op_3703"), val = int32(1)]; + bool var_3704_interleave_0 = const()[name = string("op_3704_interleave_0"), val = bool(false)]; + tensor var_3704_cast_fp16 = concat(axis = var_3703, interleave = var_3704_interleave_0, values = (var_3534_cast_fp16, var_3552_cast_fp16, var_3570_cast_fp16, var_3588_cast_fp16, var_3606_cast_fp16, var_3624_cast_fp16, var_3642_cast_fp16, var_3660_cast_fp16, var_3678_cast_fp16, var_3696_cast_fp16))[name = string("op_3704_cast_fp16")]; + bool var_3707 = const()[name = string("op_3707"), val = bool(false)]; + tensor obj_axes_0 = const()[name = string("obj_axes_0"), val = tensor([1])]; + tensor alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_3707, x = var_3704_cast_fp16)[name = string("obj_cast_fp16")]; + } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); +} \ No newline at end of file