diff --git "a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil" "b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil" @@ -0,0 +1,2011 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] +{ + func main(tensor melspectrogram_features) { + string var_100_pad_type_0 = const()[name = string("op_100_pad_type_0"), val = string("custom")]; + tensor var_100_pad_0 = const()[name = string("op_100_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_100_strides_0 = const()[name = string("op_100_strides_0"), val = tensor([1, 1])]; + tensor var_100_dilations_0 = const()[name = string("op_100_dilations_0"), val = tensor([1, 1])]; + int32 var_100_groups_0 = const()[name = string("op_100_groups_0"), val = int32(1)]; + tensor var_69_to_fp16 = const()[name = string("op_69_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_81_to_fp16 = const()[name = string("op_81_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368768)))]; + tensor var_100_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_100_dilations_0, groups = var_100_groups_0, pad = var_100_pad_0, pad_type = var_100_pad_type_0, strides = var_100_strides_0, weight = var_69_to_fp16, x = melspectrogram_features)[name = string("op_100_cast_fp16")]; + string var_138_pad_type_0 = const()[name = string("op_138_pad_type_0"), val = string("custom")]; + tensor var_138_pad_0 = const()[name = string("op_138_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_138_strides_0 = const()[name = string("op_138_strides_0"), val = tensor([1, 1])]; + tensor var_138_dilations_0 = const()[name = string("op_138_dilations_0"), val = tensor([1, 1])]; + int32 var_138_groups_0 = const()[name = string("op_138_groups_0"), val = int32(1)]; + tensor op_113_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462592))))[name = string("op_113_to_fp16_palettized")]; + tensor var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462720)))]; + tensor var_138_cast_fp16 = conv(bias = var_119_to_fp16, dilations = var_138_dilations_0, groups = var_138_groups_0, pad = var_138_pad_0, pad_type = var_138_pad_type_0, strides = var_138_strides_0, weight = op_113_to_fp16_palettized, x = melspectrogram_features)[name = string("op_138_cast_fp16")]; + tensor var_140_cast_fp16 = add(x = var_100_cast_fp16, y = var_138_cast_fp16)[name = string("op_140_cast_fp16")]; + string hidden_states_1_mode_0 = const()[name = string("hidden_states_1_mode_0"), val = string("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_140_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; + string var_186_pad_type_0 = const()[name = string("op_186_pad_type_0"), val = string("custom")]; + tensor var_186_pad_0 = const()[name = string("op_186_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_186_strides_0 = const()[name = string("op_186_strides_0"), val = tensor([2, 2])]; + tensor var_186_dilations_0 = const()[name = string("op_186_dilations_0"), val = tensor([1, 1])]; + int32 var_186_groups_0 = const()[name = string("op_186_groups_0"), val = int32(1)]; + tensor var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464320)))]; + tensor var_186_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_186_dilations_0, groups = var_186_groups_0, pad = var_186_pad_0, pad_type = var_186_pad_type_0, strides = var_186_strides_0, weight = var_155_to_fp16, x = hidden_states_1_cast_fp16)[name = string("op_186_cast_fp16")]; + string var_224_pad_type_0 = const()[name = string("op_224_pad_type_0"), val = string("custom")]; + tensor var_224_pad_0 = const()[name = string("op_224_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_224_strides_0 = const()[name = string("op_224_strides_0"), val = tensor([2, 2])]; + tensor var_224_dilations_0 = const()[name = string("op_224_dilations_0"), val = tensor([1, 1])]; + int32 var_224_groups_0 = const()[name = string("op_224_groups_0"), val = int32(1)]; + tensor op_199_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4003328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888128))))[name = string("op_199_to_fp16_palettized")]; + tensor var_205_to_fp16 = const()[name = string("op_205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888256)))]; + tensor var_224_cast_fp16 = conv(bias = var_205_to_fp16, dilations = var_224_dilations_0, groups = var_224_groups_0, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_224_strides_0, weight = op_199_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = string("op_224_cast_fp16")]; + tensor var_226_cast_fp16 = add(x = var_186_cast_fp16, y = var_224_cast_fp16)[name = string("op_226_cast_fp16")]; + string hidden_states_3_mode_0 = const()[name = string("hidden_states_3_mode_0"), val = string("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_226_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor var_246_to_fp16 = const()[name = string("op_246_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4889856)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_246_to_fp16)[name = string("inputs_1_cast_fp16")]; + int32 var_260 = const()[name = string("op_260"), val = int32(3)]; + tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; + fp16 var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_279_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor obj_1_variance_0_to_fp16 = const()[name = string("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7193920)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = string("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7195520)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = string("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7197120)))]; + fp16 obj_1_epsilon_0_to_fp16 = const()[name = string("obj_1_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_1_cast_fp16")]; + string var_301_pad_type_0 = const()[name = string("op_301_pad_type_0"), val = string("valid")]; + tensor var_301_strides_0 = const()[name = string("op_301_strides_0"), val = tensor([1, 1])]; + tensor var_301_pad_0 = const()[name = string("op_301_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_301_dilations_0 = const()[name = string("op_301_dilations_0"), val = tensor([1, 1])]; + int32 var_301_groups_0 = const()[name = string("op_301_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7198720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493696))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493824)))]; + tensor var_301_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_301_dilations_0, groups = var_301_groups_0, pad = var_301_pad_0, pad_type = var_301_pad_type_0, strides = var_301_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_301_cast_fp16")]; + string var_307_pad_type_0 = const()[name = string("op_307_pad_type_0"), val = string("valid")]; + tensor var_307_strides_0 = const()[name = string("op_307_strides_0"), val = tensor([1, 1])]; + tensor var_307_pad_0 = const()[name = string("op_307_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_307_dilations_0 = const()[name = string("op_307_dilations_0"), val = tensor([1, 1])]; + int32 var_307_groups_0 = const()[name = string("op_307_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7520576))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7495424))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_307_cast_fp16 = conv(dilations = var_307_dilations_0, groups = var_307_groups_0, pad = var_307_pad_0, pad_type = var_307_pad_type_0, strides = var_307_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_307_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_301_cast_fp16, y = var_307_cast_fp16)[name = string("query_1_cast_fp16")]; + string var_316_pad_type_0 = const()[name = string("op_316_pad_type_0"), val = string("valid")]; + tensor var_316_strides_0 = const()[name = string("op_316_strides_0"), val = tensor([1, 1])]; + tensor var_316_pad_0 = const()[name = string("op_316_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_316_dilations_0 = const()[name = string("op_316_dilations_0"), val = tensor([1, 1])]; + int32 var_316_groups_0 = const()[name = string("op_316_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7594368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889344))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_316_cast_fp16 = conv(dilations = var_316_dilations_0, groups = var_316_groups_0, pad = var_316_pad_0, pad_type = var_316_pad_type_0, strides = var_316_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_316_cast_fp16")]; + string var_322_pad_type_0 = const()[name = string("op_322_pad_type_0"), val = string("valid")]; + tensor var_322_strides_0 = const()[name = string("op_322_strides_0"), val = tensor([1, 1])]; + tensor var_322_pad_0 = const()[name = string("op_322_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_322_dilations_0 = const()[name = string("op_322_dilations_0"), val = tensor([1, 1])]; + int32 var_322_groups_0 = const()[name = string("op_322_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7913600))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889472))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_322_cast_fp16 = conv(dilations = var_322_dilations_0, groups = var_322_groups_0, pad = var_322_pad_0, pad_type = var_322_pad_type_0, strides = var_322_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_322_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_316_cast_fp16, y = var_322_cast_fp16)[name = string("key_1_cast_fp16")]; + string var_332_pad_type_0 = const()[name = string("op_332_pad_type_0"), val = string("valid")]; + tensor var_332_strides_0 = const()[name = string("op_332_strides_0"), val = tensor([1, 1])]; + tensor var_332_pad_0 = const()[name = string("op_332_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_332_dilations_0 = const()[name = string("op_332_dilations_0"), val = tensor([1, 1])]; + int32 var_332_groups_0 = const()[name = string("op_332_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7987392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8282368))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8282496)))]; + tensor var_332_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_332_dilations_0, groups = var_332_groups_0, pad = var_332_pad_0, pad_type = var_332_pad_type_0, strides = var_332_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_332_cast_fp16")]; + string var_338_pad_type_0 = const()[name = string("op_338_pad_type_0"), val = string("valid")]; + tensor var_338_strides_0 = const()[name = string("op_338_strides_0"), val = tensor([1, 1])]; + tensor var_338_pad_0 = const()[name = string("op_338_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_338_dilations_0 = const()[name = string("op_338_dilations_0"), val = tensor([1, 1])]; + int32 var_338_groups_0 = const()[name = string("op_338_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8308864))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8284096))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_338_cast_fp16 = conv(dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_338_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_332_cast_fp16, y = var_338_cast_fp16)[name = string("value_1_cast_fp16")]; + tensor var_341 = const()[name = string("op_341"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_341, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; + fp16 var_343_to_fp16 = const()[name = string("op_343_to_fp16"), val = fp16(0x1p-3)]; + tensor var_344_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_343_to_fp16)[name = string("op_344_cast_fp16")]; + tensor var_345 = const()[name = string("op_345"), val = tensor([1, 12, 64, -1])]; + tensor var_346_cast_fp16 = reshape(shape = var_345, x = key_1_cast_fp16)[name = string("op_346_cast_fp16")]; + bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; + bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_344_cast_fp16, y = var_346_cast_fp16)[name = string("mh_w_1_cast_fp16")]; + tensor var_349_cast_fp16 = softmax(axis = var_260, x = mh_w_1_cast_fp16)[name = string("op_349_cast_fp16")]; + tensor var_350 = const()[name = string("op_350"), val = tensor([1, 12, 64, -1])]; + tensor var_351_cast_fp16 = reshape(shape = var_350, x = value_1_cast_fp16)[name = string("op_351_cast_fp16")]; + bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; + bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_351_cast_fp16, y = var_349_cast_fp16)[name = string("attn_1_cast_fp16")]; + tensor var_354 = const()[name = string("op_354"), val = tensor([1, 768, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_354, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_364_pad_type_0 = const()[name = string("op_364_pad_type_0"), val = string("valid")]; + tensor var_364_strides_0 = const()[name = string("op_364_strides_0"), val = tensor([1, 1])]; + tensor var_364_pad_0 = const()[name = string("op_364_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_364_dilations_0 = const()[name = string("op_364_dilations_0"), val = tensor([1, 1])]; + int32 var_364_groups_0 = const()[name = string("op_364_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8382656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8677632))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8677760)))]; + tensor var_364_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_364_dilations_0, groups = var_364_groups_0, pad = var_364_pad_0, pad_type = var_364_pad_type_0, strides = var_364_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_364_cast_fp16")]; + string var_370_pad_type_0 = const()[name = string("op_370_pad_type_0"), val = string("valid")]; + tensor var_370_strides_0 = const()[name = string("op_370_strides_0"), val = tensor([1, 1])]; + tensor var_370_pad_0 = const()[name = string("op_370_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_370_dilations_0 = const()[name = string("op_370_dilations_0"), val = tensor([1, 1])]; + int32 var_370_groups_0 = const()[name = string("op_370_groups_0"), val = int32(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8699776))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8679360))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_370_cast_fp16 = conv(dilations = var_370_dilations_0, groups = var_370_groups_0, pad = var_370_pad_0, pad_type = var_370_pad_type_0, strides = var_370_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_370_cast_fp16")]; + tensor obj_3_cast_fp16 = add(x = var_364_cast_fp16, y = var_370_cast_fp16)[name = string("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = string("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor([1])]; + fp16 var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_381_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = string("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8773568)))]; + tensor input_3_beta_0_to_fp16 = const()[name = string("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8775168)))]; + fp16 input_3_epsilon_0_to_fp16 = const()[name = string("input_3_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("input_3_cast_fp16")]; + string var_399_pad_type_0 = const()[name = string("op_399_pad_type_0"), val = string("valid")]; + tensor var_399_strides_0 = const()[name = string("op_399_strides_0"), val = tensor([1, 1])]; + tensor var_399_pad_0 = const()[name = string("op_399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_399_dilations_0 = const()[name = string("op_399_dilations_0"), val = tensor([1, 1])]; + int32 var_399_groups_0 = const()[name = string("op_399_groups_0"), val = int32(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8776768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9956480))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9956608)))]; + tensor var_399_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_399_dilations_0, groups = var_399_groups_0, pad = var_399_pad_0, pad_type = var_399_pad_type_0, strides = var_399_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_399_cast_fp16")]; + string var_405_pad_type_0 = const()[name = string("op_405_pad_type_0"), val = string("valid")]; + tensor var_405_strides_0 = const()[name = string("op_405_strides_0"), val = tensor([1, 1])]; + tensor var_405_pad_0 = const()[name = string("op_405_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_405_dilations_0 = const()[name = string("op_405_dilations_0"), val = tensor([1, 1])]; + int32 var_405_groups_0 = const()[name = string("op_405_groups_0"), val = int32(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10057408))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9962816))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_405_cast_fp16 = conv(dilations = var_405_dilations_0, groups = var_405_groups_0, pad = var_405_pad_0, pad_type = var_405_pad_type_0, strides = var_405_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_405_cast_fp16")]; + tensor input_5_cast_fp16 = add(x = var_399_cast_fp16, y = var_405_cast_fp16)[name = string("input_5_cast_fp16")]; + string input_7_mode_0 = const()[name = string("input_7_mode_0"), val = string("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")]; + string var_416_pad_type_0 = const()[name = string("op_416_pad_type_0"), val = string("valid")]; + tensor var_416_strides_0 = const()[name = string("op_416_strides_0"), val = tensor([1, 1])]; + tensor var_416_pad_0 = const()[name = string("op_416_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_416_dilations_0 = const()[name = string("op_416_dilations_0"), val = tensor([1, 1])]; + int32 var_416_groups_0 = const()[name = string("op_416_groups_0"), val = int32(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10352384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11532096))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11532224)))]; + tensor var_416_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_416_dilations_0, groups = var_416_groups_0, pad = var_416_pad_0, pad_type = var_416_pad_type_0, strides = var_416_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("op_416_cast_fp16")]; + string var_422_pad_type_0 = const()[name = string("op_422_pad_type_0"), val = string("valid")]; + tensor var_422_strides_0 = const()[name = string("op_422_strides_0"), val = tensor([1, 1])]; + tensor var_422_pad_0 = const()[name = string("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_dilations_0 = const()[name = string("op_422_dilations_0"), val = tensor([1, 1])]; + int32 var_422_groups_0 = const()[name = string("op_422_groups_0"), val = int32(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11613568))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11533824))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_422_cast_fp16 = conv(dilations = var_422_dilations_0, groups = var_422_groups_0, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_422_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = string("op_422_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_416_cast_fp16, y = var_422_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_5_cast_fp16")]; + int32 var_432 = const()[name = string("op_432"), val = int32(3)]; + tensor out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor([1])]; + fp16 var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_451_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11908544)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11910144)))]; + fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("obj_5_cast_fp16")]; + string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")]; + tensor var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor([1, 1])]; + tensor var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor([1, 1])]; + int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11911744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12206720))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12206848)))]; + tensor var_473_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_473_cast_fp16")]; + string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")]; + tensor var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor([1, 1])]; + tensor var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor([1, 1])]; + int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12226688))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12208448))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_479_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("query_3_cast_fp16")]; + string var_488_pad_type_0 = const()[name = string("op_488_pad_type_0"), val = string("valid")]; + tensor var_488_strides_0 = const()[name = string("op_488_strides_0"), val = tensor([1, 1])]; + tensor var_488_pad_0 = const()[name = string("op_488_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_488_dilations_0 = const()[name = string("op_488_dilations_0"), val = tensor([1, 1])]; + int32 var_488_groups_0 = const()[name = string("op_488_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12300480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12595456))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_488_cast_fp16 = conv(dilations = var_488_dilations_0, groups = var_488_groups_0, pad = var_488_pad_0, pad_type = var_488_pad_type_0, strides = var_488_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_488_cast_fp16")]; + string var_494_pad_type_0 = const()[name = string("op_494_pad_type_0"), val = string("valid")]; + tensor var_494_strides_0 = const()[name = string("op_494_strides_0"), val = tensor([1, 1])]; + tensor var_494_pad_0 = const()[name = string("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_dilations_0 = const()[name = string("op_494_dilations_0"), val = tensor([1, 1])]; + int32 var_494_groups_0 = const()[name = string("op_494_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12614464))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12595584))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_494_cast_fp16 = conv(dilations = var_494_dilations_0, groups = var_494_groups_0, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_494_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_494_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_488_cast_fp16, y = var_494_cast_fp16)[name = string("key_3_cast_fp16")]; + string var_504_pad_type_0 = const()[name = string("op_504_pad_type_0"), val = string("valid")]; + tensor var_504_strides_0 = const()[name = string("op_504_strides_0"), val = tensor([1, 1])]; + tensor var_504_pad_0 = const()[name = string("op_504_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_504_dilations_0 = const()[name = string("op_504_dilations_0"), val = tensor([1, 1])]; + int32 var_504_groups_0 = const()[name = string("op_504_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12688256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12983232))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12983360)))]; + tensor var_504_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_504_dilations_0, groups = var_504_groups_0, pad = var_504_pad_0, pad_type = var_504_pad_type_0, strides = var_504_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_504_cast_fp16")]; + string var_510_pad_type_0 = const()[name = string("op_510_pad_type_0"), val = string("valid")]; + tensor var_510_strides_0 = const()[name = string("op_510_strides_0"), val = tensor([1, 1])]; + tensor var_510_pad_0 = const()[name = string("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_dilations_0 = const()[name = string("op_510_dilations_0"), val = tensor([1, 1])]; + int32 var_510_groups_0 = const()[name = string("op_510_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13000768))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12984960))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_510_cast_fp16 = conv(dilations = var_510_dilations_0, groups = var_510_groups_0, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_510_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_510_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_504_cast_fp16, y = var_510_cast_fp16)[name = string("value_3_cast_fp16")]; + tensor var_513 = const()[name = string("op_513"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_513, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")]; + fp16 var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = fp16(0x1p-3)]; + tensor var_516_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_515_to_fp16)[name = string("op_516_cast_fp16")]; + tensor var_517 = const()[name = string("op_517"), val = tensor([1, 12, 64, -1])]; + tensor var_518_cast_fp16 = reshape(shape = var_517, x = key_3_cast_fp16)[name = string("op_518_cast_fp16")]; + bool mh_w_3_transpose_x_0 = const()[name = string("mh_w_3_transpose_x_0"), val = bool(true)]; + bool mh_w_3_transpose_y_0 = const()[name = string("mh_w_3_transpose_y_0"), val = bool(false)]; + tensor mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_516_cast_fp16, y = var_518_cast_fp16)[name = string("mh_w_3_cast_fp16")]; + tensor var_521_cast_fp16 = softmax(axis = var_432, x = mh_w_3_cast_fp16)[name = string("op_521_cast_fp16")]; + tensor var_522 = const()[name = string("op_522"), val = tensor([1, 12, 64, -1])]; + tensor var_523_cast_fp16 = reshape(shape = var_522, x = value_3_cast_fp16)[name = string("op_523_cast_fp16")]; + bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; + bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_523_cast_fp16, y = var_521_cast_fp16)[name = string("attn_3_cast_fp16")]; + tensor var_526 = const()[name = string("op_526"), val = tensor([1, 768, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_526, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")]; + string var_536_pad_type_0 = const()[name = string("op_536_pad_type_0"), val = string("valid")]; + tensor var_536_strides_0 = const()[name = string("op_536_strides_0"), val = tensor([1, 1])]; + tensor var_536_pad_0 = const()[name = string("op_536_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_536_dilations_0 = const()[name = string("op_536_dilations_0"), val = tensor([1, 1])]; + int32 var_536_groups_0 = const()[name = string("op_536_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13074560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13369536))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13369664)))]; + tensor var_536_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_536_dilations_0, groups = var_536_groups_0, pad = var_536_pad_0, pad_type = var_536_pad_type_0, strides = var_536_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_536_cast_fp16")]; + string var_542_pad_type_0 = const()[name = string("op_542_pad_type_0"), val = string("valid")]; + tensor var_542_strides_0 = const()[name = string("op_542_strides_0"), val = tensor([1, 1])]; + tensor var_542_pad_0 = const()[name = string("op_542_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_542_dilations_0 = const()[name = string("op_542_dilations_0"), val = tensor([1, 1])]; + int32 var_542_groups_0 = const()[name = string("op_542_groups_0"), val = int32(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13384576))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13371264))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_542_cast_fp16 = conv(dilations = var_542_dilations_0, groups = var_542_groups_0, pad = var_542_pad_0, pad_type = var_542_pad_type_0, strides = var_542_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_542_cast_fp16")]; + tensor obj_7_cast_fp16 = add(x = var_536_cast_fp16, y = var_542_cast_fp16)[name = string("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = string("inputs_7_cast_fp16")]; + tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; + fp16 var_553_to_fp16 = const()[name = string("op_553_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_553_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = string("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13458368)))]; + tensor input_11_beta_0_to_fp16 = const()[name = string("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13459968)))]; + fp16 input_11_epsilon_0_to_fp16 = const()[name = string("input_11_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("input_11_cast_fp16")]; + string var_571_pad_type_0 = const()[name = string("op_571_pad_type_0"), val = string("valid")]; + tensor var_571_strides_0 = const()[name = string("op_571_strides_0"), val = tensor([1, 1])]; + tensor var_571_pad_0 = const()[name = string("op_571_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_571_dilations_0 = const()[name = string("op_571_dilations_0"), val = tensor([1, 1])]; + int32 var_571_groups_0 = const()[name = string("op_571_groups_0"), val = int32(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13461568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14641280))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14641408)))]; + tensor var_571_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_571_dilations_0, groups = var_571_groups_0, pad = var_571_pad_0, pad_type = var_571_pad_type_0, strides = var_571_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_571_cast_fp16")]; + string var_577_pad_type_0 = const()[name = string("op_577_pad_type_0"), val = string("valid")]; + tensor var_577_strides_0 = const()[name = string("op_577_strides_0"), val = tensor([1, 1])]; + tensor var_577_pad_0 = const()[name = string("op_577_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_577_dilations_0 = const()[name = string("op_577_dilations_0"), val = tensor([1, 1])]; + int32 var_577_groups_0 = const()[name = string("op_577_groups_0"), val = int32(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14721024))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14647616))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_577_cast_fp16 = conv(dilations = var_577_dilations_0, groups = var_577_groups_0, pad = var_577_pad_0, pad_type = var_577_pad_type_0, strides = var_577_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_577_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = var_571_cast_fp16, y = var_577_cast_fp16)[name = string("input_13_cast_fp16")]; + string input_15_mode_0 = const()[name = string("input_15_mode_0"), val = string("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = string("input_15_cast_fp16")]; + string var_588_pad_type_0 = const()[name = string("op_588_pad_type_0"), val = string("valid")]; + tensor var_588_strides_0 = const()[name = string("op_588_strides_0"), val = tensor([1, 1])]; + tensor var_588_pad_0 = const()[name = string("op_588_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_588_dilations_0 = const()[name = string("op_588_dilations_0"), val = tensor([1, 1])]; + int32 var_588_groups_0 = const()[name = string("op_588_groups_0"), val = int32(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15016000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16195712))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16195840)))]; + tensor var_588_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_588_dilations_0, groups = var_588_groups_0, pad = var_588_pad_0, pad_type = var_588_pad_type_0, strides = var_588_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_588_cast_fp16")]; + string var_594_pad_type_0 = const()[name = string("op_594_pad_type_0"), val = string("valid")]; + tensor var_594_strides_0 = const()[name = string("op_594_strides_0"), val = tensor([1, 1])]; + tensor var_594_pad_0 = const()[name = string("op_594_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_594_dilations_0 = const()[name = string("op_594_dilations_0"), val = tensor([1, 1])]; + int32 var_594_groups_0 = const()[name = string("op_594_groups_0"), val = int32(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16268160))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16197440))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_594_cast_fp16 = conv(dilations = var_594_dilations_0, groups = var_594_groups_0, pad = var_594_pad_0, pad_type = var_594_pad_type_0, strides = var_594_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_594_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_588_cast_fp16, y = var_594_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_9_cast_fp16")]; + int32 var_604 = const()[name = string("op_604"), val = int32(3)]; + tensor out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor([1])]; + fp16 var_623_to_fp16 = const()[name = string("op_623_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_623_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = string("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16563136)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = string("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16564736)))]; + fp16 obj_9_epsilon_0_to_fp16 = const()[name = string("obj_9_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_9_cast_fp16")]; + string var_645_pad_type_0 = const()[name = string("op_645_pad_type_0"), val = string("valid")]; + tensor var_645_strides_0 = const()[name = string("op_645_strides_0"), val = tensor([1, 1])]; + tensor var_645_pad_0 = const()[name = string("op_645_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_645_dilations_0 = const()[name = string("op_645_dilations_0"), val = tensor([1, 1])]; + int32 var_645_groups_0 = const()[name = string("op_645_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16566336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16861312))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16861440)))]; + tensor var_645_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_645_dilations_0, groups = var_645_groups_0, pad = var_645_pad_0, pad_type = var_645_pad_type_0, strides = var_645_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_645_cast_fp16")]; + string var_651_pad_type_0 = const()[name = string("op_651_pad_type_0"), val = string("valid")]; + tensor var_651_strides_0 = const()[name = string("op_651_strides_0"), val = tensor([1, 1])]; + tensor var_651_pad_0 = const()[name = string("op_651_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_651_dilations_0 = const()[name = string("op_651_dilations_0"), val = tensor([1, 1])]; + int32 var_651_groups_0 = const()[name = string("op_651_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16878784))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16863040))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_651_cast_fp16 = conv(dilations = var_651_dilations_0, groups = var_651_groups_0, pad = var_651_pad_0, pad_type = var_651_pad_type_0, strides = var_651_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_651_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_645_cast_fp16, y = var_651_cast_fp16)[name = string("query_5_cast_fp16")]; + string var_660_pad_type_0 = const()[name = string("op_660_pad_type_0"), val = string("valid")]; + tensor var_660_strides_0 = const()[name = string("op_660_strides_0"), val = tensor([1, 1])]; + tensor var_660_pad_0 = const()[name = string("op_660_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_660_dilations_0 = const()[name = string("op_660_dilations_0"), val = tensor([1, 1])]; + int32 var_660_groups_0 = const()[name = string("op_660_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16952576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17247552))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_660_cast_fp16 = conv(dilations = var_660_dilations_0, groups = var_660_groups_0, pad = var_660_pad_0, pad_type = var_660_pad_type_0, strides = var_660_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_660_cast_fp16")]; + string var_666_pad_type_0 = const()[name = string("op_666_pad_type_0"), val = string("valid")]; + tensor var_666_strides_0 = const()[name = string("op_666_strides_0"), val = tensor([1, 1])]; + tensor var_666_pad_0 = const()[name = string("op_666_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_666_dilations_0 = const()[name = string("op_666_dilations_0"), val = tensor([1, 1])]; + int32 var_666_groups_0 = const()[name = string("op_666_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17262400))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17247680))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_666_cast_fp16 = conv(dilations = var_666_dilations_0, groups = var_666_groups_0, pad = var_666_pad_0, pad_type = var_666_pad_type_0, strides = var_666_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_666_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_660_cast_fp16, y = var_666_cast_fp16)[name = string("key_5_cast_fp16")]; + string var_676_pad_type_0 = const()[name = string("op_676_pad_type_0"), val = string("valid")]; + tensor var_676_strides_0 = const()[name = string("op_676_strides_0"), val = tensor([1, 1])]; + tensor var_676_pad_0 = const()[name = string("op_676_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_676_dilations_0 = const()[name = string("op_676_dilations_0"), val = tensor([1, 1])]; + int32 var_676_groups_0 = const()[name = string("op_676_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17336192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17631168))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17631296)))]; + tensor var_676_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_676_dilations_0, groups = var_676_groups_0, pad = var_676_pad_0, pad_type = var_676_pad_type_0, strides = var_676_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_676_cast_fp16")]; + string var_682_pad_type_0 = const()[name = string("op_682_pad_type_0"), val = string("valid")]; + tensor var_682_strides_0 = const()[name = string("op_682_strides_0"), val = tensor([1, 1])]; + tensor var_682_pad_0 = const()[name = string("op_682_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_682_dilations_0 = const()[name = string("op_682_dilations_0"), val = tensor([1, 1])]; + int32 var_682_groups_0 = const()[name = string("op_682_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17644992))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17632896))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_682_cast_fp16 = conv(dilations = var_682_dilations_0, groups = var_682_groups_0, pad = var_682_pad_0, pad_type = var_682_pad_type_0, strides = var_682_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_682_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_676_cast_fp16, y = var_682_cast_fp16)[name = string("value_5_cast_fp16")]; + tensor var_685 = const()[name = string("op_685"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_685, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")]; + fp16 var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = fp16(0x1p-3)]; + tensor var_688_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_687_to_fp16)[name = string("op_688_cast_fp16")]; + tensor var_689 = const()[name = string("op_689"), val = tensor([1, 12, 64, -1])]; + tensor var_690_cast_fp16 = reshape(shape = var_689, x = key_5_cast_fp16)[name = string("op_690_cast_fp16")]; + bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; + bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_688_cast_fp16, y = var_690_cast_fp16)[name = string("mh_w_5_cast_fp16")]; + tensor var_693_cast_fp16 = softmax(axis = var_604, x = mh_w_5_cast_fp16)[name = string("op_693_cast_fp16")]; + tensor var_694 = const()[name = string("op_694"), val = tensor([1, 12, 64, -1])]; + tensor var_695_cast_fp16 = reshape(shape = var_694, x = value_5_cast_fp16)[name = string("op_695_cast_fp16")]; + bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; + bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_695_cast_fp16, y = var_693_cast_fp16)[name = string("attn_5_cast_fp16")]; + tensor var_698 = const()[name = string("op_698"), val = tensor([1, 768, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_698, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")]; + string var_708_pad_type_0 = const()[name = string("op_708_pad_type_0"), val = string("valid")]; + tensor var_708_strides_0 = const()[name = string("op_708_strides_0"), val = tensor([1, 1])]; + tensor var_708_pad_0 = const()[name = string("op_708_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_708_dilations_0 = const()[name = string("op_708_dilations_0"), val = tensor([1, 1])]; + int32 var_708_groups_0 = const()[name = string("op_708_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17718784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18013760))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18013888)))]; + tensor var_708_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_708_dilations_0, groups = var_708_groups_0, pad = var_708_pad_0, pad_type = var_708_pad_type_0, strides = var_708_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("op_708_cast_fp16")]; + string var_714_pad_type_0 = const()[name = string("op_714_pad_type_0"), val = string("valid")]; + tensor var_714_strides_0 = const()[name = string("op_714_strides_0"), val = tensor([1, 1])]; + tensor var_714_pad_0 = const()[name = string("op_714_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_714_dilations_0 = const()[name = string("op_714_dilations_0"), val = tensor([1, 1])]; + int32 var_714_groups_0 = const()[name = string("op_714_groups_0"), val = int32(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18025408))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18015488))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_714_cast_fp16 = conv(dilations = var_714_dilations_0, groups = var_714_groups_0, pad = var_714_pad_0, pad_type = var_714_pad_type_0, strides = var_714_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = string("op_714_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_708_cast_fp16, y = var_714_cast_fp16)[name = string("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor([1])]; + fp16 var_725_to_fp16 = const()[name = string("op_725_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_725_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = string("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18099200)))]; + tensor input_19_beta_0_to_fp16 = const()[name = string("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18100800)))]; + fp16 input_19_epsilon_0_to_fp16 = const()[name = string("input_19_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_19_cast_fp16")]; + string var_743_pad_type_0 = const()[name = string("op_743_pad_type_0"), val = string("valid")]; + tensor var_743_strides_0 = const()[name = string("op_743_strides_0"), val = tensor([1, 1])]; + tensor var_743_pad_0 = const()[name = string("op_743_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_743_dilations_0 = const()[name = string("op_743_dilations_0"), val = tensor([1, 1])]; + int32 var_743_groups_0 = const()[name = string("op_743_groups_0"), val = int32(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18102400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19282112))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19282240)))]; + tensor var_743_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_743_dilations_0, groups = var_743_groups_0, pad = var_743_pad_0, pad_type = var_743_pad_type_0, strides = var_743_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_743_cast_fp16")]; + string var_749_pad_type_0 = const()[name = string("op_749_pad_type_0"), val = string("valid")]; + tensor var_749_strides_0 = const()[name = string("op_749_strides_0"), val = tensor([1, 1])]; + tensor var_749_pad_0 = const()[name = string("op_749_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_749_dilations_0 = const()[name = string("op_749_dilations_0"), val = tensor([1, 1])]; + int32 var_749_groups_0 = const()[name = string("op_749_groups_0"), val = int32(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19352448))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19288448))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_749_cast_fp16 = conv(dilations = var_749_dilations_0, groups = var_749_groups_0, pad = var_749_pad_0, pad_type = var_749_pad_type_0, strides = var_749_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_749_cast_fp16")]; + tensor input_21_cast_fp16 = add(x = var_743_cast_fp16, y = var_749_cast_fp16)[name = string("input_21_cast_fp16")]; + string input_23_mode_0 = const()[name = string("input_23_mode_0"), val = string("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = string("input_23_cast_fp16")]; + string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")]; + tensor var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor([1, 1])]; + tensor var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor([1, 1])]; + int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19647424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20827136))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20827264)))]; + tensor var_760_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_760_cast_fp16")]; + string var_766_pad_type_0 = const()[name = string("op_766_pad_type_0"), val = string("valid")]; + tensor var_766_strides_0 = const()[name = string("op_766_strides_0"), val = tensor([1, 1])]; + tensor var_766_pad_0 = const()[name = string("op_766_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_766_dilations_0 = const()[name = string("op_766_dilations_0"), val = tensor([1, 1])]; + int32 var_766_groups_0 = const()[name = string("op_766_groups_0"), val = int32(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20892032))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20828864))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_766_cast_fp16 = conv(dilations = var_766_dilations_0, groups = var_766_groups_0, pad = var_766_pad_0, pad_type = var_766_pad_type_0, strides = var_766_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_766_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_760_cast_fp16, y = var_766_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_13_cast_fp16")]; + int32 var_776 = const()[name = string("op_776"), val = int32(3)]; + tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; + fp16 var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_795_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21187008)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21188608)))]; + fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_13_cast_fp16")]; + string var_817_pad_type_0 = const()[name = string("op_817_pad_type_0"), val = string("valid")]; + tensor var_817_strides_0 = const()[name = string("op_817_strides_0"), val = tensor([1, 1])]; + tensor var_817_pad_0 = const()[name = string("op_817_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_817_dilations_0 = const()[name = string("op_817_dilations_0"), val = tensor([1, 1])]; + int32 var_817_groups_0 = const()[name = string("op_817_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21190208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21485184))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21485312)))]; + tensor var_817_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_817_dilations_0, groups = var_817_groups_0, pad = var_817_pad_0, pad_type = var_817_pad_type_0, strides = var_817_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_817_cast_fp16")]; + string var_823_pad_type_0 = const()[name = string("op_823_pad_type_0"), val = string("valid")]; + tensor var_823_strides_0 = const()[name = string("op_823_strides_0"), val = tensor([1, 1])]; + tensor var_823_pad_0 = const()[name = string("op_823_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_823_dilations_0 = const()[name = string("op_823_dilations_0"), val = tensor([1, 1])]; + int32 var_823_groups_0 = const()[name = string("op_823_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21500352))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21486912))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_823_cast_fp16 = conv(dilations = var_823_dilations_0, groups = var_823_groups_0, pad = var_823_pad_0, pad_type = var_823_pad_type_0, strides = var_823_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_823_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_817_cast_fp16, y = var_823_cast_fp16)[name = string("query_7_cast_fp16")]; + string var_832_pad_type_0 = const()[name = string("op_832_pad_type_0"), val = string("valid")]; + tensor var_832_strides_0 = const()[name = string("op_832_strides_0"), val = tensor([1, 1])]; + tensor var_832_pad_0 = const()[name = string("op_832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_832_dilations_0 = const()[name = string("op_832_dilations_0"), val = tensor([1, 1])]; + int32 var_832_groups_0 = const()[name = string("op_832_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21574144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21869120))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_832_cast_fp16 = conv(dilations = var_832_dilations_0, groups = var_832_groups_0, pad = var_832_pad_0, pad_type = var_832_pad_type_0, strides = var_832_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_832_cast_fp16")]; + string var_838_pad_type_0 = const()[name = string("op_838_pad_type_0"), val = string("valid")]; + tensor var_838_strides_0 = const()[name = string("op_838_strides_0"), val = tensor([1, 1])]; + tensor var_838_pad_0 = const()[name = string("op_838_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_838_dilations_0 = const()[name = string("op_838_dilations_0"), val = tensor([1, 1])]; + int32 var_838_groups_0 = const()[name = string("op_838_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21881920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21869248))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_838_cast_fp16 = conv(dilations = var_838_dilations_0, groups = var_838_groups_0, pad = var_838_pad_0, pad_type = var_838_pad_type_0, strides = var_838_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_838_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_832_cast_fp16, y = var_838_cast_fp16)[name = string("key_7_cast_fp16")]; + string var_848_pad_type_0 = const()[name = string("op_848_pad_type_0"), val = string("valid")]; + tensor var_848_strides_0 = const()[name = string("op_848_strides_0"), val = tensor([1, 1])]; + tensor var_848_pad_0 = const()[name = string("op_848_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_848_dilations_0 = const()[name = string("op_848_dilations_0"), val = tensor([1, 1])]; + int32 var_848_groups_0 = const()[name = string("op_848_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21955712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22250688))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22250816)))]; + tensor var_848_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_848_dilations_0, groups = var_848_groups_0, pad = var_848_pad_0, pad_type = var_848_pad_type_0, strides = var_848_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_848_cast_fp16")]; + string var_854_pad_type_0 = const()[name = string("op_854_pad_type_0"), val = string("valid")]; + tensor var_854_strides_0 = const()[name = string("op_854_strides_0"), val = tensor([1, 1])]; + tensor var_854_pad_0 = const()[name = string("op_854_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_854_dilations_0 = const()[name = string("op_854_dilations_0"), val = tensor([1, 1])]; + int32 var_854_groups_0 = const()[name = string("op_854_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22262080))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22252416))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_854_cast_fp16 = conv(dilations = var_854_dilations_0, groups = var_854_groups_0, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_854_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_854_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_848_cast_fp16, y = var_854_cast_fp16)[name = string("value_7_cast_fp16")]; + tensor var_857 = const()[name = string("op_857"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_857, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")]; + fp16 var_859_to_fp16 = const()[name = string("op_859_to_fp16"), val = fp16(0x1p-3)]; + tensor var_860_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_859_to_fp16)[name = string("op_860_cast_fp16")]; + tensor var_861 = const()[name = string("op_861"), val = tensor([1, 12, 64, -1])]; + tensor var_862_cast_fp16 = reshape(shape = var_861, x = key_7_cast_fp16)[name = string("op_862_cast_fp16")]; + bool mh_w_7_transpose_x_0 = const()[name = string("mh_w_7_transpose_x_0"), val = bool(true)]; + bool mh_w_7_transpose_y_0 = const()[name = string("mh_w_7_transpose_y_0"), val = bool(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_860_cast_fp16, y = var_862_cast_fp16)[name = string("mh_w_7_cast_fp16")]; + tensor var_865_cast_fp16 = softmax(axis = var_776, x = mh_w_7_cast_fp16)[name = string("op_865_cast_fp16")]; + tensor var_866 = const()[name = string("op_866"), val = tensor([1, 12, 64, -1])]; + tensor var_867_cast_fp16 = reshape(shape = var_866, x = value_7_cast_fp16)[name = string("op_867_cast_fp16")]; + bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; + bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_867_cast_fp16, y = var_865_cast_fp16)[name = string("attn_7_cast_fp16")]; + tensor var_870 = const()[name = string("op_870"), val = tensor([1, 768, 1, -1])]; + tensor input_25_cast_fp16 = reshape(shape = var_870, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")]; + string var_880_pad_type_0 = const()[name = string("op_880_pad_type_0"), val = string("valid")]; + tensor var_880_strides_0 = const()[name = string("op_880_strides_0"), val = tensor([1, 1])]; + tensor var_880_pad_0 = const()[name = string("op_880_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_880_dilations_0 = const()[name = string("op_880_dilations_0"), val = tensor([1, 1])]; + int32 var_880_groups_0 = const()[name = string("op_880_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22335872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22630848))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22630976)))]; + tensor var_880_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_880_dilations_0, groups = var_880_groups_0, pad = var_880_pad_0, pad_type = var_880_pad_type_0, strides = var_880_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_880_cast_fp16")]; + string var_886_pad_type_0 = const()[name = string("op_886_pad_type_0"), val = string("valid")]; + tensor var_886_strides_0 = const()[name = string("op_886_strides_0"), val = tensor([1, 1])]; + tensor var_886_pad_0 = const()[name = string("op_886_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_886_dilations_0 = const()[name = string("op_886_dilations_0"), val = tensor([1, 1])]; + int32 var_886_groups_0 = const()[name = string("op_886_groups_0"), val = int32(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22640640))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22632576))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_886_cast_fp16 = conv(dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_886_cast_fp16")]; + tensor obj_15_cast_fp16 = add(x = var_880_cast_fp16, y = var_886_cast_fp16)[name = string("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = string("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor([1])]; + fp16 var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_897_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = string("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22714432)))]; + tensor input_27_beta_0_to_fp16 = const()[name = string("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22716032)))]; + fp16 input_27_epsilon_0_to_fp16 = const()[name = string("input_27_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("input_27_cast_fp16")]; + string var_915_pad_type_0 = const()[name = string("op_915_pad_type_0"), val = string("valid")]; + tensor var_915_strides_0 = const()[name = string("op_915_strides_0"), val = tensor([1, 1])]; + tensor var_915_pad_0 = const()[name = string("op_915_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_915_dilations_0 = const()[name = string("op_915_dilations_0"), val = tensor([1, 1])]; + int32 var_915_groups_0 = const()[name = string("op_915_groups_0"), val = int32(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22717632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23897344))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23897472)))]; + tensor var_915_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_915_dilations_0, groups = var_915_groups_0, pad = var_915_pad_0, pad_type = var_915_pad_type_0, strides = var_915_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_915_cast_fp16")]; + string var_921_pad_type_0 = const()[name = string("op_921_pad_type_0"), val = string("valid")]; + tensor var_921_strides_0 = const()[name = string("op_921_strides_0"), val = tensor([1, 1])]; + tensor var_921_pad_0 = const()[name = string("op_921_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_921_dilations_0 = const()[name = string("op_921_dilations_0"), val = tensor([1, 1])]; + int32 var_921_groups_0 = const()[name = string("op_921_groups_0"), val = int32(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23960640))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23903680))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_921_cast_fp16 = conv(dilations = var_921_dilations_0, groups = var_921_groups_0, pad = var_921_pad_0, pad_type = var_921_pad_type_0, strides = var_921_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = string("op_921_cast_fp16")]; + tensor input_29_cast_fp16 = add(x = var_915_cast_fp16, y = var_921_cast_fp16)[name = string("input_29_cast_fp16")]; + string input_31_mode_0 = const()[name = string("input_31_mode_0"), val = string("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = string("input_31_cast_fp16")]; + string var_932_pad_type_0 = const()[name = string("op_932_pad_type_0"), val = string("valid")]; + tensor var_932_strides_0 = const()[name = string("op_932_strides_0"), val = tensor([1, 1])]; + tensor var_932_pad_0 = const()[name = string("op_932_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_932_dilations_0 = const()[name = string("op_932_dilations_0"), val = tensor([1, 1])]; + int32 var_932_groups_0 = const()[name = string("op_932_groups_0"), val = int32(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24255616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25435328))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25435456)))]; + tensor var_932_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_932_dilations_0, groups = var_932_groups_0, pad = var_932_pad_0, pad_type = var_932_pad_type_0, strides = var_932_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_932_cast_fp16")]; + string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; + tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; + tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; + int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25492672))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25437056))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_938_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = var_932_cast_fp16, y = var_938_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_17_cast_fp16")]; + int32 var_948 = const()[name = string("op_948"), val = int32(3)]; + tensor out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor([1])]; + fp16 var_967_to_fp16 = const()[name = string("op_967_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_967_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = string("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25787648)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = string("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25789248)))]; + fp16 obj_17_epsilon_0_to_fp16 = const()[name = string("obj_17_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("obj_17_cast_fp16")]; + string var_989_pad_type_0 = const()[name = string("op_989_pad_type_0"), val = string("valid")]; + tensor var_989_strides_0 = const()[name = string("op_989_strides_0"), val = tensor([1, 1])]; + tensor var_989_pad_0 = const()[name = string("op_989_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_989_dilations_0 = const()[name = string("op_989_dilations_0"), val = tensor([1, 1])]; + int32 var_989_groups_0 = const()[name = string("op_989_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25790848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26085824))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26085952)))]; + tensor var_989_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_989_dilations_0, groups = var_989_groups_0, pad = var_989_pad_0, pad_type = var_989_pad_type_0, strides = var_989_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_989_cast_fp16")]; + string var_995_pad_type_0 = const()[name = string("op_995_pad_type_0"), val = string("valid")]; + tensor var_995_strides_0 = const()[name = string("op_995_strides_0"), val = tensor([1, 1])]; + tensor var_995_pad_0 = const()[name = string("op_995_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_995_dilations_0 = const()[name = string("op_995_dilations_0"), val = tensor([1, 1])]; + int32 var_995_groups_0 = const()[name = string("op_995_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26100672))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26087552))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_995_cast_fp16 = conv(dilations = var_995_dilations_0, groups = var_995_groups_0, pad = var_995_pad_0, pad_type = var_995_pad_type_0, strides = var_995_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_995_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_989_cast_fp16, y = var_995_cast_fp16)[name = string("query_9_cast_fp16")]; + string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")]; + tensor var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor([1, 1])]; + tensor var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor([1, 1])]; + int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26174464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26469440))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1004_cast_fp16 = conv(dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1004_cast_fp16")]; + string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")]; + tensor var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor([1, 1])]; + tensor var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor([1, 1])]; + int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26482240))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26469568))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1010_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("key_9_cast_fp16")]; + string var_1020_pad_type_0 = const()[name = string("op_1020_pad_type_0"), val = string("valid")]; + tensor var_1020_strides_0 = const()[name = string("op_1020_strides_0"), val = tensor([1, 1])]; + tensor var_1020_pad_0 = const()[name = string("op_1020_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1020_dilations_0 = const()[name = string("op_1020_dilations_0"), val = tensor([1, 1])]; + int32 var_1020_groups_0 = const()[name = string("op_1020_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26556032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26851008))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26851136)))]; + tensor var_1020_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1020_dilations_0, groups = var_1020_groups_0, pad = var_1020_pad_0, pad_type = var_1020_pad_type_0, strides = var_1020_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1020_cast_fp16")]; + string var_1026_pad_type_0 = const()[name = string("op_1026_pad_type_0"), val = string("valid")]; + tensor var_1026_strides_0 = const()[name = string("op_1026_strides_0"), val = tensor([1, 1])]; + tensor var_1026_pad_0 = const()[name = string("op_1026_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1026_dilations_0 = const()[name = string("op_1026_dilations_0"), val = tensor([1, 1])]; + int32 var_1026_groups_0 = const()[name = string("op_1026_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26862144))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26852736))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1026_cast_fp16 = conv(dilations = var_1026_dilations_0, groups = var_1026_groups_0, pad = var_1026_pad_0, pad_type = var_1026_pad_type_0, strides = var_1026_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1026_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1026_cast_fp16)[name = string("value_9_cast_fp16")]; + tensor var_1029 = const()[name = string("op_1029"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_1029, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")]; + fp16 var_1031_to_fp16 = const()[name = string("op_1031_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1032_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1031_to_fp16)[name = string("op_1032_cast_fp16")]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 12, 64, -1])]; + tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = key_9_cast_fp16)[name = string("op_1034_cast_fp16")]; + bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; + bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1032_cast_fp16, y = var_1034_cast_fp16)[name = string("mh_w_9_cast_fp16")]; + tensor var_1037_cast_fp16 = softmax(axis = var_948, x = mh_w_9_cast_fp16)[name = string("op_1037_cast_fp16")]; + tensor var_1038 = const()[name = string("op_1038"), val = tensor([1, 12, 64, -1])]; + tensor var_1039_cast_fp16 = reshape(shape = var_1038, x = value_9_cast_fp16)[name = string("op_1039_cast_fp16")]; + bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)]; + bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1039_cast_fp16, y = var_1037_cast_fp16)[name = string("attn_9_cast_fp16")]; + tensor var_1042 = const()[name = string("op_1042"), val = tensor([1, 768, 1, -1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1042, x = attn_9_cast_fp16)[name = string("input_33_cast_fp16")]; + string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")]; + tensor var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor([1, 1])]; + tensor var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor([1, 1])]; + int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26935936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27230912))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27231040)))]; + tensor var_1052_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1052_cast_fp16")]; + string var_1058_pad_type_0 = const()[name = string("op_1058_pad_type_0"), val = string("valid")]; + tensor var_1058_strides_0 = const()[name = string("op_1058_strides_0"), val = tensor([1, 1])]; + tensor var_1058_pad_0 = const()[name = string("op_1058_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1058_dilations_0 = const()[name = string("op_1058_dilations_0"), val = tensor([1, 1])]; + int32 var_1058_groups_0 = const()[name = string("op_1058_groups_0"), val = int32(1)]; + tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27241344))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27232640))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1058_cast_fp16 = conv(dilations = var_1058_dilations_0, groups = var_1058_groups_0, pad = var_1058_pad_0, pad_type = var_1058_pad_type_0, strides = var_1058_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1058_cast_fp16")]; + tensor obj_19_cast_fp16 = add(x = var_1052_cast_fp16, y = var_1058_cast_fp16)[name = string("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_19_cast_fp16")]; + tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; + fp16 var_1069_to_fp16 = const()[name = string("op_1069_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1069_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27315136)))]; + tensor input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27316736)))]; + fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("input_35_cast_fp16")]; + string var_1087_pad_type_0 = const()[name = string("op_1087_pad_type_0"), val = string("valid")]; + tensor var_1087_strides_0 = const()[name = string("op_1087_strides_0"), val = tensor([1, 1])]; + tensor var_1087_pad_0 = const()[name = string("op_1087_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1087_dilations_0 = const()[name = string("op_1087_dilations_0"), val = tensor([1, 1])]; + int32 var_1087_groups_0 = const()[name = string("op_1087_groups_0"), val = int32(1)]; + tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27318336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28498048))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28498176)))]; + tensor var_1087_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1087_dilations_0, groups = var_1087_groups_0, pad = var_1087_pad_0, pad_type = var_1087_pad_type_0, strides = var_1087_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1087_cast_fp16")]; + string var_1093_pad_type_0 = const()[name = string("op_1093_pad_type_0"), val = string("valid")]; + tensor var_1093_strides_0 = const()[name = string("op_1093_strides_0"), val = tensor([1, 1])]; + tensor var_1093_pad_0 = const()[name = string("op_1093_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1093_dilations_0 = const()[name = string("op_1093_dilations_0"), val = tensor([1, 1])]; + int32 var_1093_groups_0 = const()[name = string("op_1093_groups_0"), val = int32(1)]; + tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28545344))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28504384))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1093_cast_fp16 = conv(dilations = var_1093_dilations_0, groups = var_1093_groups_0, pad = var_1093_pad_0, pad_type = var_1093_pad_type_0, strides = var_1093_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1093_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1087_cast_fp16, y = var_1093_cast_fp16)[name = string("input_37_cast_fp16")]; + string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")]; + string var_1104_pad_type_0 = const()[name = string("op_1104_pad_type_0"), val = string("valid")]; + tensor var_1104_strides_0 = const()[name = string("op_1104_strides_0"), val = tensor([1, 1])]; + tensor var_1104_pad_0 = const()[name = string("op_1104_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1104_dilations_0 = const()[name = string("op_1104_dilations_0"), val = tensor([1, 1])]; + int32 var_1104_groups_0 = const()[name = string("op_1104_groups_0"), val = int32(1)]; + tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28840320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30020032))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30020160)))]; + tensor var_1104_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1104_dilations_0, groups = var_1104_groups_0, pad = var_1104_pad_0, pad_type = var_1104_pad_type_0, strides = var_1104_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1104_cast_fp16")]; + string var_1110_pad_type_0 = const()[name = string("op_1110_pad_type_0"), val = string("valid")]; + tensor var_1110_strides_0 = const()[name = string("op_1110_strides_0"), val = tensor([1, 1])]; + tensor var_1110_pad_0 = const()[name = string("op_1110_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1110_dilations_0 = const()[name = string("op_1110_dilations_0"), val = tensor([1, 1])]; + int32 var_1110_groups_0 = const()[name = string("op_1110_groups_0"), val = int32(1)]; + tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30065024))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30021760))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1110_cast_fp16 = conv(dilations = var_1110_dilations_0, groups = var_1110_groups_0, pad = var_1110_pad_0, pad_type = var_1110_pad_type_0, strides = var_1110_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1110_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = var_1104_cast_fp16, y = var_1110_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_21_cast_fp16")]; + int32 var_1120 = const()[name = string("op_1120"), val = int32(3)]; + tensor out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor([1])]; + fp16 var_1139_to_fp16 = const()[name = string("op_1139_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1139_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = string("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30360000)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = string("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30361600)))]; + fp16 obj_21_epsilon_0_to_fp16 = const()[name = string("obj_21_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_21_cast_fp16")]; + string var_1161_pad_type_0 = const()[name = string("op_1161_pad_type_0"), val = string("valid")]; + tensor var_1161_strides_0 = const()[name = string("op_1161_strides_0"), val = tensor([1, 1])]; + tensor var_1161_pad_0 = const()[name = string("op_1161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1161_dilations_0 = const()[name = string("op_1161_dilations_0"), val = tensor([1, 1])]; + int32 var_1161_groups_0 = const()[name = string("op_1161_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30363200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30658176))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30658304)))]; + tensor var_1161_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1161_dilations_0, groups = var_1161_groups_0, pad = var_1161_pad_0, pad_type = var_1161_pad_type_0, strides = var_1161_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1161_cast_fp16")]; + string var_1167_pad_type_0 = const()[name = string("op_1167_pad_type_0"), val = string("valid")]; + tensor var_1167_strides_0 = const()[name = string("op_1167_strides_0"), val = tensor([1, 1])]; + tensor var_1167_pad_0 = const()[name = string("op_1167_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1167_dilations_0 = const()[name = string("op_1167_dilations_0"), val = tensor([1, 1])]; + int32 var_1167_groups_0 = const()[name = string("op_1167_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30670336))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30659904))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1167_cast_fp16 = conv(dilations = var_1167_dilations_0, groups = var_1167_groups_0, pad = var_1167_pad_0, pad_type = var_1167_pad_type_0, strides = var_1167_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1167_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_1161_cast_fp16, y = var_1167_cast_fp16)[name = string("query_11_cast_fp16")]; + string var_1176_pad_type_0 = const()[name = string("op_1176_pad_type_0"), val = string("valid")]; + tensor var_1176_strides_0 = const()[name = string("op_1176_strides_0"), val = tensor([1, 1])]; + tensor var_1176_pad_0 = const()[name = string("op_1176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1176_dilations_0 = const()[name = string("op_1176_dilations_0"), val = tensor([1, 1])]; + int32 var_1176_groups_0 = const()[name = string("op_1176_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30744128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31039104))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1176_cast_fp16 = conv(dilations = var_1176_dilations_0, groups = var_1176_groups_0, pad = var_1176_pad_0, pad_type = var_1176_pad_type_0, strides = var_1176_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1176_cast_fp16")]; + string var_1182_pad_type_0 = const()[name = string("op_1182_pad_type_0"), val = string("valid")]; + tensor var_1182_strides_0 = const()[name = string("op_1182_strides_0"), val = tensor([1, 1])]; + tensor var_1182_pad_0 = const()[name = string("op_1182_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1182_dilations_0 = const()[name = string("op_1182_dilations_0"), val = tensor([1, 1])]; + int32 var_1182_groups_0 = const()[name = string("op_1182_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31048960))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31039232))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1182_cast_fp16 = conv(dilations = var_1182_dilations_0, groups = var_1182_groups_0, pad = var_1182_pad_0, pad_type = var_1182_pad_type_0, strides = var_1182_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1182_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_1176_cast_fp16, y = var_1182_cast_fp16)[name = string("key_11_cast_fp16")]; + string var_1192_pad_type_0 = const()[name = string("op_1192_pad_type_0"), val = string("valid")]; + tensor var_1192_strides_0 = const()[name = string("op_1192_strides_0"), val = tensor([1, 1])]; + tensor var_1192_pad_0 = const()[name = string("op_1192_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1192_dilations_0 = const()[name = string("op_1192_dilations_0"), val = tensor([1, 1])]; + int32 var_1192_groups_0 = const()[name = string("op_1192_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31122752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31417728))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31417856)))]; + tensor var_1192_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1192_dilations_0, groups = var_1192_groups_0, pad = var_1192_pad_0, pad_type = var_1192_pad_type_0, strides = var_1192_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1192_cast_fp16")]; + string var_1198_pad_type_0 = const()[name = string("op_1198_pad_type_0"), val = string("valid")]; + tensor var_1198_strides_0 = const()[name = string("op_1198_strides_0"), val = tensor([1, 1])]; + tensor var_1198_pad_0 = const()[name = string("op_1198_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1198_dilations_0 = const()[name = string("op_1198_dilations_0"), val = tensor([1, 1])]; + int32 var_1198_groups_0 = const()[name = string("op_1198_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31426368))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31419456))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1198_cast_fp16 = conv(dilations = var_1198_dilations_0, groups = var_1198_groups_0, pad = var_1198_pad_0, pad_type = var_1198_pad_type_0, strides = var_1198_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1198_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_1192_cast_fp16, y = var_1198_cast_fp16)[name = string("value_11_cast_fp16")]; + tensor var_1201 = const()[name = string("op_1201"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_1201, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")]; + fp16 var_1203_to_fp16 = const()[name = string("op_1203_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1204_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1203_to_fp16)[name = string("op_1204_cast_fp16")]; + tensor var_1205 = const()[name = string("op_1205"), val = tensor([1, 12, 64, -1])]; + tensor var_1206_cast_fp16 = reshape(shape = var_1205, x = key_11_cast_fp16)[name = string("op_1206_cast_fp16")]; + bool mh_w_11_transpose_x_0 = const()[name = string("mh_w_11_transpose_x_0"), val = bool(true)]; + bool mh_w_11_transpose_y_0 = const()[name = string("mh_w_11_transpose_y_0"), val = bool(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1204_cast_fp16, y = var_1206_cast_fp16)[name = string("mh_w_11_cast_fp16")]; + tensor var_1209_cast_fp16 = softmax(axis = var_1120, x = mh_w_11_cast_fp16)[name = string("op_1209_cast_fp16")]; + tensor var_1210 = const()[name = string("op_1210"), val = tensor([1, 12, 64, -1])]; + tensor var_1211_cast_fp16 = reshape(shape = var_1210, x = value_11_cast_fp16)[name = string("op_1211_cast_fp16")]; + bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)]; + bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1211_cast_fp16, y = var_1209_cast_fp16)[name = string("attn_11_cast_fp16")]; + tensor var_1214 = const()[name = string("op_1214"), val = tensor([1, 768, 1, -1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1214, x = attn_11_cast_fp16)[name = string("input_41_cast_fp16")]; + string var_1224_pad_type_0 = const()[name = string("op_1224_pad_type_0"), val = string("valid")]; + tensor var_1224_strides_0 = const()[name = string("op_1224_strides_0"), val = tensor([1, 1])]; + tensor var_1224_pad_0 = const()[name = string("op_1224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1224_dilations_0 = const()[name = string("op_1224_dilations_0"), val = tensor([1, 1])]; + int32 var_1224_groups_0 = const()[name = string("op_1224_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31500160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31795136))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31795264)))]; + tensor var_1224_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1224_dilations_0, groups = var_1224_groups_0, pad = var_1224_pad_0, pad_type = var_1224_pad_type_0, strides = var_1224_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1224_cast_fp16")]; + string var_1230_pad_type_0 = const()[name = string("op_1230_pad_type_0"), val = string("valid")]; + tensor var_1230_strides_0 = const()[name = string("op_1230_strides_0"), val = tensor([1, 1])]; + tensor var_1230_pad_0 = const()[name = string("op_1230_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1230_dilations_0 = const()[name = string("op_1230_dilations_0"), val = tensor([1, 1])]; + int32 var_1230_groups_0 = const()[name = string("op_1230_groups_0"), val = int32(1)]; + tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31804736))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31796864))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1230_cast_fp16 = conv(dilations = var_1230_dilations_0, groups = var_1230_groups_0, pad = var_1230_pad_0, pad_type = var_1230_pad_type_0, strides = var_1230_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1230_cast_fp16")]; + tensor obj_23_cast_fp16 = add(x = var_1224_cast_fp16, y = var_1230_cast_fp16)[name = string("obj_23_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = string("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor([1])]; + fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1241_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = string("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31878528)))]; + tensor input_43_beta_0_to_fp16 = const()[name = string("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31880128)))]; + fp16 input_43_epsilon_0_to_fp16 = const()[name = string("input_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_43_cast_fp16")]; + string var_1259_pad_type_0 = const()[name = string("op_1259_pad_type_0"), val = string("valid")]; + tensor var_1259_strides_0 = const()[name = string("op_1259_strides_0"), val = tensor([1, 1])]; + tensor var_1259_pad_0 = const()[name = string("op_1259_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1259_dilations_0 = const()[name = string("op_1259_dilations_0"), val = tensor([1, 1])]; + int32 var_1259_groups_0 = const()[name = string("op_1259_groups_0"), val = int32(1)]; + tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31881728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33061440))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33061568)))]; + tensor var_1259_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1259_dilations_0, groups = var_1259_groups_0, pad = var_1259_pad_0, pad_type = var_1259_pad_type_0, strides = var_1259_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1259_cast_fp16")]; + string var_1265_pad_type_0 = const()[name = string("op_1265_pad_type_0"), val = string("valid")]; + tensor var_1265_strides_0 = const()[name = string("op_1265_strides_0"), val = tensor([1, 1])]; + tensor var_1265_pad_0 = const()[name = string("op_1265_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1265_dilations_0 = const()[name = string("op_1265_dilations_0"), val = tensor([1, 1])]; + int32 var_1265_groups_0 = const()[name = string("op_1265_groups_0"), val = int32(1)]; + tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33101248))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33067776))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1265_cast_fp16 = conv(dilations = var_1265_dilations_0, groups = var_1265_groups_0, pad = var_1265_pad_0, pad_type = var_1265_pad_type_0, strides = var_1265_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1265_cast_fp16")]; + tensor input_45_cast_fp16 = add(x = var_1259_cast_fp16, y = var_1265_cast_fp16)[name = string("input_45_cast_fp16")]; + string input_47_mode_0 = const()[name = string("input_47_mode_0"), val = string("EXACT")]; + tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")]; + string var_1276_pad_type_0 = const()[name = string("op_1276_pad_type_0"), val = string("valid")]; + tensor var_1276_strides_0 = const()[name = string("op_1276_strides_0"), val = tensor([1, 1])]; + tensor var_1276_pad_0 = const()[name = string("op_1276_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1276_dilations_0 = const()[name = string("op_1276_dilations_0"), val = tensor([1, 1])]; + int32 var_1276_groups_0 = const()[name = string("op_1276_groups_0"), val = int32(1)]; + tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33396224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34575936))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34576064)))]; + tensor var_1276_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1276_dilations_0, groups = var_1276_groups_0, pad = var_1276_pad_0, pad_type = var_1276_pad_type_0, strides = var_1276_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = string("op_1276_cast_fp16")]; + string var_1282_pad_type_0 = const()[name = string("op_1282_pad_type_0"), val = string("valid")]; + tensor var_1282_strides_0 = const()[name = string("op_1282_strides_0"), val = tensor([1, 1])]; + tensor var_1282_pad_0 = const()[name = string("op_1282_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1282_dilations_0 = const()[name = string("op_1282_dilations_0"), val = tensor([1, 1])]; + int32 var_1282_groups_0 = const()[name = string("op_1282_groups_0"), val = int32(1)]; + tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34614208))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34577664))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1282_cast_fp16 = conv(dilations = var_1282_dilations_0, groups = var_1282_groups_0, pad = var_1282_pad_0, pad_type = var_1282_pad_type_0, strides = var_1282_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = string("op_1282_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = var_1276_cast_fp16, y = var_1282_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_25_cast_fp16")]; + int32 var_1292 = const()[name = string("op_1292"), val = int32(3)]; + tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; + fp16 var_1311_to_fp16 = const()[name = string("op_1311_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1311_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34909184)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34910784)))]; + fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_25_cast_fp16")]; + string var_1333_pad_type_0 = const()[name = string("op_1333_pad_type_0"), val = string("valid")]; + tensor var_1333_strides_0 = const()[name = string("op_1333_strides_0"), val = tensor([1, 1])]; + tensor var_1333_pad_0 = const()[name = string("op_1333_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1333_dilations_0 = const()[name = string("op_1333_dilations_0"), val = tensor([1, 1])]; + int32 var_1333_groups_0 = const()[name = string("op_1333_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34912384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35207360))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35207488)))]; + tensor var_1333_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1333_dilations_0, groups = var_1333_groups_0, pad = var_1333_pad_0, pad_type = var_1333_pad_type_0, strides = var_1333_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1333_cast_fp16")]; + string var_1339_pad_type_0 = const()[name = string("op_1339_pad_type_0"), val = string("valid")]; + tensor var_1339_strides_0 = const()[name = string("op_1339_strides_0"), val = tensor([1, 1])]; + tensor var_1339_pad_0 = const()[name = string("op_1339_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1339_dilations_0 = const()[name = string("op_1339_dilations_0"), val = tensor([1, 1])]; + int32 var_1339_groups_0 = const()[name = string("op_1339_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35217280))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35209088))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1339_cast_fp16 = conv(dilations = var_1339_dilations_0, groups = var_1339_groups_0, pad = var_1339_pad_0, pad_type = var_1339_pad_type_0, strides = var_1339_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1339_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1333_cast_fp16, y = var_1339_cast_fp16)[name = string("query_13_cast_fp16")]; + string var_1348_pad_type_0 = const()[name = string("op_1348_pad_type_0"), val = string("valid")]; + tensor var_1348_strides_0 = const()[name = string("op_1348_strides_0"), val = tensor([1, 1])]; + tensor var_1348_pad_0 = const()[name = string("op_1348_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1348_dilations_0 = const()[name = string("op_1348_dilations_0"), val = tensor([1, 1])]; + int32 var_1348_groups_0 = const()[name = string("op_1348_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35291072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35586048))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1348_cast_fp16 = conv(dilations = var_1348_dilations_0, groups = var_1348_groups_0, pad = var_1348_pad_0, pad_type = var_1348_pad_type_0, strides = var_1348_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1348_cast_fp16")]; + string var_1354_pad_type_0 = const()[name = string("op_1354_pad_type_0"), val = string("valid")]; + tensor var_1354_strides_0 = const()[name = string("op_1354_strides_0"), val = tensor([1, 1])]; + tensor var_1354_pad_0 = const()[name = string("op_1354_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1354_dilations_0 = const()[name = string("op_1354_dilations_0"), val = tensor([1, 1])]; + int32 var_1354_groups_0 = const()[name = string("op_1354_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35593984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35586176))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1354_cast_fp16 = conv(dilations = var_1354_dilations_0, groups = var_1354_groups_0, pad = var_1354_pad_0, pad_type = var_1354_pad_type_0, strides = var_1354_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1354_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_1348_cast_fp16, y = var_1354_cast_fp16)[name = string("key_13_cast_fp16")]; + string var_1364_pad_type_0 = const()[name = string("op_1364_pad_type_0"), val = string("valid")]; + tensor var_1364_strides_0 = const()[name = string("op_1364_strides_0"), val = tensor([1, 1])]; + tensor var_1364_pad_0 = const()[name = string("op_1364_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1364_dilations_0 = const()[name = string("op_1364_dilations_0"), val = tensor([1, 1])]; + int32 var_1364_groups_0 = const()[name = string("op_1364_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35667776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35962752))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35962880)))]; + tensor var_1364_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1364_dilations_0, groups = var_1364_groups_0, pad = var_1364_pad_0, pad_type = var_1364_pad_type_0, strides = var_1364_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1364_cast_fp16")]; + string var_1370_pad_type_0 = const()[name = string("op_1370_pad_type_0"), val = string("valid")]; + tensor var_1370_strides_0 = const()[name = string("op_1370_strides_0"), val = tensor([1, 1])]; + tensor var_1370_pad_0 = const()[name = string("op_1370_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1370_dilations_0 = const()[name = string("op_1370_dilations_0"), val = tensor([1, 1])]; + int32 var_1370_groups_0 = const()[name = string("op_1370_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35970496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35964480))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1370_cast_fp16 = conv(dilations = var_1370_dilations_0, groups = var_1370_groups_0, pad = var_1370_pad_0, pad_type = var_1370_pad_type_0, strides = var_1370_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1370_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1364_cast_fp16, y = var_1370_cast_fp16)[name = string("value_13_cast_fp16")]; + tensor var_1373 = const()[name = string("op_1373"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1373, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")]; + fp16 var_1375_to_fp16 = const()[name = string("op_1375_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1376_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1375_to_fp16)[name = string("op_1376_cast_fp16")]; + tensor var_1377 = const()[name = string("op_1377"), val = tensor([1, 12, 64, -1])]; + tensor var_1378_cast_fp16 = reshape(shape = var_1377, x = key_13_cast_fp16)[name = string("op_1378_cast_fp16")]; + bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; + bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1376_cast_fp16, y = var_1378_cast_fp16)[name = string("mh_w_13_cast_fp16")]; + tensor var_1381_cast_fp16 = softmax(axis = var_1292, x = mh_w_13_cast_fp16)[name = string("op_1381_cast_fp16")]; + tensor var_1382 = const()[name = string("op_1382"), val = tensor([1, 12, 64, -1])]; + tensor var_1383_cast_fp16 = reshape(shape = var_1382, x = value_13_cast_fp16)[name = string("op_1383_cast_fp16")]; + bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)]; + bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1383_cast_fp16, y = var_1381_cast_fp16)[name = string("attn_13_cast_fp16")]; + tensor var_1386 = const()[name = string("op_1386"), val = tensor([1, 768, 1, -1])]; + tensor input_49_cast_fp16 = reshape(shape = var_1386, x = attn_13_cast_fp16)[name = string("input_49_cast_fp16")]; + string var_1396_pad_type_0 = const()[name = string("op_1396_pad_type_0"), val = string("valid")]; + tensor var_1396_strides_0 = const()[name = string("op_1396_strides_0"), val = tensor([1, 1])]; + tensor var_1396_pad_0 = const()[name = string("op_1396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1396_dilations_0 = const()[name = string("op_1396_dilations_0"), val = tensor([1, 1])]; + int32 var_1396_groups_0 = const()[name = string("op_1396_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36044288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36339264))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36339392)))]; + tensor var_1396_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1396_dilations_0, groups = var_1396_groups_0, pad = var_1396_pad_0, pad_type = var_1396_pad_type_0, strides = var_1396_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1396_cast_fp16")]; + string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")]; + tensor var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor([1, 1])]; + tensor var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor([1, 1])]; + int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)]; + tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36346496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36340992))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1402_cast_fp16 = conv(dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1402_cast_fp16")]; + tensor obj_27_cast_fp16 = add(x = var_1396_cast_fp16, y = var_1402_cast_fp16)[name = string("obj_27_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor([1])]; + fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1413_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")]; + tensor input_51_gamma_0_to_fp16 = const()[name = string("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36420288)))]; + tensor input_51_beta_0_to_fp16 = const()[name = string("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36421888)))]; + fp16 input_51_epsilon_0_to_fp16 = const()[name = string("input_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("input_51_cast_fp16")]; + string var_1431_pad_type_0 = const()[name = string("op_1431_pad_type_0"), val = string("valid")]; + tensor var_1431_strides_0 = const()[name = string("op_1431_strides_0"), val = tensor([1, 1])]; + tensor var_1431_pad_0 = const()[name = string("op_1431_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1431_dilations_0 = const()[name = string("op_1431_dilations_0"), val = tensor([1, 1])]; + int32 var_1431_groups_0 = const()[name = string("op_1431_groups_0"), val = int32(1)]; + tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36423488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37603200))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37603328)))]; + tensor var_1431_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1431_dilations_0, groups = var_1431_groups_0, pad = var_1431_pad_0, pad_type = var_1431_pad_type_0, strides = var_1431_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1431_cast_fp16")]; + string var_1437_pad_type_0 = const()[name = string("op_1437_pad_type_0"), val = string("valid")]; + tensor var_1437_strides_0 = const()[name = string("op_1437_strides_0"), val = tensor([1, 1])]; + tensor var_1437_pad_0 = const()[name = string("op_1437_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1437_dilations_0 = const()[name = string("op_1437_dilations_0"), val = tensor([1, 1])]; + int32 var_1437_groups_0 = const()[name = string("op_1437_groups_0"), val = int32(1)]; + tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37638976))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37609536))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1437_cast_fp16 = conv(dilations = var_1437_dilations_0, groups = var_1437_groups_0, pad = var_1437_pad_0, pad_type = var_1437_pad_type_0, strides = var_1437_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1437_cast_fp16")]; + tensor input_53_cast_fp16 = add(x = var_1431_cast_fp16, y = var_1437_cast_fp16)[name = string("input_53_cast_fp16")]; + string input_55_mode_0 = const()[name = string("input_55_mode_0"), val = string("EXACT")]; + tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = string("input_55_cast_fp16")]; + string var_1448_pad_type_0 = const()[name = string("op_1448_pad_type_0"), val = string("valid")]; + tensor var_1448_strides_0 = const()[name = string("op_1448_strides_0"), val = tensor([1, 1])]; + tensor var_1448_pad_0 = const()[name = string("op_1448_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1448_dilations_0 = const()[name = string("op_1448_dilations_0"), val = tensor([1, 1])]; + int32 var_1448_groups_0 = const()[name = string("op_1448_groups_0"), val = int32(1)]; + tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37933952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39113664))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39113792)))]; + tensor var_1448_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1448_dilations_0, groups = var_1448_groups_0, pad = var_1448_pad_0, pad_type = var_1448_pad_type_0, strides = var_1448_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1448_cast_fp16")]; + string var_1454_pad_type_0 = const()[name = string("op_1454_pad_type_0"), val = string("valid")]; + tensor var_1454_strides_0 = const()[name = string("op_1454_strides_0"), val = tensor([1, 1])]; + tensor var_1454_pad_0 = const()[name = string("op_1454_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1454_dilations_0 = const()[name = string("op_1454_dilations_0"), val = tensor([1, 1])]; + int32 var_1454_groups_0 = const()[name = string("op_1454_groups_0"), val = int32(1)]; + tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39142720))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39115392))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1454_cast_fp16 = conv(dilations = var_1454_dilations_0, groups = var_1454_groups_0, pad = var_1454_pad_0, pad_type = var_1454_pad_type_0, strides = var_1454_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1454_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = add(x = var_1448_cast_fp16, y = var_1454_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_29_cast_fp16")]; + int32 var_1464 = const()[name = string("op_1464"), val = int32(3)]; + tensor out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor([1])]; + fp16 var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1483_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = string("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39437696)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = string("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39439296)))]; + fp16 obj_29_epsilon_0_to_fp16 = const()[name = string("obj_29_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("obj_29_cast_fp16")]; + string var_1505_pad_type_0 = const()[name = string("op_1505_pad_type_0"), val = string("valid")]; + tensor var_1505_strides_0 = const()[name = string("op_1505_strides_0"), val = tensor([1, 1])]; + tensor var_1505_pad_0 = const()[name = string("op_1505_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1505_dilations_0 = const()[name = string("op_1505_dilations_0"), val = tensor([1, 1])]; + int32 var_1505_groups_0 = const()[name = string("op_1505_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39440896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39735872))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39736000)))]; + tensor var_1505_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1505_dilations_0, groups = var_1505_groups_0, pad = var_1505_pad_0, pad_type = var_1505_pad_type_0, strides = var_1505_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1505_cast_fp16")]; + string var_1511_pad_type_0 = const()[name = string("op_1511_pad_type_0"), val = string("valid")]; + tensor var_1511_strides_0 = const()[name = string("op_1511_strides_0"), val = tensor([1, 1])]; + tensor var_1511_pad_0 = const()[name = string("op_1511_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1511_dilations_0 = const()[name = string("op_1511_dilations_0"), val = tensor([1, 1])]; + int32 var_1511_groups_0 = const()[name = string("op_1511_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39745536))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39737600))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1511_cast_fp16 = conv(dilations = var_1511_dilations_0, groups = var_1511_groups_0, pad = var_1511_pad_0, pad_type = var_1511_pad_type_0, strides = var_1511_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1511_cast_fp16")]; + tensor query_15_cast_fp16 = add(x = var_1505_cast_fp16, y = var_1511_cast_fp16)[name = string("query_15_cast_fp16")]; + string var_1520_pad_type_0 = const()[name = string("op_1520_pad_type_0"), val = string("valid")]; + tensor var_1520_strides_0 = const()[name = string("op_1520_strides_0"), val = tensor([1, 1])]; + tensor var_1520_pad_0 = const()[name = string("op_1520_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1520_dilations_0 = const()[name = string("op_1520_dilations_0"), val = tensor([1, 1])]; + int32 var_1520_groups_0 = const()[name = string("op_1520_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39819328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40114304))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1520_cast_fp16 = conv(dilations = var_1520_dilations_0, groups = var_1520_groups_0, pad = var_1520_pad_0, pad_type = var_1520_pad_type_0, strides = var_1520_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1520_cast_fp16")]; + string var_1526_pad_type_0 = const()[name = string("op_1526_pad_type_0"), val = string("valid")]; + tensor var_1526_strides_0 = const()[name = string("op_1526_strides_0"), val = tensor([1, 1])]; + tensor var_1526_pad_0 = const()[name = string("op_1526_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1526_dilations_0 = const()[name = string("op_1526_dilations_0"), val = tensor([1, 1])]; + int32 var_1526_groups_0 = const()[name = string("op_1526_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40123840))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40114432))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1526_cast_fp16 = conv(dilations = var_1526_dilations_0, groups = var_1526_groups_0, pad = var_1526_pad_0, pad_type = var_1526_pad_type_0, strides = var_1526_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1526_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1520_cast_fp16, y = var_1526_cast_fp16)[name = string("key_15_cast_fp16")]; + string var_1536_pad_type_0 = const()[name = string("op_1536_pad_type_0"), val = string("valid")]; + tensor var_1536_strides_0 = const()[name = string("op_1536_strides_0"), val = tensor([1, 1])]; + tensor var_1536_pad_0 = const()[name = string("op_1536_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1536_dilations_0 = const()[name = string("op_1536_dilations_0"), val = tensor([1, 1])]; + int32 var_1536_groups_0 = const()[name = string("op_1536_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40197632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40492608))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40492736)))]; + tensor var_1536_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1536_dilations_0, groups = var_1536_groups_0, pad = var_1536_pad_0, pad_type = var_1536_pad_type_0, strides = var_1536_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1536_cast_fp16")]; + string var_1542_pad_type_0 = const()[name = string("op_1542_pad_type_0"), val = string("valid")]; + tensor var_1542_strides_0 = const()[name = string("op_1542_strides_0"), val = tensor([1, 1])]; + tensor var_1542_pad_0 = const()[name = string("op_1542_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1542_dilations_0 = const()[name = string("op_1542_dilations_0"), val = tensor([1, 1])]; + int32 var_1542_groups_0 = const()[name = string("op_1542_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40503360))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40494336))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1542_cast_fp16 = conv(dilations = var_1542_dilations_0, groups = var_1542_groups_0, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1542_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1542_cast_fp16")]; + tensor value_15_cast_fp16 = add(x = var_1536_cast_fp16, y = var_1542_cast_fp16)[name = string("value_15_cast_fp16")]; + tensor var_1545 = const()[name = string("op_1545"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1545, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")]; + fp16 var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1548_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1547_to_fp16)[name = string("op_1548_cast_fp16")]; + tensor var_1549 = const()[name = string("op_1549"), val = tensor([1, 12, 64, -1])]; + tensor var_1550_cast_fp16 = reshape(shape = var_1549, x = key_15_cast_fp16)[name = string("op_1550_cast_fp16")]; + bool mh_w_15_transpose_x_0 = const()[name = string("mh_w_15_transpose_x_0"), val = bool(true)]; + bool mh_w_15_transpose_y_0 = const()[name = string("mh_w_15_transpose_y_0"), val = bool(false)]; + tensor mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1548_cast_fp16, y = var_1550_cast_fp16)[name = string("mh_w_15_cast_fp16")]; + tensor var_1553_cast_fp16 = softmax(axis = var_1464, x = mh_w_15_cast_fp16)[name = string("op_1553_cast_fp16")]; + tensor var_1554 = const()[name = string("op_1554"), val = tensor([1, 12, 64, -1])]; + tensor var_1555_cast_fp16 = reshape(shape = var_1554, x = value_15_cast_fp16)[name = string("op_1555_cast_fp16")]; + bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)]; + bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1555_cast_fp16, y = var_1553_cast_fp16)[name = string("attn_15_cast_fp16")]; + tensor var_1558 = const()[name = string("op_1558"), val = tensor([1, 768, 1, -1])]; + tensor input_57_cast_fp16 = reshape(shape = var_1558, x = attn_15_cast_fp16)[name = string("input_57_cast_fp16")]; + string var_1568_pad_type_0 = const()[name = string("op_1568_pad_type_0"), val = string("valid")]; + tensor var_1568_strides_0 = const()[name = string("op_1568_strides_0"), val = tensor([1, 1])]; + tensor var_1568_pad_0 = const()[name = string("op_1568_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1568_dilations_0 = const()[name = string("op_1568_dilations_0"), val = tensor([1, 1])]; + int32 var_1568_groups_0 = const()[name = string("op_1568_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40577152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40872128))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40872256)))]; + tensor var_1568_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1568_dilations_0, groups = var_1568_groups_0, pad = var_1568_pad_0, pad_type = var_1568_pad_type_0, strides = var_1568_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = string("op_1568_cast_fp16")]; + string var_1574_pad_type_0 = const()[name = string("op_1574_pad_type_0"), val = string("valid")]; + tensor var_1574_strides_0 = const()[name = string("op_1574_strides_0"), val = tensor([1, 1])]; + tensor var_1574_pad_0 = const()[name = string("op_1574_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1574_dilations_0 = const()[name = string("op_1574_dilations_0"), val = tensor([1, 1])]; + int32 var_1574_groups_0 = const()[name = string("op_1574_groups_0"), val = int32(1)]; + tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40885568))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40873856))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1574_cast_fp16 = conv(dilations = var_1574_dilations_0, groups = var_1574_groups_0, pad = var_1574_pad_0, pad_type = var_1574_pad_type_0, strides = var_1574_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = string("op_1574_cast_fp16")]; + tensor obj_31_cast_fp16 = add(x = var_1568_cast_fp16, y = var_1574_cast_fp16)[name = string("obj_31_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_31_cast_fp16")]; + tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; + fp16 var_1585_to_fp16 = const()[name = string("op_1585_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1585_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor input_59_gamma_0_to_fp16 = const()[name = string("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40959360)))]; + tensor input_59_beta_0_to_fp16 = const()[name = string("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40960960)))]; + fp16 input_59_epsilon_0_to_fp16 = const()[name = string("input_59_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("input_59_cast_fp16")]; + string var_1603_pad_type_0 = const()[name = string("op_1603_pad_type_0"), val = string("valid")]; + tensor var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor([1, 1])]; + tensor var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor([1, 1])]; + int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)]; + tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40962560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42142272))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42142400)))]; + tensor var_1603_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1603_cast_fp16")]; + string var_1609_pad_type_0 = const()[name = string("op_1609_pad_type_0"), val = string("valid")]; + tensor var_1609_strides_0 = const()[name = string("op_1609_strides_0"), val = tensor([1, 1])]; + tensor var_1609_pad_0 = const()[name = string("op_1609_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1609_dilations_0 = const()[name = string("op_1609_dilations_0"), val = tensor([1, 1])]; + int32 var_1609_groups_0 = const()[name = string("op_1609_groups_0"), val = int32(1)]; + tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42172224))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42148608))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1609_cast_fp16 = conv(dilations = var_1609_dilations_0, groups = var_1609_groups_0, pad = var_1609_pad_0, pad_type = var_1609_pad_type_0, strides = var_1609_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1609_cast_fp16")]; + tensor input_61_cast_fp16 = add(x = var_1603_cast_fp16, y = var_1609_cast_fp16)[name = string("input_61_cast_fp16")]; + string input_63_mode_0 = const()[name = string("input_63_mode_0"), val = string("EXACT")]; + tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = string("input_63_cast_fp16")]; + string var_1620_pad_type_0 = const()[name = string("op_1620_pad_type_0"), val = string("valid")]; + tensor var_1620_strides_0 = const()[name = string("op_1620_strides_0"), val = tensor([1, 1])]; + tensor var_1620_pad_0 = const()[name = string("op_1620_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1620_dilations_0 = const()[name = string("op_1620_dilations_0"), val = tensor([1, 1])]; + int32 var_1620_groups_0 = const()[name = string("op_1620_groups_0"), val = int32(1)]; + tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42467200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43646912))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43647040)))]; + tensor var_1620_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1620_dilations_0, groups = var_1620_groups_0, pad = var_1620_pad_0, pad_type = var_1620_pad_type_0, strides = var_1620_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_1620_cast_fp16")]; + string var_1626_pad_type_0 = const()[name = string("op_1626_pad_type_0"), val = string("valid")]; + tensor var_1626_strides_0 = const()[name = string("op_1626_strides_0"), val = tensor([1, 1])]; + tensor var_1626_pad_0 = const()[name = string("op_1626_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1626_dilations_0 = const()[name = string("op_1626_dilations_0"), val = tensor([1, 1])]; + int32 var_1626_groups_0 = const()[name = string("op_1626_groups_0"), val = int32(1)]; + tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43671552))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43648640))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1626_cast_fp16 = conv(dilations = var_1626_dilations_0, groups = var_1626_groups_0, pad = var_1626_pad_0, pad_type = var_1626_pad_type_0, strides = var_1626_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_1626_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = var_1620_cast_fp16, y = var_1626_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_33_cast_fp16")]; + int32 var_1636 = const()[name = string("op_1636"), val = int32(3)]; + tensor out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor([1])]; + fp16 var_1655_to_fp16 = const()[name = string("op_1655_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1655_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43966528)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43968128)))]; + fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_33_cast_fp16")]; + string var_1677_pad_type_0 = const()[name = string("op_1677_pad_type_0"), val = string("valid")]; + tensor var_1677_strides_0 = const()[name = string("op_1677_strides_0"), val = tensor([1, 1])]; + tensor var_1677_pad_0 = const()[name = string("op_1677_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1677_dilations_0 = const()[name = string("op_1677_dilations_0"), val = tensor([1, 1])]; + int32 var_1677_groups_0 = const()[name = string("op_1677_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43969728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44264704))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44264832)))]; + tensor var_1677_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1677_dilations_0, groups = var_1677_groups_0, pad = var_1677_pad_0, pad_type = var_1677_pad_type_0, strides = var_1677_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1677_cast_fp16")]; + string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")]; + tensor var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor([1, 1])]; + tensor var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor([1, 1])]; + int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44272704))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44266432))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1683_cast_fp16 = conv(dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1683_cast_fp16")]; + tensor query_17_cast_fp16 = add(x = var_1677_cast_fp16, y = var_1683_cast_fp16)[name = string("query_17_cast_fp16")]; + string var_1692_pad_type_0 = const()[name = string("op_1692_pad_type_0"), val = string("valid")]; + tensor var_1692_strides_0 = const()[name = string("op_1692_strides_0"), val = tensor([1, 1])]; + tensor var_1692_pad_0 = const()[name = string("op_1692_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1692_dilations_0 = const()[name = string("op_1692_dilations_0"), val = tensor([1, 1])]; + int32 var_1692_groups_0 = const()[name = string("op_1692_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44346496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44641472))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1692_cast_fp16 = conv(dilations = var_1692_dilations_0, groups = var_1692_groups_0, pad = var_1692_pad_0, pad_type = var_1692_pad_type_0, strides = var_1692_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1692_cast_fp16")]; + string var_1698_pad_type_0 = const()[name = string("op_1698_pad_type_0"), val = string("valid")]; + tensor var_1698_strides_0 = const()[name = string("op_1698_strides_0"), val = tensor([1, 1])]; + tensor var_1698_pad_0 = const()[name = string("op_1698_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1698_dilations_0 = const()[name = string("op_1698_dilations_0"), val = tensor([1, 1])]; + int32 var_1698_groups_0 = const()[name = string("op_1698_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44648384))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44641600))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1698_cast_fp16 = conv(dilations = var_1698_dilations_0, groups = var_1698_groups_0, pad = var_1698_pad_0, pad_type = var_1698_pad_type_0, strides = var_1698_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1698_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1692_cast_fp16, y = var_1698_cast_fp16)[name = string("key_17_cast_fp16")]; + string var_1708_pad_type_0 = const()[name = string("op_1708_pad_type_0"), val = string("valid")]; + tensor var_1708_strides_0 = const()[name = string("op_1708_strides_0"), val = tensor([1, 1])]; + tensor var_1708_pad_0 = const()[name = string("op_1708_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1708_dilations_0 = const()[name = string("op_1708_dilations_0"), val = tensor([1, 1])]; + int32 var_1708_groups_0 = const()[name = string("op_1708_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44722176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45017152))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45017280)))]; + tensor var_1708_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1708_dilations_0, groups = var_1708_groups_0, pad = var_1708_pad_0, pad_type = var_1708_pad_type_0, strides = var_1708_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1708_cast_fp16")]; + string var_1714_pad_type_0 = const()[name = string("op_1714_pad_type_0"), val = string("valid")]; + tensor var_1714_strides_0 = const()[name = string("op_1714_strides_0"), val = tensor([1, 1])]; + tensor var_1714_pad_0 = const()[name = string("op_1714_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1714_dilations_0 = const()[name = string("op_1714_dilations_0"), val = tensor([1, 1])]; + int32 var_1714_groups_0 = const()[name = string("op_1714_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45025152))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45018880))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1714_cast_fp16 = conv(dilations = var_1714_dilations_0, groups = var_1714_groups_0, pad = var_1714_pad_0, pad_type = var_1714_pad_type_0, strides = var_1714_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1714_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1708_cast_fp16, y = var_1714_cast_fp16)[name = string("value_17_cast_fp16")]; + tensor var_1717 = const()[name = string("op_1717"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1717, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")]; + fp16 var_1719_to_fp16 = const()[name = string("op_1719_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1720_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1719_to_fp16)[name = string("op_1720_cast_fp16")]; + tensor var_1721 = const()[name = string("op_1721"), val = tensor([1, 12, 64, -1])]; + tensor var_1722_cast_fp16 = reshape(shape = var_1721, x = key_17_cast_fp16)[name = string("op_1722_cast_fp16")]; + bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; + bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1720_cast_fp16, y = var_1722_cast_fp16)[name = string("mh_w_17_cast_fp16")]; + tensor var_1725_cast_fp16 = softmax(axis = var_1636, x = mh_w_17_cast_fp16)[name = string("op_1725_cast_fp16")]; + tensor var_1726 = const()[name = string("op_1726"), val = tensor([1, 12, 64, -1])]; + tensor var_1727_cast_fp16 = reshape(shape = var_1726, x = value_17_cast_fp16)[name = string("op_1727_cast_fp16")]; + bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)]; + bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1727_cast_fp16, y = var_1725_cast_fp16)[name = string("attn_17_cast_fp16")]; + tensor var_1730 = const()[name = string("op_1730"), val = tensor([1, 768, 1, -1])]; + tensor input_65_cast_fp16 = reshape(shape = var_1730, x = attn_17_cast_fp16)[name = string("input_65_cast_fp16")]; + string var_1740_pad_type_0 = const()[name = string("op_1740_pad_type_0"), val = string("valid")]; + tensor var_1740_strides_0 = const()[name = string("op_1740_strides_0"), val = tensor([1, 1])]; + tensor var_1740_pad_0 = const()[name = string("op_1740_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1740_dilations_0 = const()[name = string("op_1740_dilations_0"), val = tensor([1, 1])]; + int32 var_1740_groups_0 = const()[name = string("op_1740_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45098944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45393920))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45394048)))]; + tensor var_1740_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1740_dilations_0, groups = var_1740_groups_0, pad = var_1740_pad_0, pad_type = var_1740_pad_type_0, strides = var_1740_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_1740_cast_fp16")]; + string var_1746_pad_type_0 = const()[name = string("op_1746_pad_type_0"), val = string("valid")]; + tensor var_1746_strides_0 = const()[name = string("op_1746_strides_0"), val = tensor([1, 1])]; + tensor var_1746_pad_0 = const()[name = string("op_1746_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1746_dilations_0 = const()[name = string("op_1746_dilations_0"), val = tensor([1, 1])]; + int32 var_1746_groups_0 = const()[name = string("op_1746_groups_0"), val = int32(1)]; + tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45402880))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45395648))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1746_cast_fp16 = conv(dilations = var_1746_dilations_0, groups = var_1746_groups_0, pad = var_1746_pad_0, pad_type = var_1746_pad_type_0, strides = var_1746_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_1746_cast_fp16")]; + tensor obj_35_cast_fp16 = add(x = var_1740_cast_fp16, y = var_1746_cast_fp16)[name = string("obj_35_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor([1])]; + fp16 var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1757_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")]; + tensor input_67_gamma_0_to_fp16 = const()[name = string("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45476672)))]; + tensor input_67_beta_0_to_fp16 = const()[name = string("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45478272)))]; + fp16 input_67_epsilon_0_to_fp16 = const()[name = string("input_67_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_67_cast_fp16")]; + string var_1775_pad_type_0 = const()[name = string("op_1775_pad_type_0"), val = string("valid")]; + tensor var_1775_strides_0 = const()[name = string("op_1775_strides_0"), val = tensor([1, 1])]; + tensor var_1775_pad_0 = const()[name = string("op_1775_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1775_dilations_0 = const()[name = string("op_1775_dilations_0"), val = tensor([1, 1])]; + int32 var_1775_groups_0 = const()[name = string("op_1775_groups_0"), val = int32(1)]; + tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45479872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46659584))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46659712)))]; + tensor var_1775_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = string("op_1775_cast_fp16")]; + string var_1781_pad_type_0 = const()[name = string("op_1781_pad_type_0"), val = string("valid")]; + tensor var_1781_strides_0 = const()[name = string("op_1781_strides_0"), val = tensor([1, 1])]; + tensor var_1781_pad_0 = const()[name = string("op_1781_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1781_dilations_0 = const()[name = string("op_1781_dilations_0"), val = tensor([1, 1])]; + int32 var_1781_groups_0 = const()[name = string("op_1781_groups_0"), val = int32(1)]; + tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46692096))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46665920))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1781_cast_fp16 = conv(dilations = var_1781_dilations_0, groups = var_1781_groups_0, pad = var_1781_pad_0, pad_type = var_1781_pad_type_0, strides = var_1781_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = string("op_1781_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = var_1775_cast_fp16, y = var_1781_cast_fp16)[name = string("input_69_cast_fp16")]; + string input_71_mode_0 = const()[name = string("input_71_mode_0"), val = string("EXACT")]; + tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = string("input_71_cast_fp16")]; + string var_1792_pad_type_0 = const()[name = string("op_1792_pad_type_0"), val = string("valid")]; + tensor var_1792_strides_0 = const()[name = string("op_1792_strides_0"), val = tensor([1, 1])]; + tensor var_1792_pad_0 = const()[name = string("op_1792_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1792_dilations_0 = const()[name = string("op_1792_dilations_0"), val = tensor([1, 1])]; + int32 var_1792_groups_0 = const()[name = string("op_1792_groups_0"), val = int32(1)]; + tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46987072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48166784))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48166912)))]; + tensor var_1792_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1792_dilations_0, groups = var_1792_groups_0, pad = var_1792_pad_0, pad_type = var_1792_pad_type_0, strides = var_1792_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_1792_cast_fp16")]; + string var_1798_pad_type_0 = const()[name = string("op_1798_pad_type_0"), val = string("valid")]; + tensor var_1798_strides_0 = const()[name = string("op_1798_strides_0"), val = tensor([1, 1])]; + tensor var_1798_pad_0 = const()[name = string("op_1798_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1798_dilations_0 = const()[name = string("op_1798_dilations_0"), val = tensor([1, 1])]; + int32 var_1798_groups_0 = const()[name = string("op_1798_groups_0"), val = int32(1)]; + tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48189952))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48168512))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1798_cast_fp16 = conv(dilations = var_1798_dilations_0, groups = var_1798_groups_0, pad = var_1798_pad_0, pad_type = var_1798_pad_type_0, strides = var_1798_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_1798_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = var_1792_cast_fp16, y = var_1798_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_37_cast_fp16")]; + int32 var_1808 = const()[name = string("op_1808"), val = int32(3)]; + tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; + fp16 var_1827_to_fp16 = const()[name = string("op_1827_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1827_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = string("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48484928)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = string("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48486528)))]; + fp16 obj_37_epsilon_0_to_fp16 = const()[name = string("obj_37_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_37_cast_fp16")]; + string var_1849_pad_type_0 = const()[name = string("op_1849_pad_type_0"), val = string("valid")]; + tensor var_1849_strides_0 = const()[name = string("op_1849_strides_0"), val = tensor([1, 1])]; + tensor var_1849_pad_0 = const()[name = string("op_1849_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1849_dilations_0 = const()[name = string("op_1849_dilations_0"), val = tensor([1, 1])]; + int32 var_1849_groups_0 = const()[name = string("op_1849_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48488128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48783104))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48783232)))]; + tensor var_1849_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1849_dilations_0, groups = var_1849_groups_0, pad = var_1849_pad_0, pad_type = var_1849_pad_type_0, strides = var_1849_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1849_cast_fp16")]; + string var_1855_pad_type_0 = const()[name = string("op_1855_pad_type_0"), val = string("valid")]; + tensor var_1855_strides_0 = const()[name = string("op_1855_strides_0"), val = tensor([1, 1])]; + tensor var_1855_pad_0 = const()[name = string("op_1855_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1855_dilations_0 = const()[name = string("op_1855_dilations_0"), val = tensor([1, 1])]; + int32 var_1855_groups_0 = const()[name = string("op_1855_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48790784))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48784832))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1855_cast_fp16 = conv(dilations = var_1855_dilations_0, groups = var_1855_groups_0, pad = var_1855_pad_0, pad_type = var_1855_pad_type_0, strides = var_1855_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1855_cast_fp16")]; + tensor query_19_cast_fp16 = add(x = var_1849_cast_fp16, y = var_1855_cast_fp16)[name = string("query_19_cast_fp16")]; + string var_1864_pad_type_0 = const()[name = string("op_1864_pad_type_0"), val = string("valid")]; + tensor var_1864_strides_0 = const()[name = string("op_1864_strides_0"), val = tensor([1, 1])]; + tensor var_1864_pad_0 = const()[name = string("op_1864_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1864_dilations_0 = const()[name = string("op_1864_dilations_0"), val = tensor([1, 1])]; + int32 var_1864_groups_0 = const()[name = string("op_1864_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48864576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49159552))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_1864_cast_fp16 = conv(dilations = var_1864_dilations_0, groups = var_1864_groups_0, pad = var_1864_pad_0, pad_type = var_1864_pad_type_0, strides = var_1864_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1864_cast_fp16")]; + string var_1870_pad_type_0 = const()[name = string("op_1870_pad_type_0"), val = string("valid")]; + tensor var_1870_strides_0 = const()[name = string("op_1870_strides_0"), val = tensor([1, 1])]; + tensor var_1870_pad_0 = const()[name = string("op_1870_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1870_dilations_0 = const()[name = string("op_1870_dilations_0"), val = tensor([1, 1])]; + int32 var_1870_groups_0 = const()[name = string("op_1870_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49166528))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49159680))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1870_cast_fp16 = conv(dilations = var_1870_dilations_0, groups = var_1870_groups_0, pad = var_1870_pad_0, pad_type = var_1870_pad_type_0, strides = var_1870_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1870_cast_fp16")]; + tensor key_19_cast_fp16 = add(x = var_1864_cast_fp16, y = var_1870_cast_fp16)[name = string("key_19_cast_fp16")]; + string var_1880_pad_type_0 = const()[name = string("op_1880_pad_type_0"), val = string("valid")]; + tensor var_1880_strides_0 = const()[name = string("op_1880_strides_0"), val = tensor([1, 1])]; + tensor var_1880_pad_0 = const()[name = string("op_1880_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1880_dilations_0 = const()[name = string("op_1880_dilations_0"), val = tensor([1, 1])]; + int32 var_1880_groups_0 = const()[name = string("op_1880_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49240320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49535296))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49535424)))]; + tensor var_1880_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1880_dilations_0, groups = var_1880_groups_0, pad = var_1880_pad_0, pad_type = var_1880_pad_type_0, strides = var_1880_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1880_cast_fp16")]; + string var_1886_pad_type_0 = const()[name = string("op_1886_pad_type_0"), val = string("valid")]; + tensor var_1886_strides_0 = const()[name = string("op_1886_strides_0"), val = tensor([1, 1])]; + tensor var_1886_pad_0 = const()[name = string("op_1886_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1886_dilations_0 = const()[name = string("op_1886_dilations_0"), val = tensor([1, 1])]; + int32 var_1886_groups_0 = const()[name = string("op_1886_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49541568))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49537024))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1886_cast_fp16 = conv(dilations = var_1886_dilations_0, groups = var_1886_groups_0, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1886_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1886_cast_fp16")]; + tensor value_19_cast_fp16 = add(x = var_1880_cast_fp16, y = var_1886_cast_fp16)[name = string("value_19_cast_fp16")]; + tensor var_1889 = const()[name = string("op_1889"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1889, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")]; + fp16 var_1891_to_fp16 = const()[name = string("op_1891_to_fp16"), val = fp16(0x1p-3)]; + tensor var_1892_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1891_to_fp16)[name = string("op_1892_cast_fp16")]; + tensor var_1893 = const()[name = string("op_1893"), val = tensor([1, 12, 64, -1])]; + tensor var_1894_cast_fp16 = reshape(shape = var_1893, x = key_19_cast_fp16)[name = string("op_1894_cast_fp16")]; + bool mh_w_19_transpose_x_0 = const()[name = string("mh_w_19_transpose_x_0"), val = bool(true)]; + bool mh_w_19_transpose_y_0 = const()[name = string("mh_w_19_transpose_y_0"), val = bool(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1892_cast_fp16, y = var_1894_cast_fp16)[name = string("mh_w_19_cast_fp16")]; + tensor var_1897_cast_fp16 = softmax(axis = var_1808, x = mh_w_19_cast_fp16)[name = string("op_1897_cast_fp16")]; + tensor var_1898 = const()[name = string("op_1898"), val = tensor([1, 12, 64, -1])]; + tensor var_1899_cast_fp16 = reshape(shape = var_1898, x = value_19_cast_fp16)[name = string("op_1899_cast_fp16")]; + bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)]; + bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1899_cast_fp16, y = var_1897_cast_fp16)[name = string("attn_19_cast_fp16")]; + tensor var_1902 = const()[name = string("op_1902"), val = tensor([1, 768, 1, -1])]; + tensor input_73_cast_fp16 = reshape(shape = var_1902, x = attn_19_cast_fp16)[name = string("input_73_cast_fp16")]; + string var_1912_pad_type_0 = const()[name = string("op_1912_pad_type_0"), val = string("valid")]; + tensor var_1912_strides_0 = const()[name = string("op_1912_strides_0"), val = tensor([1, 1])]; + tensor var_1912_pad_0 = const()[name = string("op_1912_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1912_dilations_0 = const()[name = string("op_1912_dilations_0"), val = tensor([1, 1])]; + int32 var_1912_groups_0 = const()[name = string("op_1912_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49615360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49910336))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49910464)))]; + tensor var_1912_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1912_dilations_0, groups = var_1912_groups_0, pad = var_1912_pad_0, pad_type = var_1912_pad_type_0, strides = var_1912_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_1912_cast_fp16")]; + string var_1918_pad_type_0 = const()[name = string("op_1918_pad_type_0"), val = string("valid")]; + tensor var_1918_strides_0 = const()[name = string("op_1918_strides_0"), val = tensor([1, 1])]; + tensor var_1918_pad_0 = const()[name = string("op_1918_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1918_dilations_0 = const()[name = string("op_1918_dilations_0"), val = tensor([1, 1])]; + int32 var_1918_groups_0 = const()[name = string("op_1918_groups_0"), val = int32(1)]; + tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49916928))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49912064))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1918_cast_fp16 = conv(dilations = var_1918_dilations_0, groups = var_1918_groups_0, pad = var_1918_pad_0, pad_type = var_1918_pad_type_0, strides = var_1918_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_1918_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_1912_cast_fp16, y = var_1918_cast_fp16)[name = string("obj_39_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor([1])]; + fp16 var_1929_to_fp16 = const()[name = string("op_1929_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1929_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49990720)))]; + tensor input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49992320)))]; + fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("input_75_cast_fp16")]; + string var_1947_pad_type_0 = const()[name = string("op_1947_pad_type_0"), val = string("valid")]; + tensor var_1947_strides_0 = const()[name = string("op_1947_strides_0"), val = tensor([1, 1])]; + tensor var_1947_pad_0 = const()[name = string("op_1947_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1947_dilations_0 = const()[name = string("op_1947_dilations_0"), val = tensor([1, 1])]; + int32 var_1947_groups_0 = const()[name = string("op_1947_groups_0"), val = int32(1)]; + tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49993920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51173632))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51173760)))]; + tensor var_1947_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1947_dilations_0, groups = var_1947_groups_0, pad = var_1947_pad_0, pad_type = var_1947_pad_type_0, strides = var_1947_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_1947_cast_fp16")]; + string var_1953_pad_type_0 = const()[name = string("op_1953_pad_type_0"), val = string("valid")]; + tensor var_1953_strides_0 = const()[name = string("op_1953_strides_0"), val = tensor([1, 1])]; + tensor var_1953_pad_0 = const()[name = string("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_dilations_0 = const()[name = string("op_1953_dilations_0"), val = tensor([1, 1])]; + int32 var_1953_groups_0 = const()[name = string("op_1953_groups_0"), val = int32(1)]; + tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51201984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51179968))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1953_cast_fp16 = conv(dilations = var_1953_dilations_0, groups = var_1953_groups_0, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1953_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_1953_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = var_1947_cast_fp16, y = var_1953_cast_fp16)[name = string("input_77_cast_fp16")]; + string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")]; + string var_1964_pad_type_0 = const()[name = string("op_1964_pad_type_0"), val = string("valid")]; + tensor var_1964_strides_0 = const()[name = string("op_1964_strides_0"), val = tensor([1, 1])]; + tensor var_1964_pad_0 = const()[name = string("op_1964_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1964_dilations_0 = const()[name = string("op_1964_dilations_0"), val = tensor([1, 1])]; + int32 var_1964_groups_0 = const()[name = string("op_1964_groups_0"), val = int32(1)]; + tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51496960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52676672))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52676800)))]; + tensor var_1964_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1964_dilations_0, groups = var_1964_groups_0, pad = var_1964_pad_0, pad_type = var_1964_pad_type_0, strides = var_1964_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_1964_cast_fp16")]; + string var_1970_pad_type_0 = const()[name = string("op_1970_pad_type_0"), val = string("valid")]; + tensor var_1970_strides_0 = const()[name = string("op_1970_strides_0"), val = tensor([1, 1])]; + tensor var_1970_pad_0 = const()[name = string("op_1970_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1970_dilations_0 = const()[name = string("op_1970_dilations_0"), val = tensor([1, 1])]; + int32 var_1970_groups_0 = const()[name = string("op_1970_groups_0"), val = int32(1)]; + tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52698816))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52678400))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_1970_cast_fp16 = conv(dilations = var_1970_dilations_0, groups = var_1970_groups_0, pad = var_1970_pad_0, pad_type = var_1970_pad_type_0, strides = var_1970_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_1970_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1970_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_41_cast_fp16")]; + int32 var_1980 = const()[name = string("op_1980"), val = int32(3)]; + tensor out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor([1])]; + fp16 var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1999_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")]; + tensor obj_41_gamma_0_to_fp16 = const()[name = string("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52993792)))]; + tensor obj_41_beta_0_to_fp16 = const()[name = string("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52995392)))]; + fp16 obj_41_epsilon_0_to_fp16 = const()[name = string("obj_41_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("obj_41_cast_fp16")]; + string var_2021_pad_type_0 = const()[name = string("op_2021_pad_type_0"), val = string("valid")]; + tensor var_2021_strides_0 = const()[name = string("op_2021_strides_0"), val = tensor([1, 1])]; + tensor var_2021_pad_0 = const()[name = string("op_2021_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2021_dilations_0 = const()[name = string("op_2021_dilations_0"), val = tensor([1, 1])]; + int32 var_2021_groups_0 = const()[name = string("op_2021_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52996992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53291968))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53292096)))]; + tensor var_2021_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2021_dilations_0, groups = var_2021_groups_0, pad = var_2021_pad_0, pad_type = var_2021_pad_type_0, strides = var_2021_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2021_cast_fp16")]; + string var_2027_pad_type_0 = const()[name = string("op_2027_pad_type_0"), val = string("valid")]; + tensor var_2027_strides_0 = const()[name = string("op_2027_strides_0"), val = tensor([1, 1])]; + tensor var_2027_pad_0 = const()[name = string("op_2027_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2027_dilations_0 = const()[name = string("op_2027_dilations_0"), val = tensor([1, 1])]; + int32 var_2027_groups_0 = const()[name = string("op_2027_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53299776))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53293696))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2027_cast_fp16 = conv(dilations = var_2027_dilations_0, groups = var_2027_groups_0, pad = var_2027_pad_0, pad_type = var_2027_pad_type_0, strides = var_2027_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2027_cast_fp16")]; + tensor query_21_cast_fp16 = add(x = var_2021_cast_fp16, y = var_2027_cast_fp16)[name = string("query_21_cast_fp16")]; + string var_2036_pad_type_0 = const()[name = string("op_2036_pad_type_0"), val = string("valid")]; + tensor var_2036_strides_0 = const()[name = string("op_2036_strides_0"), val = tensor([1, 1])]; + tensor var_2036_pad_0 = const()[name = string("op_2036_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2036_dilations_0 = const()[name = string("op_2036_dilations_0"), val = tensor([1, 1])]; + int32 var_2036_groups_0 = const()[name = string("op_2036_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53373568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53668544))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2036_cast_fp16 = conv(dilations = var_2036_dilations_0, groups = var_2036_groups_0, pad = var_2036_pad_0, pad_type = var_2036_pad_type_0, strides = var_2036_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2036_cast_fp16")]; + string var_2042_pad_type_0 = const()[name = string("op_2042_pad_type_0"), val = string("valid")]; + tensor var_2042_strides_0 = const()[name = string("op_2042_strides_0"), val = tensor([1, 1])]; + tensor var_2042_pad_0 = const()[name = string("op_2042_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2042_dilations_0 = const()[name = string("op_2042_dilations_0"), val = tensor([1, 1])]; + int32 var_2042_groups_0 = const()[name = string("op_2042_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53675072))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53668672))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2042_cast_fp16 = conv(dilations = var_2042_dilations_0, groups = var_2042_groups_0, pad = var_2042_pad_0, pad_type = var_2042_pad_type_0, strides = var_2042_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2042_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_2036_cast_fp16, y = var_2042_cast_fp16)[name = string("key_21_cast_fp16")]; + string var_2052_pad_type_0 = const()[name = string("op_2052_pad_type_0"), val = string("valid")]; + tensor var_2052_strides_0 = const()[name = string("op_2052_strides_0"), val = tensor([1, 1])]; + tensor var_2052_pad_0 = const()[name = string("op_2052_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2052_dilations_0 = const()[name = string("op_2052_dilations_0"), val = tensor([1, 1])]; + int32 var_2052_groups_0 = const()[name = string("op_2052_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53748864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54043840))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54043968)))]; + tensor var_2052_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2052_dilations_0, groups = var_2052_groups_0, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2052_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2052_cast_fp16")]; + string var_2058_pad_type_0 = const()[name = string("op_2058_pad_type_0"), val = string("valid")]; + tensor var_2058_strides_0 = const()[name = string("op_2058_strides_0"), val = tensor([1, 1])]; + tensor var_2058_pad_0 = const()[name = string("op_2058_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2058_dilations_0 = const()[name = string("op_2058_dilations_0"), val = tensor([1, 1])]; + int32 var_2058_groups_0 = const()[name = string("op_2058_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54050496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54045568))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2058_cast_fp16 = conv(dilations = var_2058_dilations_0, groups = var_2058_groups_0, pad = var_2058_pad_0, pad_type = var_2058_pad_type_0, strides = var_2058_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2058_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_2052_cast_fp16, y = var_2058_cast_fp16)[name = string("value_21_cast_fp16")]; + tensor var_2061 = const()[name = string("op_2061"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_2061, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")]; + fp16 var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2064_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2063_to_fp16)[name = string("op_2064_cast_fp16")]; + tensor var_2065 = const()[name = string("op_2065"), val = tensor([1, 12, 64, -1])]; + tensor var_2066_cast_fp16 = reshape(shape = var_2065, x = key_21_cast_fp16)[name = string("op_2066_cast_fp16")]; + bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)]; + bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)]; + tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2064_cast_fp16, y = var_2066_cast_fp16)[name = string("mh_w_21_cast_fp16")]; + tensor var_2069_cast_fp16 = softmax(axis = var_1980, x = mh_w_21_cast_fp16)[name = string("op_2069_cast_fp16")]; + tensor var_2070 = const()[name = string("op_2070"), val = tensor([1, 12, 64, -1])]; + tensor var_2071_cast_fp16 = reshape(shape = var_2070, x = value_21_cast_fp16)[name = string("op_2071_cast_fp16")]; + bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)]; + bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2071_cast_fp16, y = var_2069_cast_fp16)[name = string("attn_21_cast_fp16")]; + tensor var_2074 = const()[name = string("op_2074"), val = tensor([1, 768, 1, -1])]; + tensor input_81_cast_fp16 = reshape(shape = var_2074, x = attn_21_cast_fp16)[name = string("input_81_cast_fp16")]; + string var_2084_pad_type_0 = const()[name = string("op_2084_pad_type_0"), val = string("valid")]; + tensor var_2084_strides_0 = const()[name = string("op_2084_strides_0"), val = tensor([1, 1])]; + tensor var_2084_pad_0 = const()[name = string("op_2084_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2084_dilations_0 = const()[name = string("op_2084_dilations_0"), val = tensor([1, 1])]; + int32 var_2084_groups_0 = const()[name = string("op_2084_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54124288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54419264))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54419392)))]; + tensor var_2084_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2084_dilations_0, groups = var_2084_groups_0, pad = var_2084_pad_0, pad_type = var_2084_pad_type_0, strides = var_2084_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2084_cast_fp16")]; + string var_2090_pad_type_0 = const()[name = string("op_2090_pad_type_0"), val = string("valid")]; + tensor var_2090_strides_0 = const()[name = string("op_2090_strides_0"), val = tensor([1, 1])]; + tensor var_2090_pad_0 = const()[name = string("op_2090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2090_dilations_0 = const()[name = string("op_2090_dilations_0"), val = tensor([1, 1])]; + int32 var_2090_groups_0 = const()[name = string("op_2090_groups_0"), val = int32(1)]; + tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54426624))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54420992))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2090_cast_fp16 = conv(dilations = var_2090_dilations_0, groups = var_2090_groups_0, pad = var_2090_pad_0, pad_type = var_2090_pad_type_0, strides = var_2090_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2090_cast_fp16")]; + tensor obj_43_cast_fp16 = add(x = var_2084_cast_fp16, y = var_2090_cast_fp16)[name = string("obj_43_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = string("inputs_43_cast_fp16")]; + tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; + fp16 var_2101_to_fp16 = const()[name = string("op_2101_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2101_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor input_83_gamma_0_to_fp16 = const()[name = string("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54500416)))]; + tensor input_83_beta_0_to_fp16 = const()[name = string("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54502016)))]; + fp16 input_83_epsilon_0_to_fp16 = const()[name = string("input_83_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("input_83_cast_fp16")]; + string var_2119_pad_type_0 = const()[name = string("op_2119_pad_type_0"), val = string("valid")]; + tensor var_2119_strides_0 = const()[name = string("op_2119_strides_0"), val = tensor([1, 1])]; + tensor var_2119_pad_0 = const()[name = string("op_2119_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2119_dilations_0 = const()[name = string("op_2119_dilations_0"), val = tensor([1, 1])]; + int32 var_2119_groups_0 = const()[name = string("op_2119_groups_0"), val = int32(1)]; + tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54503616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55683328))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55683456)))]; + tensor var_2119_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2119_dilations_0, groups = var_2119_groups_0, pad = var_2119_pad_0, pad_type = var_2119_pad_type_0, strides = var_2119_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2119_cast_fp16")]; + string var_2125_pad_type_0 = const()[name = string("op_2125_pad_type_0"), val = string("valid")]; + tensor var_2125_strides_0 = const()[name = string("op_2125_strides_0"), val = tensor([1, 1])]; + tensor var_2125_pad_0 = const()[name = string("op_2125_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2125_dilations_0 = const()[name = string("op_2125_dilations_0"), val = tensor([1, 1])]; + int32 var_2125_groups_0 = const()[name = string("op_2125_groups_0"), val = int32(1)]; + tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55711488))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55689664))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2125_cast_fp16 = conv(dilations = var_2125_dilations_0, groups = var_2125_groups_0, pad = var_2125_pad_0, pad_type = var_2125_pad_type_0, strides = var_2125_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2125_cast_fp16")]; + tensor input_85_cast_fp16 = add(x = var_2119_cast_fp16, y = var_2125_cast_fp16)[name = string("input_85_cast_fp16")]; + string input_87_mode_0 = const()[name = string("input_87_mode_0"), val = string("EXACT")]; + tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = string("input_87_cast_fp16")]; + string var_2136_pad_type_0 = const()[name = string("op_2136_pad_type_0"), val = string("valid")]; + tensor var_2136_strides_0 = const()[name = string("op_2136_strides_0"), val = tensor([1, 1])]; + tensor var_2136_pad_0 = const()[name = string("op_2136_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2136_dilations_0 = const()[name = string("op_2136_dilations_0"), val = tensor([1, 1])]; + int32 var_2136_groups_0 = const()[name = string("op_2136_groups_0"), val = int32(1)]; + tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56006464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57186176))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57186304)))]; + tensor var_2136_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2136_dilations_0, groups = var_2136_groups_0, pad = var_2136_pad_0, pad_type = var_2136_pad_type_0, strides = var_2136_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = string("op_2136_cast_fp16")]; + string var_2142_pad_type_0 = const()[name = string("op_2142_pad_type_0"), val = string("valid")]; + tensor var_2142_strides_0 = const()[name = string("op_2142_strides_0"), val = tensor([1, 1])]; + tensor var_2142_pad_0 = const()[name = string("op_2142_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2142_dilations_0 = const()[name = string("op_2142_dilations_0"), val = tensor([1, 1])]; + int32 var_2142_groups_0 = const()[name = string("op_2142_groups_0"), val = int32(1)]; + tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57213952))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57187904))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2142_cast_fp16 = conv(dilations = var_2142_dilations_0, groups = var_2142_groups_0, pad = var_2142_pad_0, pad_type = var_2142_pad_type_0, strides = var_2142_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = string("op_2142_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = var_2136_cast_fp16, y = var_2142_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_45_cast_fp16")]; + int32 var_2152 = const()[name = string("op_2152"), val = int32(3)]; + tensor out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor([1])]; + fp16 var_2171_to_fp16 = const()[name = string("op_2171_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2171_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")]; + tensor obj_45_gamma_0_to_fp16 = const()[name = string("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57508928)))]; + tensor obj_45_beta_0_to_fp16 = const()[name = string("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57510528)))]; + fp16 obj_45_epsilon_0_to_fp16 = const()[name = string("obj_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_45_cast_fp16")]; + string var_2193_pad_type_0 = const()[name = string("op_2193_pad_type_0"), val = string("valid")]; + tensor var_2193_strides_0 = const()[name = string("op_2193_strides_0"), val = tensor([1, 1])]; + tensor var_2193_pad_0 = const()[name = string("op_2193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2193_dilations_0 = const()[name = string("op_2193_dilations_0"), val = tensor([1, 1])]; + int32 var_2193_groups_0 = const()[name = string("op_2193_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57512128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57807104))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57807232)))]; + tensor var_2193_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2193_dilations_0, groups = var_2193_groups_0, pad = var_2193_pad_0, pad_type = var_2193_pad_type_0, strides = var_2193_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2193_cast_fp16")]; + string var_2199_pad_type_0 = const()[name = string("op_2199_pad_type_0"), val = string("valid")]; + tensor var_2199_strides_0 = const()[name = string("op_2199_strides_0"), val = tensor([1, 1])]; + tensor var_2199_pad_0 = const()[name = string("op_2199_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2199_dilations_0 = const()[name = string("op_2199_dilations_0"), val = tensor([1, 1])]; + int32 var_2199_groups_0 = const()[name = string("op_2199_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57815296))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57808832))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2199_cast_fp16 = conv(dilations = var_2199_dilations_0, groups = var_2199_groups_0, pad = var_2199_pad_0, pad_type = var_2199_pad_type_0, strides = var_2199_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2199_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_2193_cast_fp16, y = var_2199_cast_fp16)[name = string("query_cast_fp16")]; + string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")]; + tensor var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor([1, 1])]; + tensor var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor([1, 1])]; + int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57889088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58184064))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2208_cast_fp16 = conv(dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2208_cast_fp16")]; + string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")]; + tensor var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor([1, 1])]; + tensor var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor([1, 1])]; + int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58191872))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58184192))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2214_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("key_cast_fp16")]; + string var_2224_pad_type_0 = const()[name = string("op_2224_pad_type_0"), val = string("valid")]; + tensor var_2224_strides_0 = const()[name = string("op_2224_strides_0"), val = tensor([1, 1])]; + tensor var_2224_pad_0 = const()[name = string("op_2224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2224_dilations_0 = const()[name = string("op_2224_dilations_0"), val = tensor([1, 1])]; + int32 var_2224_groups_0 = const()[name = string("op_2224_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58265664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58560640))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58560768)))]; + tensor var_2224_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2224_dilations_0, groups = var_2224_groups_0, pad = var_2224_pad_0, pad_type = var_2224_pad_type_0, strides = var_2224_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2224_cast_fp16")]; + string var_2230_pad_type_0 = const()[name = string("op_2230_pad_type_0"), val = string("valid")]; + tensor var_2230_strides_0 = const()[name = string("op_2230_strides_0"), val = tensor([1, 1])]; + tensor var_2230_pad_0 = const()[name = string("op_2230_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2230_dilations_0 = const()[name = string("op_2230_dilations_0"), val = tensor([1, 1])]; + int32 var_2230_groups_0 = const()[name = string("op_2230_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58567232))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58562368))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2230_cast_fp16 = conv(dilations = var_2230_dilations_0, groups = var_2230_groups_0, pad = var_2230_pad_0, pad_type = var_2230_pad_type_0, strides = var_2230_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2230_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_2224_cast_fp16, y = var_2230_cast_fp16)[name = string("value_cast_fp16")]; + tensor var_2233 = const()[name = string("op_2233"), val = tensor([1, 12, 64, -1])]; + tensor mh_q_cast_fp16 = reshape(shape = var_2233, x = query_cast_fp16)[name = string("mh_q_cast_fp16")]; + fp16 var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = fp16(0x1p-3)]; + tensor var_2236_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_2235_to_fp16)[name = string("op_2236_cast_fp16")]; + tensor var_2237 = const()[name = string("op_2237"), val = tensor([1, 12, 64, -1])]; + tensor var_2238_cast_fp16 = reshape(shape = var_2237, x = key_cast_fp16)[name = string("op_2238_cast_fp16")]; + bool mh_w_transpose_x_0 = const()[name = string("mh_w_transpose_x_0"), val = bool(true)]; + bool mh_w_transpose_y_0 = const()[name = string("mh_w_transpose_y_0"), val = bool(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2236_cast_fp16, y = var_2238_cast_fp16)[name = string("mh_w_cast_fp16")]; + tensor var_2241_cast_fp16 = softmax(axis = var_2152, x = mh_w_cast_fp16)[name = string("op_2241_cast_fp16")]; + tensor var_2242 = const()[name = string("op_2242"), val = tensor([1, 12, 64, -1])]; + tensor var_2243_cast_fp16 = reshape(shape = var_2242, x = value_cast_fp16)[name = string("op_2243_cast_fp16")]; + bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; + bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2243_cast_fp16, y = var_2241_cast_fp16)[name = string("attn_cast_fp16")]; + tensor var_2246 = const()[name = string("op_2246"), val = tensor([1, 768, 1, -1])]; + tensor input_89_cast_fp16 = reshape(shape = var_2246, x = attn_cast_fp16)[name = string("input_89_cast_fp16")]; + string var_2256_pad_type_0 = const()[name = string("op_2256_pad_type_0"), val = string("valid")]; + tensor var_2256_strides_0 = const()[name = string("op_2256_strides_0"), val = tensor([1, 1])]; + tensor var_2256_pad_0 = const()[name = string("op_2256_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2256_dilations_0 = const()[name = string("op_2256_dilations_0"), val = tensor([1, 1])]; + int32 var_2256_groups_0 = const()[name = string("op_2256_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58641024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58936000))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58936128)))]; + tensor var_2256_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2256_dilations_0, groups = var_2256_groups_0, pad = var_2256_pad_0, pad_type = var_2256_pad_type_0, strides = var_2256_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2256_cast_fp16")]; + string var_2262_pad_type_0 = const()[name = string("op_2262_pad_type_0"), val = string("valid")]; + tensor var_2262_strides_0 = const()[name = string("op_2262_strides_0"), val = tensor([1, 1])]; + tensor var_2262_pad_0 = const()[name = string("op_2262_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2262_dilations_0 = const()[name = string("op_2262_dilations_0"), val = tensor([1, 1])]; + int32 var_2262_groups_0 = const()[name = string("op_2262_groups_0"), val = int32(1)]; + tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58943552))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58937728))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2262_cast_fp16 = conv(dilations = var_2262_dilations_0, groups = var_2262_groups_0, pad = var_2262_pad_0, pad_type = var_2262_pad_type_0, strides = var_2262_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2262_cast_fp16")]; + tensor obj_cast_fp16 = add(x = var_2256_cast_fp16, y = var_2262_cast_fp16)[name = string("obj_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = string("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor([1])]; + fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2273_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")]; + tensor input_91_gamma_0_to_fp16 = const()[name = string("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59017344)))]; + tensor input_91_beta_0_to_fp16 = const()[name = string("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59018944)))]; + fp16 input_91_epsilon_0_to_fp16 = const()[name = string("input_91_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_91_cast_fp16")]; + string var_2291_pad_type_0 = const()[name = string("op_2291_pad_type_0"), val = string("valid")]; + tensor var_2291_strides_0 = const()[name = string("op_2291_strides_0"), val = tensor([1, 1])]; + tensor var_2291_pad_0 = const()[name = string("op_2291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2291_dilations_0 = const()[name = string("op_2291_dilations_0"), val = tensor([1, 1])]; + int32 var_2291_groups_0 = const()[name = string("op_2291_groups_0"), val = int32(1)]; + tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59020544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60200256))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60200384)))]; + tensor var_2291_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2291_dilations_0, groups = var_2291_groups_0, pad = var_2291_pad_0, pad_type = var_2291_pad_type_0, strides = var_2291_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2291_cast_fp16")]; + string var_2297_pad_type_0 = const()[name = string("op_2297_pad_type_0"), val = string("valid")]; + tensor var_2297_strides_0 = const()[name = string("op_2297_strides_0"), val = tensor([1, 1])]; + tensor var_2297_pad_0 = const()[name = string("op_2297_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2297_dilations_0 = const()[name = string("op_2297_dilations_0"), val = tensor([1, 1])]; + int32 var_2297_groups_0 = const()[name = string("op_2297_groups_0"), val = int32(1)]; + tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60238976))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60206592))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2297_cast_fp16 = conv(dilations = var_2297_dilations_0, groups = var_2297_groups_0, pad = var_2297_pad_0, pad_type = var_2297_pad_type_0, strides = var_2297_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2297_cast_fp16")]; + tensor input_93_cast_fp16 = add(x = var_2291_cast_fp16, y = var_2297_cast_fp16)[name = string("input_93_cast_fp16")]; + string input_95_mode_0 = const()[name = string("input_95_mode_0"), val = string("EXACT")]; + tensor input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = string("input_95_cast_fp16")]; + string var_2308_pad_type_0 = const()[name = string("op_2308_pad_type_0"), val = string("valid")]; + tensor var_2308_strides_0 = const()[name = string("op_2308_strides_0"), val = tensor([1, 1])]; + tensor var_2308_pad_0 = const()[name = string("op_2308_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2308_dilations_0 = const()[name = string("op_2308_dilations_0"), val = tensor([1, 1])]; + int32 var_2308_groups_0 = const()[name = string("op_2308_groups_0"), val = int32(1)]; + tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60533952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61713664))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")]; + tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61713792)))]; + tensor var_2308_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2308_dilations_0, groups = var_2308_groups_0, pad = var_2308_pad_0, pad_type = var_2308_pad_type_0, strides = var_2308_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2308_cast_fp16")]; + string var_2314_pad_type_0 = const()[name = string("op_2314_pad_type_0"), val = string("valid")]; + tensor var_2314_strides_0 = const()[name = string("op_2314_strides_0"), val = tensor([1, 1])]; + tensor var_2314_pad_0 = const()[name = string("op_2314_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2314_dilations_0 = const()[name = string("op_2314_dilations_0"), val = tensor([1, 1])]; + int32 var_2314_groups_0 = const()[name = string("op_2314_groups_0"), val = int32(1)]; + tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61759168))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61715392))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2314_cast_fp16 = conv(dilations = var_2314_dilations_0, groups = var_2314_groups_0, pad = var_2314_pad_0, pad_type = var_2314_pad_type_0, strides = var_2314_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2314_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = var_2308_cast_fp16, y = var_2314_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = string("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = string("out_axes_0"), val = tensor([1])]; + fp16 var_2329_to_fp16 = const()[name = string("op_2329_to_fp16"), val = fp16(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2329_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62054144)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62055744)))]; + fp16 encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = string("encoder_output_embeds_type_fp32_cast_fp16")]; + string var_2355_pad_type_0 = const()[name = string("op_2355_pad_type_0"), val = string("valid")]; + tensor var_2355_strides_0 = const()[name = string("op_2355_strides_0"), val = tensor([1, 1])]; + tensor var_2355_pad_0 = const()[name = string("op_2355_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2355_dilations_0 = const()[name = string("op_2355_dilations_0"), val = tensor([1, 1])]; + int32 var_2355_groups_0 = const()[name = string("op_2355_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62057344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62352320))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2355_cast_fp16 = conv(dilations = var_2355_dilations_0, groups = var_2355_groups_0, pad = var_2355_pad_0, pad_type = var_2355_pad_type_0, strides = var_2355_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2355_cast_fp16")]; + string var_2361_pad_type_0 = const()[name = string("op_2361_pad_type_0"), val = string("valid")]; + tensor var_2361_strides_0 = const()[name = string("op_2361_strides_0"), val = tensor([1, 1])]; + tensor var_2361_pad_0 = const()[name = string("op_2361_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2361_dilations_0 = const()[name = string("op_2361_dilations_0"), val = tensor([1, 1])]; + int32 var_2361_groups_0 = const()[name = string("op_2361_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62364032))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62352448))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2361_cast_fp16 = conv(dilations = var_2361_dilations_0, groups = var_2361_groups_0, pad = var_2361_pad_0, pad_type = var_2361_pad_type_0, strides = var_2361_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2361_cast_fp16")]; + tensor var_2362_cast_fp16 = add(x = var_2355_cast_fp16, y = var_2361_cast_fp16)[name = string("op_2362_cast_fp16")]; + string var_2371_pad_type_0 = const()[name = string("op_2371_pad_type_0"), val = string("valid")]; + tensor var_2371_strides_0 = const()[name = string("op_2371_strides_0"), val = tensor([1, 1])]; + tensor var_2371_pad_0 = const()[name = string("op_2371_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2371_dilations_0 = const()[name = string("op_2371_dilations_0"), val = tensor([1, 1])]; + int32 var_2371_groups_0 = const()[name = string("op_2371_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62437824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62732800))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62732928)))]; + tensor var_2371_cast_fp16 = conv(bias = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2371_dilations_0, groups = var_2371_groups_0, pad = var_2371_pad_0, pad_type = var_2371_pad_type_0, strides = var_2371_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2371_cast_fp16")]; + string var_2377_pad_type_0 = const()[name = string("op_2377_pad_type_0"), val = string("valid")]; + tensor var_2377_strides_0 = const()[name = string("op_2377_strides_0"), val = tensor([1, 1])]; + tensor var_2377_pad_0 = const()[name = string("op_2377_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2377_dilations_0 = const()[name = string("op_2377_dilations_0"), val = tensor([1, 1])]; + int32 var_2377_groups_0 = const()[name = string("op_2377_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62740032))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62734528))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2377_cast_fp16 = conv(dilations = var_2377_dilations_0, groups = var_2377_groups_0, pad = var_2377_pad_0, pad_type = var_2377_pad_type_0, strides = var_2377_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2377_cast_fp16")]; + tensor var_2378_cast_fp16 = add(x = var_2371_cast_fp16, y = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")]; + string var_2398_pad_type_0 = const()[name = string("op_2398_pad_type_0"), val = string("valid")]; + tensor var_2398_strides_0 = const()[name = string("op_2398_strides_0"), val = tensor([1, 1])]; + tensor var_2398_pad_0 = const()[name = string("op_2398_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2398_dilations_0 = const()[name = string("op_2398_dilations_0"), val = tensor([1, 1])]; + int32 var_2398_groups_0 = const()[name = string("op_2398_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62813824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63108800))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2398_cast_fp16 = conv(dilations = var_2398_dilations_0, groups = var_2398_groups_0, pad = var_2398_pad_0, pad_type = var_2398_pad_type_0, strides = var_2398_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2398_cast_fp16")]; + string var_2404_pad_type_0 = const()[name = string("op_2404_pad_type_0"), val = string("valid")]; + tensor var_2404_strides_0 = const()[name = string("op_2404_strides_0"), val = tensor([1, 1])]; + tensor var_2404_pad_0 = const()[name = string("op_2404_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2404_dilations_0 = const()[name = string("op_2404_dilations_0"), val = tensor([1, 1])]; + int32 var_2404_groups_0 = const()[name = string("op_2404_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63116352))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63108928))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2404_cast_fp16 = conv(dilations = var_2404_dilations_0, groups = var_2404_groups_0, pad = var_2404_pad_0, pad_type = var_2404_pad_type_0, strides = var_2404_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2404_cast_fp16")]; + tensor var_2405_cast_fp16 = add(x = var_2398_cast_fp16, y = var_2404_cast_fp16)[name = string("op_2405_cast_fp16")]; + string var_2414_pad_type_0 = const()[name = string("op_2414_pad_type_0"), val = string("valid")]; + tensor var_2414_strides_0 = const()[name = string("op_2414_strides_0"), val = tensor([1, 1])]; + tensor var_2414_pad_0 = const()[name = string("op_2414_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2414_dilations_0 = const()[name = string("op_2414_dilations_0"), val = tensor([1, 1])]; + int32 var_2414_groups_0 = const()[name = string("op_2414_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63190144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63485120))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63485248)))]; + tensor var_2414_cast_fp16 = conv(bias = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2414_dilations_0, groups = var_2414_groups_0, pad = var_2414_pad_0, pad_type = var_2414_pad_type_0, strides = var_2414_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2414_cast_fp16")]; + string var_2420_pad_type_0 = const()[name = string("op_2420_pad_type_0"), val = string("valid")]; + tensor var_2420_strides_0 = const()[name = string("op_2420_strides_0"), val = tensor([1, 1])]; + tensor var_2420_pad_0 = const()[name = string("op_2420_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2420_dilations_0 = const()[name = string("op_2420_dilations_0"), val = tensor([1, 1])]; + int32 var_2420_groups_0 = const()[name = string("op_2420_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63492928))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63486848))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2420_cast_fp16 = conv(dilations = var_2420_dilations_0, groups = var_2420_groups_0, pad = var_2420_pad_0, pad_type = var_2420_pad_type_0, strides = var_2420_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2420_cast_fp16")]; + tensor var_2421_cast_fp16 = add(x = var_2414_cast_fp16, y = var_2420_cast_fp16)[name = string("op_2421_cast_fp16")]; + string var_2441_pad_type_0 = const()[name = string("op_2441_pad_type_0"), val = string("valid")]; + tensor var_2441_strides_0 = const()[name = string("op_2441_strides_0"), val = tensor([1, 1])]; + tensor var_2441_pad_0 = const()[name = string("op_2441_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2441_dilations_0 = const()[name = string("op_2441_dilations_0"), val = tensor([1, 1])]; + int32 var_2441_groups_0 = const()[name = string("op_2441_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63566720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63861696))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2441_cast_fp16 = conv(dilations = var_2441_dilations_0, groups = var_2441_groups_0, pad = var_2441_pad_0, pad_type = var_2441_pad_type_0, strides = var_2441_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2441_cast_fp16")]; + string var_2447_pad_type_0 = const()[name = string("op_2447_pad_type_0"), val = string("valid")]; + tensor var_2447_strides_0 = const()[name = string("op_2447_strides_0"), val = tensor([1, 1])]; + tensor var_2447_pad_0 = const()[name = string("op_2447_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2447_dilations_0 = const()[name = string("op_2447_dilations_0"), val = tensor([1, 1])]; + int32 var_2447_groups_0 = const()[name = string("op_2447_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63873728))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63861824))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2447_cast_fp16 = conv(dilations = var_2447_dilations_0, groups = var_2447_groups_0, pad = var_2447_pad_0, pad_type = var_2447_pad_type_0, strides = var_2447_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2447_cast_fp16")]; + tensor var_2448_cast_fp16 = add(x = var_2441_cast_fp16, y = var_2447_cast_fp16)[name = string("op_2448_cast_fp16")]; + string var_2457_pad_type_0 = const()[name = string("op_2457_pad_type_0"), val = string("valid")]; + tensor var_2457_strides_0 = const()[name = string("op_2457_strides_0"), val = tensor([1, 1])]; + tensor var_2457_pad_0 = const()[name = string("op_2457_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2457_dilations_0 = const()[name = string("op_2457_dilations_0"), val = tensor([1, 1])]; + int32 var_2457_groups_0 = const()[name = string("op_2457_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63947520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64242496))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64242624)))]; + tensor var_2457_cast_fp16 = conv(bias = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2457_dilations_0, groups = var_2457_groups_0, pad = var_2457_pad_0, pad_type = var_2457_pad_type_0, strides = var_2457_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2457_cast_fp16")]; + string var_2463_pad_type_0 = const()[name = string("op_2463_pad_type_0"), val = string("valid")]; + tensor var_2463_strides_0 = const()[name = string("op_2463_strides_0"), val = tensor([1, 1])]; + tensor var_2463_pad_0 = const()[name = string("op_2463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2463_dilations_0 = const()[name = string("op_2463_dilations_0"), val = tensor([1, 1])]; + int32 var_2463_groups_0 = const()[name = string("op_2463_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64254144))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64244224))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2463_cast_fp16 = conv(dilations = var_2463_dilations_0, groups = var_2463_groups_0, pad = var_2463_pad_0, pad_type = var_2463_pad_type_0, strides = var_2463_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2463_cast_fp16")]; + tensor var_2464_cast_fp16 = add(x = var_2457_cast_fp16, y = var_2463_cast_fp16)[name = string("op_2464_cast_fp16")]; + string var_2484_pad_type_0 = const()[name = string("op_2484_pad_type_0"), val = string("valid")]; + tensor var_2484_strides_0 = const()[name = string("op_2484_strides_0"), val = tensor([1, 1])]; + tensor var_2484_pad_0 = const()[name = string("op_2484_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2484_dilations_0 = const()[name = string("op_2484_dilations_0"), val = tensor([1, 1])]; + int32 var_2484_groups_0 = const()[name = string("op_2484_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64327936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64622912))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2484_cast_fp16 = conv(dilations = var_2484_dilations_0, groups = var_2484_groups_0, pad = var_2484_pad_0, pad_type = var_2484_pad_type_0, strides = var_2484_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2484_cast_fp16")]; + string var_2490_pad_type_0 = const()[name = string("op_2490_pad_type_0"), val = string("valid")]; + tensor var_2490_strides_0 = const()[name = string("op_2490_strides_0"), val = tensor([1, 1])]; + tensor var_2490_pad_0 = const()[name = string("op_2490_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2490_dilations_0 = const()[name = string("op_2490_dilations_0"), val = tensor([1, 1])]; + int32 var_2490_groups_0 = const()[name = string("op_2490_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64634496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64623040))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2490_cast_fp16 = conv(dilations = var_2490_dilations_0, groups = var_2490_groups_0, pad = var_2490_pad_0, pad_type = var_2490_pad_type_0, strides = var_2490_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2490_cast_fp16")]; + tensor var_2491_cast_fp16 = add(x = var_2484_cast_fp16, y = var_2490_cast_fp16)[name = string("op_2491_cast_fp16")]; + string var_2500_pad_type_0 = const()[name = string("op_2500_pad_type_0"), val = string("valid")]; + tensor var_2500_strides_0 = const()[name = string("op_2500_strides_0"), val = tensor([1, 1])]; + tensor var_2500_pad_0 = const()[name = string("op_2500_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2500_dilations_0 = const()[name = string("op_2500_dilations_0"), val = tensor([1, 1])]; + int32 var_2500_groups_0 = const()[name = string("op_2500_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64708288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65003264))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65003392)))]; + tensor var_2500_cast_fp16 = conv(bias = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2500_dilations_0, groups = var_2500_groups_0, pad = var_2500_pad_0, pad_type = var_2500_pad_type_0, strides = var_2500_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2500_cast_fp16")]; + string var_2506_pad_type_0 = const()[name = string("op_2506_pad_type_0"), val = string("valid")]; + tensor var_2506_strides_0 = const()[name = string("op_2506_strides_0"), val = tensor([1, 1])]; + tensor var_2506_pad_0 = const()[name = string("op_2506_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2506_dilations_0 = const()[name = string("op_2506_dilations_0"), val = tensor([1, 1])]; + int32 var_2506_groups_0 = const()[name = string("op_2506_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65015232))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65004992))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2506_cast_fp16 = conv(dilations = var_2506_dilations_0, groups = var_2506_groups_0, pad = var_2506_pad_0, pad_type = var_2506_pad_type_0, strides = var_2506_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2506_cast_fp16")]; + tensor var_2507_cast_fp16 = add(x = var_2500_cast_fp16, y = var_2506_cast_fp16)[name = string("op_2507_cast_fp16")]; + string var_2527_pad_type_0 = const()[name = string("op_2527_pad_type_0"), val = string("valid")]; + tensor var_2527_strides_0 = const()[name = string("op_2527_strides_0"), val = tensor([1, 1])]; + tensor var_2527_pad_0 = const()[name = string("op_2527_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2527_dilations_0 = const()[name = string("op_2527_dilations_0"), val = tensor([1, 1])]; + int32 var_2527_groups_0 = const()[name = string("op_2527_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65089024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65384000))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2527_cast_fp16 = conv(dilations = var_2527_dilations_0, groups = var_2527_groups_0, pad = var_2527_pad_0, pad_type = var_2527_pad_type_0, strides = var_2527_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2527_cast_fp16")]; + string var_2533_pad_type_0 = const()[name = string("op_2533_pad_type_0"), val = string("valid")]; + tensor var_2533_strides_0 = const()[name = string("op_2533_strides_0"), val = tensor([1, 1])]; + tensor var_2533_pad_0 = const()[name = string("op_2533_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2533_dilations_0 = const()[name = string("op_2533_dilations_0"), val = tensor([1, 1])]; + int32 var_2533_groups_0 = const()[name = string("op_2533_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65393024))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65384128))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2533_cast_fp16 = conv(dilations = var_2533_dilations_0, groups = var_2533_groups_0, pad = var_2533_pad_0, pad_type = var_2533_pad_type_0, strides = var_2533_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2533_cast_fp16")]; + tensor var_2534_cast_fp16 = add(x = var_2527_cast_fp16, y = var_2533_cast_fp16)[name = string("op_2534_cast_fp16")]; + string var_2543_pad_type_0 = const()[name = string("op_2543_pad_type_0"), val = string("valid")]; + tensor var_2543_strides_0 = const()[name = string("op_2543_strides_0"), val = tensor([1, 1])]; + tensor var_2543_pad_0 = const()[name = string("op_2543_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2543_dilations_0 = const()[name = string("op_2543_dilations_0"), val = tensor([1, 1])]; + int32 var_2543_groups_0 = const()[name = string("op_2543_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65466816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65761792))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65761920)))]; + tensor var_2543_cast_fp16 = conv(bias = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2543_dilations_0, groups = var_2543_groups_0, pad = var_2543_pad_0, pad_type = var_2543_pad_type_0, strides = var_2543_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2543_cast_fp16")]; + string var_2549_pad_type_0 = const()[name = string("op_2549_pad_type_0"), val = string("valid")]; + tensor var_2549_strides_0 = const()[name = string("op_2549_strides_0"), val = tensor([1, 1])]; + tensor var_2549_pad_0 = const()[name = string("op_2549_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2549_dilations_0 = const()[name = string("op_2549_dilations_0"), val = tensor([1, 1])]; + int32 var_2549_groups_0 = const()[name = string("op_2549_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65772736))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65763520))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2549_cast_fp16 = conv(dilations = var_2549_dilations_0, groups = var_2549_groups_0, pad = var_2549_pad_0, pad_type = var_2549_pad_type_0, strides = var_2549_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2549_cast_fp16")]; + tensor var_2550_cast_fp16 = add(x = var_2543_cast_fp16, y = var_2549_cast_fp16)[name = string("op_2550_cast_fp16")]; + string var_2570_pad_type_0 = const()[name = string("op_2570_pad_type_0"), val = string("valid")]; + tensor var_2570_strides_0 = const()[name = string("op_2570_strides_0"), val = tensor([1, 1])]; + tensor var_2570_pad_0 = const()[name = string("op_2570_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2570_dilations_0 = const()[name = string("op_2570_dilations_0"), val = tensor([1, 1])]; + int32 var_2570_groups_0 = const()[name = string("op_2570_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65846528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66141504))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2570_cast_fp16 = conv(dilations = var_2570_dilations_0, groups = var_2570_groups_0, pad = var_2570_pad_0, pad_type = var_2570_pad_type_0, strides = var_2570_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2570_cast_fp16")]; + string var_2576_pad_type_0 = const()[name = string("op_2576_pad_type_0"), val = string("valid")]; + tensor var_2576_strides_0 = const()[name = string("op_2576_strides_0"), val = tensor([1, 1])]; + tensor var_2576_pad_0 = const()[name = string("op_2576_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2576_dilations_0 = const()[name = string("op_2576_dilations_0"), val = tensor([1, 1])]; + int32 var_2576_groups_0 = const()[name = string("op_2576_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66149056))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66141632))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2576_cast_fp16 = conv(dilations = var_2576_dilations_0, groups = var_2576_groups_0, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2576_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2576_cast_fp16")]; + tensor var_2577_cast_fp16 = add(x = var_2570_cast_fp16, y = var_2576_cast_fp16)[name = string("op_2577_cast_fp16")]; + string var_2586_pad_type_0 = const()[name = string("op_2586_pad_type_0"), val = string("valid")]; + tensor var_2586_strides_0 = const()[name = string("op_2586_strides_0"), val = tensor([1, 1])]; + tensor var_2586_pad_0 = const()[name = string("op_2586_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2586_dilations_0 = const()[name = string("op_2586_dilations_0"), val = tensor([1, 1])]; + int32 var_2586_groups_0 = const()[name = string("op_2586_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66222848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66517824))))[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66517952)))]; + tensor var_2586_cast_fp16 = conv(bias = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2586_dilations_0, groups = var_2586_groups_0, pad = var_2586_pad_0, pad_type = var_2586_pad_type_0, strides = var_2586_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2586_cast_fp16")]; + string var_2592_pad_type_0 = const()[name = string("op_2592_pad_type_0"), val = string("valid")]; + tensor var_2592_strides_0 = const()[name = string("op_2592_strides_0"), val = tensor([1, 1])]; + tensor var_2592_pad_0 = const()[name = string("op_2592_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2592_dilations_0 = const()[name = string("op_2592_dilations_0"), val = tensor([1, 1])]; + int32 var_2592_groups_0 = const()[name = string("op_2592_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66525632))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66519552))))[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2592_cast_fp16 = conv(dilations = var_2592_dilations_0, groups = var_2592_groups_0, pad = var_2592_pad_0, pad_type = var_2592_pad_type_0, strides = var_2592_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = add(x = var_2586_cast_fp16, y = var_2592_cast_fp16)[name = string("op_2593_cast_fp16")]; + string var_2613_pad_type_0 = const()[name = string("op_2613_pad_type_0"), val = string("valid")]; + tensor var_2613_strides_0 = const()[name = string("op_2613_strides_0"), val = tensor([1, 1])]; + tensor var_2613_pad_0 = const()[name = string("op_2613_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2613_dilations_0 = const()[name = string("op_2613_dilations_0"), val = tensor([1, 1])]; + int32 var_2613_groups_0 = const()[name = string("op_2613_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66599424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66894400))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2613_cast_fp16 = conv(dilations = var_2613_dilations_0, groups = var_2613_groups_0, pad = var_2613_pad_0, pad_type = var_2613_pad_type_0, strides = var_2613_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2613_cast_fp16")]; + string var_2619_pad_type_0 = const()[name = string("op_2619_pad_type_0"), val = string("valid")]; + tensor var_2619_strides_0 = const()[name = string("op_2619_strides_0"), val = tensor([1, 1])]; + tensor var_2619_pad_0 = const()[name = string("op_2619_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2619_dilations_0 = const()[name = string("op_2619_dilations_0"), val = tensor([1, 1])]; + int32 var_2619_groups_0 = const()[name = string("op_2619_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66900864))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66894528))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2619_cast_fp16 = conv(dilations = var_2619_dilations_0, groups = var_2619_groups_0, pad = var_2619_pad_0, pad_type = var_2619_pad_type_0, strides = var_2619_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2619_cast_fp16")]; + tensor var_2620_cast_fp16 = add(x = var_2613_cast_fp16, y = var_2619_cast_fp16)[name = string("op_2620_cast_fp16")]; + string var_2629_pad_type_0 = const()[name = string("op_2629_pad_type_0"), val = string("valid")]; + tensor var_2629_strides_0 = const()[name = string("op_2629_strides_0"), val = tensor([1, 1])]; + tensor var_2629_pad_0 = const()[name = string("op_2629_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2629_dilations_0 = const()[name = string("op_2629_dilations_0"), val = tensor([1, 1])]; + int32 var_2629_groups_0 = const()[name = string("op_2629_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66974656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67269632))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67269760)))]; + tensor var_2629_cast_fp16 = conv(bias = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2629_dilations_0, groups = var_2629_groups_0, pad = var_2629_pad_0, pad_type = var_2629_pad_type_0, strides = var_2629_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2629_cast_fp16")]; + string var_2635_pad_type_0 = const()[name = string("op_2635_pad_type_0"), val = string("valid")]; + tensor var_2635_strides_0 = const()[name = string("op_2635_strides_0"), val = tensor([1, 1])]; + tensor var_2635_pad_0 = const()[name = string("op_2635_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2635_dilations_0 = const()[name = string("op_2635_dilations_0"), val = tensor([1, 1])]; + int32 var_2635_groups_0 = const()[name = string("op_2635_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67279872))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67271360))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2635_cast_fp16 = conv(dilations = var_2635_dilations_0, groups = var_2635_groups_0, pad = var_2635_pad_0, pad_type = var_2635_pad_type_0, strides = var_2635_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2635_cast_fp16")]; + tensor var_2636_cast_fp16 = add(x = var_2629_cast_fp16, y = var_2635_cast_fp16)[name = string("op_2636_cast_fp16")]; + string var_2656_pad_type_0 = const()[name = string("op_2656_pad_type_0"), val = string("valid")]; + tensor var_2656_strides_0 = const()[name = string("op_2656_strides_0"), val = tensor([1, 1])]; + tensor var_2656_pad_0 = const()[name = string("op_2656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2656_dilations_0 = const()[name = string("op_2656_dilations_0"), val = tensor([1, 1])]; + int32 var_2656_groups_0 = const()[name = string("op_2656_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67353664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67648640))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2656_cast_fp16 = conv(dilations = var_2656_dilations_0, groups = var_2656_groups_0, pad = var_2656_pad_0, pad_type = var_2656_pad_type_0, strides = var_2656_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2656_cast_fp16")]; + string var_2662_pad_type_0 = const()[name = string("op_2662_pad_type_0"), val = string("valid")]; + tensor var_2662_strides_0 = const()[name = string("op_2662_strides_0"), val = tensor([1, 1])]; + tensor var_2662_pad_0 = const()[name = string("op_2662_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2662_dilations_0 = const()[name = string("op_2662_dilations_0"), val = tensor([1, 1])]; + int32 var_2662_groups_0 = const()[name = string("op_2662_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67656128))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67648768))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2662_cast_fp16 = conv(dilations = var_2662_dilations_0, groups = var_2662_groups_0, pad = var_2662_pad_0, pad_type = var_2662_pad_type_0, strides = var_2662_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2662_cast_fp16")]; + tensor var_2663_cast_fp16 = add(x = var_2656_cast_fp16, y = var_2662_cast_fp16)[name = string("op_2663_cast_fp16")]; + string var_2672_pad_type_0 = const()[name = string("op_2672_pad_type_0"), val = string("valid")]; + tensor var_2672_strides_0 = const()[name = string("op_2672_strides_0"), val = tensor([1, 1])]; + tensor var_2672_pad_0 = const()[name = string("op_2672_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2672_dilations_0 = const()[name = string("op_2672_dilations_0"), val = tensor([1, 1])]; + int32 var_2672_groups_0 = const()[name = string("op_2672_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67729920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68024896))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68025024)))]; + tensor var_2672_cast_fp16 = conv(bias = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2672_dilations_0, groups = var_2672_groups_0, pad = var_2672_pad_0, pad_type = var_2672_pad_type_0, strides = var_2672_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2672_cast_fp16")]; + string var_2678_pad_type_0 = const()[name = string("op_2678_pad_type_0"), val = string("valid")]; + tensor var_2678_strides_0 = const()[name = string("op_2678_strides_0"), val = tensor([1, 1])]; + tensor var_2678_pad_0 = const()[name = string("op_2678_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2678_dilations_0 = const()[name = string("op_2678_dilations_0"), val = tensor([1, 1])]; + int32 var_2678_groups_0 = const()[name = string("op_2678_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68033728))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68026624))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2678_cast_fp16 = conv(dilations = var_2678_dilations_0, groups = var_2678_groups_0, pad = var_2678_pad_0, pad_type = var_2678_pad_type_0, strides = var_2678_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2678_cast_fp16")]; + tensor var_2679_cast_fp16 = add(x = var_2672_cast_fp16, y = var_2678_cast_fp16)[name = string("op_2679_cast_fp16")]; + string var_2699_pad_type_0 = const()[name = string("op_2699_pad_type_0"), val = string("valid")]; + tensor var_2699_strides_0 = const()[name = string("op_2699_strides_0"), val = tensor([1, 1])]; + tensor var_2699_pad_0 = const()[name = string("op_2699_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2699_dilations_0 = const()[name = string("op_2699_dilations_0"), val = tensor([1, 1])]; + int32 var_2699_groups_0 = const()[name = string("op_2699_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68107520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68402496))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2699_cast_fp16 = conv(dilations = var_2699_dilations_0, groups = var_2699_groups_0, pad = var_2699_pad_0, pad_type = var_2699_pad_type_0, strides = var_2699_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2699_cast_fp16")]; + string var_2705_pad_type_0 = const()[name = string("op_2705_pad_type_0"), val = string("valid")]; + tensor var_2705_strides_0 = const()[name = string("op_2705_strides_0"), val = tensor([1, 1])]; + tensor var_2705_pad_0 = const()[name = string("op_2705_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2705_dilations_0 = const()[name = string("op_2705_dilations_0"), val = tensor([1, 1])]; + int32 var_2705_groups_0 = const()[name = string("op_2705_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68409920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68402624))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2705_cast_fp16 = conv(dilations = var_2705_dilations_0, groups = var_2705_groups_0, pad = var_2705_pad_0, pad_type = var_2705_pad_type_0, strides = var_2705_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2705_cast_fp16")]; + tensor var_2706_cast_fp16 = add(x = var_2699_cast_fp16, y = var_2705_cast_fp16)[name = string("op_2706_cast_fp16")]; + string var_2715_pad_type_0 = const()[name = string("op_2715_pad_type_0"), val = string("valid")]; + tensor var_2715_strides_0 = const()[name = string("op_2715_strides_0"), val = tensor([1, 1])]; + tensor var_2715_pad_0 = const()[name = string("op_2715_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2715_dilations_0 = const()[name = string("op_2715_dilations_0"), val = tensor([1, 1])]; + int32 var_2715_groups_0 = const()[name = string("op_2715_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68483712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68778688))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68778816)))]; + tensor var_2715_cast_fp16 = conv(bias = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2715_dilations_0, groups = var_2715_groups_0, pad = var_2715_pad_0, pad_type = var_2715_pad_type_0, strides = var_2715_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2715_cast_fp16")]; + string var_2721_pad_type_0 = const()[name = string("op_2721_pad_type_0"), val = string("valid")]; + tensor var_2721_strides_0 = const()[name = string("op_2721_strides_0"), val = tensor([1, 1])]; + tensor var_2721_pad_0 = const()[name = string("op_2721_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2721_dilations_0 = const()[name = string("op_2721_dilations_0"), val = tensor([1, 1])]; + int32 var_2721_groups_0 = const()[name = string("op_2721_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68785920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68780416))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2721_cast_fp16 = conv(dilations = var_2721_dilations_0, groups = var_2721_groups_0, pad = var_2721_pad_0, pad_type = var_2721_pad_type_0, strides = var_2721_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2721_cast_fp16")]; + tensor var_2722_cast_fp16 = add(x = var_2715_cast_fp16, y = var_2721_cast_fp16)[name = string("op_2722_cast_fp16")]; + string var_2742_pad_type_0 = const()[name = string("op_2742_pad_type_0"), val = string("valid")]; + tensor var_2742_strides_0 = const()[name = string("op_2742_strides_0"), val = tensor([1, 1])]; + tensor var_2742_pad_0 = const()[name = string("op_2742_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2742_dilations_0 = const()[name = string("op_2742_dilations_0"), val = tensor([1, 1])]; + int32 var_2742_groups_0 = const()[name = string("op_2742_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68859712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69154688))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2742_cast_fp16 = conv(dilations = var_2742_dilations_0, groups = var_2742_groups_0, pad = var_2742_pad_0, pad_type = var_2742_pad_type_0, strides = var_2742_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2742_cast_fp16")]; + string var_2748_pad_type_0 = const()[name = string("op_2748_pad_type_0"), val = string("valid")]; + tensor var_2748_strides_0 = const()[name = string("op_2748_strides_0"), val = tensor([1, 1])]; + tensor var_2748_pad_0 = const()[name = string("op_2748_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2748_dilations_0 = const()[name = string("op_2748_dilations_0"), val = tensor([1, 1])]; + int32 var_2748_groups_0 = const()[name = string("op_2748_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69161664))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69154816))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2748_cast_fp16 = conv(dilations = var_2748_dilations_0, groups = var_2748_groups_0, pad = var_2748_pad_0, pad_type = var_2748_pad_type_0, strides = var_2748_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2748_cast_fp16")]; + tensor var_2749_cast_fp16 = add(x = var_2742_cast_fp16, y = var_2748_cast_fp16)[name = string("op_2749_cast_fp16")]; + string var_2758_pad_type_0 = const()[name = string("op_2758_pad_type_0"), val = string("valid")]; + tensor var_2758_strides_0 = const()[name = string("op_2758_strides_0"), val = tensor([1, 1])]; + tensor var_2758_pad_0 = const()[name = string("op_2758_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2758_dilations_0 = const()[name = string("op_2758_dilations_0"), val = tensor([1, 1])]; + int32 var_2758_groups_0 = const()[name = string("op_2758_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69235456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69530432))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69530560)))]; + tensor var_2758_cast_fp16 = conv(bias = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2758_dilations_0, groups = var_2758_groups_0, pad = var_2758_pad_0, pad_type = var_2758_pad_type_0, strides = var_2758_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2758_cast_fp16")]; + string var_2764_pad_type_0 = const()[name = string("op_2764_pad_type_0"), val = string("valid")]; + tensor var_2764_strides_0 = const()[name = string("op_2764_strides_0"), val = tensor([1, 1])]; + tensor var_2764_pad_0 = const()[name = string("op_2764_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2764_dilations_0 = const()[name = string("op_2764_dilations_0"), val = tensor([1, 1])]; + int32 var_2764_groups_0 = const()[name = string("op_2764_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69537216))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69532160))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2764_cast_fp16 = conv(dilations = var_2764_dilations_0, groups = var_2764_groups_0, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2764_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2764_cast_fp16")]; + tensor var_2765_cast_fp16 = add(x = var_2758_cast_fp16, y = var_2764_cast_fp16)[name = string("op_2765_cast_fp16")]; + string var_2785_pad_type_0 = const()[name = string("op_2785_pad_type_0"), val = string("valid")]; + tensor var_2785_strides_0 = const()[name = string("op_2785_strides_0"), val = tensor([1, 1])]; + tensor var_2785_pad_0 = const()[name = string("op_2785_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2785_dilations_0 = const()[name = string("op_2785_dilations_0"), val = tensor([1, 1])]; + int32 var_2785_groups_0 = const()[name = string("op_2785_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69611008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69905984))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2785_cast_fp16 = conv(dilations = var_2785_dilations_0, groups = var_2785_groups_0, pad = var_2785_pad_0, pad_type = var_2785_pad_type_0, strides = var_2785_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2785_cast_fp16")]; + string var_2791_pad_type_0 = const()[name = string("op_2791_pad_type_0"), val = string("valid")]; + tensor var_2791_strides_0 = const()[name = string("op_2791_strides_0"), val = tensor([1, 1])]; + tensor var_2791_pad_0 = const()[name = string("op_2791_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2791_dilations_0 = const()[name = string("op_2791_dilations_0"), val = tensor([1, 1])]; + int32 var_2791_groups_0 = const()[name = string("op_2791_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69913536))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69906112))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2791_cast_fp16 = conv(dilations = var_2791_dilations_0, groups = var_2791_groups_0, pad = var_2791_pad_0, pad_type = var_2791_pad_type_0, strides = var_2791_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2791_cast_fp16")]; + tensor var_2792_cast_fp16 = add(x = var_2785_cast_fp16, y = var_2791_cast_fp16)[name = string("op_2792_cast_fp16")]; + string var_2801_pad_type_0 = const()[name = string("op_2801_pad_type_0"), val = string("valid")]; + tensor var_2801_strides_0 = const()[name = string("op_2801_strides_0"), val = tensor([1, 1])]; + tensor var_2801_pad_0 = const()[name = string("op_2801_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2801_dilations_0 = const()[name = string("op_2801_dilations_0"), val = tensor([1, 1])]; + int32 var_2801_groups_0 = const()[name = string("op_2801_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69987328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70282304))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70282432)))]; + tensor var_2801_cast_fp16 = conv(bias = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2801_dilations_0, groups = var_2801_groups_0, pad = var_2801_pad_0, pad_type = var_2801_pad_type_0, strides = var_2801_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2801_cast_fp16")]; + string var_2807_pad_type_0 = const()[name = string("op_2807_pad_type_0"), val = string("valid")]; + tensor var_2807_strides_0 = const()[name = string("op_2807_strides_0"), val = tensor([1, 1])]; + tensor var_2807_pad_0 = const()[name = string("op_2807_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2807_dilations_0 = const()[name = string("op_2807_dilations_0"), val = tensor([1, 1])]; + int32 var_2807_groups_0 = const()[name = string("op_2807_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70289984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70284032))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2807_cast_fp16 = conv(dilations = var_2807_dilations_0, groups = var_2807_groups_0, pad = var_2807_pad_0, pad_type = var_2807_pad_type_0, strides = var_2807_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2807_cast_fp16")]; + tensor var_2808_cast_fp16 = add(x = var_2801_cast_fp16, y = var_2807_cast_fp16)[name = string("op_2808_cast_fp16")]; + string var_2828_pad_type_0 = const()[name = string("op_2828_pad_type_0"), val = string("valid")]; + tensor var_2828_strides_0 = const()[name = string("op_2828_strides_0"), val = tensor([1, 1])]; + tensor var_2828_pad_0 = const()[name = string("op_2828_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2828_dilations_0 = const()[name = string("op_2828_dilations_0"), val = tensor([1, 1])]; + int32 var_2828_groups_0 = const()[name = string("op_2828_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70363776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70658752))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; + tensor var_2828_cast_fp16 = conv(dilations = var_2828_dilations_0, groups = var_2828_groups_0, pad = var_2828_pad_0, pad_type = var_2828_pad_type_0, strides = var_2828_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2828_cast_fp16")]; + string var_2834_pad_type_0 = const()[name = string("op_2834_pad_type_0"), val = string("valid")]; + tensor var_2834_strides_0 = const()[name = string("op_2834_strides_0"), val = tensor([1, 1])]; + tensor var_2834_pad_0 = const()[name = string("op_2834_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2834_dilations_0 = const()[name = string("op_2834_dilations_0"), val = tensor([1, 1])]; + int32 var_2834_groups_0 = const()[name = string("op_2834_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70665152))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70658880))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2834_cast_fp16 = conv(dilations = var_2834_dilations_0, groups = var_2834_groups_0, pad = var_2834_pad_0, pad_type = var_2834_pad_type_0, strides = var_2834_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2834_cast_fp16")]; + tensor k_cast_fp16 = add(x = var_2828_cast_fp16, y = var_2834_cast_fp16)[name = string("k_cast_fp16")]; + string var_2844_pad_type_0 = const()[name = string("op_2844_pad_type_0"), val = string("valid")]; + tensor var_2844_strides_0 = const()[name = string("op_2844_strides_0"), val = tensor([1, 1])]; + tensor var_2844_pad_0 = const()[name = string("op_2844_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2844_dilations_0 = const()[name = string("op_2844_dilations_0"), val = tensor([1, 1])]; + int32 var_2844_groups_0 = const()[name = string("op_2844_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70738944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71033920))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; + tensor decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71034048)))]; + tensor var_2844_cast_fp16 = conv(bias = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2844_dilations_0, groups = var_2844_groups_0, pad = var_2844_pad_0, pad_type = var_2844_pad_type_0, strides = var_2844_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2844_cast_fp16")]; + string var_2850_pad_type_0 = const()[name = string("op_2850_pad_type_0"), val = string("valid")]; + tensor var_2850_strides_0 = const()[name = string("op_2850_strides_0"), val = tensor([1, 1])]; + tensor var_2850_pad_0 = const()[name = string("op_2850_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2850_dilations_0 = const()[name = string("op_2850_dilations_0"), val = tensor([1, 1])]; + int32 var_2850_groups_0 = const()[name = string("op_2850_groups_0"), val = int32(1)]; + tensor decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71041984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71035648))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; + tensor var_2850_cast_fp16 = conv(dilations = var_2850_dilations_0, groups = var_2850_groups_0, pad = var_2850_pad_0, pad_type = var_2850_pad_type_0, strides = var_2850_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2850_cast_fp16")]; + tensor v_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2850_cast_fp16)[name = string("v_cast_fp16")]; + int32 var_2856 = const()[name = string("op_2856"), val = int32(0)]; + bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; + tensor input_99_cast_fp16 = concat(axis = var_2856, interleave = input_99_interleave_0, values = (var_2362_cast_fp16, var_2405_cast_fp16, var_2448_cast_fp16, var_2491_cast_fp16, var_2534_cast_fp16, var_2577_cast_fp16, var_2620_cast_fp16, var_2663_cast_fp16, var_2706_cast_fp16, var_2749_cast_fp16, var_2792_cast_fp16, k_cast_fp16))[name = string("input_99_cast_fp16")]; + int32 var_2859 = const()[name = string("op_2859"), val = int32(0)]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_2859, interleave = input_interleave_0, values = (var_2378_cast_fp16, var_2421_cast_fp16, var_2464_cast_fp16, var_2507_cast_fp16, var_2550_cast_fp16, var_2593_cast_fp16, var_2636_cast_fp16, var_2679_cast_fp16, var_2722_cast_fp16, var_2765_cast_fp16, var_2808_cast_fp16, v_cast_fp16))[name = string("input_cast_fp16")]; + tensor var_2866_pad_0 = const()[name = string("op_2866_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 36])]; + string var_2866_mode_0 = const()[name = string("op_2866_mode_0"), val = string("constant")]; + fp16 const_13_to_fp16 = const()[name = string("const_13_to_fp16"), val = fp16(0x0p+0)]; + tensor encoder_attn_key_cache = pad(constant_val = const_13_to_fp16, mode = var_2866_mode_0, pad = var_2866_pad_0, x = input_99_cast_fp16)[name = string("op_2866_cast_fp16")]; + tensor var_2872_pad_0 = const()[name = string("op_2872_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 36])]; + string var_2872_mode_0 = const()[name = string("op_2872_mode_0"), val = string("constant")]; + fp16 const_14_to_fp16 = const()[name = string("const_14_to_fp16"), val = fp16(0x0p+0)]; + tensor encoder_attn_value_cache = pad(constant_val = const_14_to_fp16, mode = var_2872_mode_0, pad = var_2872_pad_0, x = input_cast_fp16)[name = string("op_2872_cast_fp16")]; + } -> (encoder_output_embeds, encoder_attn_key_cache, encoder_attn_value_cache); +} \ No newline at end of file