diff --git "a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil" "b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil" deleted file mode 100644--- "a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil" +++ /dev/null @@ -1,941 +0,0 @@ -program(1.3) -[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] -{ - func main(tensor cache_length, tensor decoder_key_padding_mask, state> encoder_attn_key_cache, state> encoder_attn_key_padding_mask, state> encoder_attn_value_cache, tensor input_ids, tensor kv_cache_update_mask, state> self_attn_key_cache, state> self_attn_value_cache) { - int32 var_26_axis_0 = const()[name = string("op_26_axis_0"), val = int32(0)]; - int32 var_26_batch_dims_0 = const()[name = string("op_26_batch_dims_0"), val = int32(0)]; - bool var_26_validate_indices_0 = const()[name = string("op_26_validate_indices_0"), val = bool(false)]; - tensor embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; - tensor var_26_cast_fp16 = gather(axis = var_26_axis_0, batch_dims = var_26_batch_dims_0, indices = input_ids, validate_indices = var_26_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_26_cast_fp16")]; - int32 var_33_axis_0 = const()[name = string("op_33_axis_0"), val = int32(0)]; - int32 var_33_batch_dims_0 = const()[name = string("op_33_batch_dims_0"), val = int32(0)]; - bool var_33_validate_indices_0 = const()[name = string("op_33_validate_indices_0"), val = bool(false)]; - tensor embed_positions_inlier_module_weight_to_fp16 = const()[name = string("embed_positions_inlier_module_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132777088)))]; - string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")]; - tensor cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_0")]; - tensor var_33_cast_fp16_cast_uint16 = gather(axis = var_33_axis_0, batch_dims = var_33_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_33_validate_indices_0, x = embed_positions_inlier_module_weight_to_fp16)[name = string("op_33_cast_fp16_cast_uint16")]; - int32 var_35_axis_0 = const()[name = string("op_35_axis_0"), val = int32(0)]; - int32 var_35_batch_dims_0 = const()[name = string("op_35_batch_dims_0"), val = int32(0)]; - bool var_35_validate_indices_0 = const()[name = string("op_35_validate_indices_0"), val = bool(false)]; - tensor embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133941312))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924032))))[name = string("embed_positions_outlier_module_weight_to_fp16_sparsified")]; - tensor var_35_cast_fp16_cast_uint16 = gather(axis = var_35_axis_0, batch_dims = var_35_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_35_validate_indices_0, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = string("op_35_cast_fp16_cast_uint16")]; - tensor var_36_cast_fp16 = add(x = var_33_cast_fp16_cast_uint16, y = var_35_cast_fp16_cast_uint16)[name = string("op_36_cast_fp16")]; - tensor hidden_states_1_cast_fp16 = add(x = var_26_cast_fp16, y = var_36_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; - tensor var_50_axes_0 = const()[name = string("op_50_axes_0"), val = tensor([2])]; - tensor var_50_cast_fp16 = expand_dims(axes = var_50_axes_0, x = hidden_states_1_cast_fp16)[name = string("op_50_cast_fp16")]; - tensor inputs_1_axes_0 = const()[name = string("inputs_1_axes_0"), val = tensor([3])]; - tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_50_cast_fp16)[name = string("inputs_1_cast_fp16")]; - tensor read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")]; - tensor tile_0 = const()[name = string("tile_0"), val = tensor([1, 1, 1, 1])]; - int32 var_55_axis_0 = const()[name = string("op_55_axis_0"), val = int32(0)]; - tensor var_55_cast_fp16_0, tensor var_55_cast_fp16_1, tensor var_55_cast_fp16_2, tensor var_55_cast_fp16_3 = split(axis = var_55_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_55_cast_fp16")]; - tensor read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")]; - tensor tile_1 = const()[name = string("tile_1"), val = tensor([1, 1, 1, 1])]; - int32 var_62_axis_0 = const()[name = string("op_62_axis_0"), val = int32(0)]; - tensor var_62_cast_fp16_0, tensor var_62_cast_fp16_1, tensor var_62_cast_fp16_2, tensor var_62_cast_fp16_3 = split(axis = var_62_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_62_cast_fp16")]; - tensor read_state_2 = read_state(input = encoder_attn_key_cache)[name = string("read_state_2")]; - tensor obj_17_begin_0 = const()[name = string("obj_17_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor obj_17_end_0 = const()[name = string("obj_17_end_0"), val = tensor([1, 1280, 1, 1536])]; - tensor obj_17_end_mask_0 = const()[name = string("obj_17_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_17_cast_fp16 = slice_by_index(begin = obj_17_begin_0, end = obj_17_end_0, end_mask = obj_17_end_mask_0, x = read_state_2)[name = string("obj_17_cast_fp16")]; - tensor read_state_3 = read_state(input = encoder_attn_value_cache)[name = string("read_state_3")]; - tensor obj_19_begin_0 = const()[name = string("obj_19_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor obj_19_end_0 = const()[name = string("obj_19_end_0"), val = tensor([1, 1280, 1, 1536])]; - tensor obj_19_end_mask_0 = const()[name = string("obj_19_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_19_cast_fp16 = slice_by_index(begin = obj_19_begin_0, end = obj_19_end_0, end_mask = obj_19_end_mask_0, x = read_state_3)[name = string("obj_19_cast_fp16")]; - int32 var_82 = const()[name = string("op_82"), val = int32(3)]; - tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; - fp16 var_107_to_fp16 = const()[name = string("op_107_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_107_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")]; - tensor obj_5_mean_0_to_fp16 = const()[name = string("obj_5_mean_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134013056)))]; - tensor obj_5_variance_0_to_fp16 = const()[name = string("obj_5_variance_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134015680)))]; - tensor obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134018304)))]; - tensor obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134020928)))]; - fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_5_cast_fp16")]; - string var_129_pad_type_0 = const()[name = string("op_129_pad_type_0"), val = string("valid")]; - tensor var_129_strides_0 = const()[name = string("op_129_strides_0"), val = tensor([1, 1])]; - tensor var_129_pad_0 = const()[name = string("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_129_dilations_0 = const()[name = string("op_129_dilations_0"), val = tensor([1, 1])]; - int32 var_129_groups_0 = const()[name = string("op_129_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134023552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134842816))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134842944)))]; - tensor var_129_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_129_dilations_0, groups = var_129_groups_0, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_129_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_129_cast_fp16")]; - string var_135_pad_type_0 = const()[name = string("op_135_pad_type_0"), val = string("valid")]; - tensor var_135_strides_0 = const()[name = string("op_135_strides_0"), val = tensor([1, 1])]; - tensor var_135_pad_0 = const()[name = string("op_135_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_135_dilations_0 = const()[name = string("op_135_dilations_0"), val = tensor([1, 1])]; - int32 var_135_groups_0 = const()[name = string("op_135_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134918592))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134845568))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_135_cast_fp16 = conv(dilations = var_135_dilations_0, groups = var_135_groups_0, pad = var_135_pad_0, pad_type = var_135_pad_type_0, strides = var_135_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_135_cast_fp16")]; - tensor query_1_cast_fp16 = add(x = var_129_cast_fp16, y = var_135_cast_fp16)[name = string("query_1_cast_fp16")]; - string var_144_pad_type_0 = const()[name = string("op_144_pad_type_0"), val = string("valid")]; - tensor var_144_strides_0 = const()[name = string("op_144_strides_0"), val = tensor([1, 1])]; - tensor var_144_pad_0 = const()[name = string("op_144_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_144_dilations_0 = const()[name = string("op_144_dilations_0"), val = tensor([1, 1])]; - int32 var_144_groups_0 = const()[name = string("op_144_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135123456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135942720))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_144_cast_fp16 = conv(dilations = var_144_dilations_0, groups = var_144_groups_0, pad = var_144_pad_0, pad_type = var_144_pad_type_0, strides = var_144_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_144_cast_fp16")]; - string var_150_pad_type_0 = const()[name = string("op_150_pad_type_0"), val = string("valid")]; - tensor var_150_strides_0 = const()[name = string("op_150_strides_0"), val = tensor([1, 1])]; - tensor var_150_pad_0 = const()[name = string("op_150_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_150_dilations_0 = const()[name = string("op_150_dilations_0"), val = tensor([1, 1])]; - int32 var_150_groups_0 = const()[name = string("op_150_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135976320))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135942848))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_150_cast_fp16 = conv(dilations = var_150_dilations_0, groups = var_150_groups_0, pad = var_150_pad_0, pad_type = var_150_pad_type_0, strides = var_150_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_150_cast_fp16")]; - tensor current_key_1_cast_fp16 = add(x = var_144_cast_fp16, y = var_150_cast_fp16)[name = string("current_key_1_cast_fp16")]; - string var_160_pad_type_0 = const()[name = string("op_160_pad_type_0"), val = string("valid")]; - tensor var_160_strides_0 = const()[name = string("op_160_strides_0"), val = tensor([1, 1])]; - tensor var_160_pad_0 = const()[name = string("op_160_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_160_dilations_0 = const()[name = string("op_160_dilations_0"), val = tensor([1, 1])]; - int32 var_160_groups_0 = const()[name = string("op_160_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136181184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137000448))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137000576)))]; - tensor var_160_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_160_dilations_0, groups = var_160_groups_0, pad = var_160_pad_0, pad_type = var_160_pad_type_0, strides = var_160_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_160_cast_fp16")]; - string var_166_pad_type_0 = const()[name = string("op_166_pad_type_0"), val = string("valid")]; - tensor var_166_strides_0 = const()[name = string("op_166_strides_0"), val = tensor([1, 1])]; - tensor var_166_pad_0 = const()[name = string("op_166_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_166_dilations_0 = const()[name = string("op_166_dilations_0"), val = tensor([1, 1])]; - int32 var_166_groups_0 = const()[name = string("op_166_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137046720))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137003200))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_166_cast_fp16 = conv(dilations = var_166_dilations_0, groups = var_166_groups_0, pad = var_166_pad_0, pad_type = var_166_pad_type_0, strides = var_166_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_166_cast_fp16")]; - tensor current_value_1_cast_fp16 = add(x = var_160_cast_fp16, y = var_166_cast_fp16)[name = string("current_value_1_cast_fp16")]; - tensor var_169_axes_0 = const()[name = string("op_169_axes_0"), val = tensor([1])]; - tensor var_169_cast_fp16 = expand_dims(axes = var_169_axes_0, x = kv_cache_update_mask)[name = string("op_169_cast_fp16")]; - tensor var_170_axes_0 = const()[name = string("op_170_axes_0"), val = tensor([2])]; - tensor var_170_cast_fp16 = expand_dims(axes = var_170_axes_0, x = var_169_cast_fp16)[name = string("op_170_cast_fp16")]; - tensor var_172_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_170_cast_fp16)[name = string("op_172_cast_fp16")]; - tensor key_1_cast_fp16 = add(x = var_55_cast_fp16_0, y = var_172_cast_fp16)[name = string("key_1_cast_fp16")]; - tensor var_174_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_170_cast_fp16)[name = string("op_174_cast_fp16")]; - tensor value_1_cast_fp16 = add(x = var_62_cast_fp16_0, y = var_174_cast_fp16)[name = string("value_1_cast_fp16")]; - tensor var_177 = const()[name = string("op_177"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_1_cast_fp16 = reshape(shape = var_177, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; - fp16 var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = fp16(0x1p-3)]; - tensor var_180_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_179_to_fp16)[name = string("op_180_cast_fp16")]; - tensor var_181 = const()[name = string("op_181"), val = tensor([1, 20, 64, -1])]; - tensor var_182_cast_fp16 = reshape(shape = var_181, x = key_1_cast_fp16)[name = string("op_182_cast_fp16")]; - bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; - bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; - tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_180_cast_fp16, y = var_182_cast_fp16)[name = string("mh_w_1_cast_fp16")]; - tensor var_186_axes_0 = const()[name = string("op_186_axes_0"), val = tensor([1])]; - tensor var_186_cast_fp16 = expand_dims(axes = var_186_axes_0, x = decoder_key_padding_mask)[name = string("op_186_cast_fp16")]; - tensor var_187_axes_0 = const()[name = string("op_187_axes_0"), val = tensor([2])]; - tensor var_187_cast_fp16 = expand_dims(axes = var_187_axes_0, x = var_186_cast_fp16)[name = string("op_187_cast_fp16")]; - tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_3_cast_fp16")]; - tensor var_190_cast_fp16 = softmax(axis = var_82, x = mh_w_3_cast_fp16)[name = string("op_190_cast_fp16")]; - tensor var_191 = const()[name = string("op_191"), val = tensor([1, 20, 64, -1])]; - tensor var_192_cast_fp16 = reshape(shape = var_191, x = value_1_cast_fp16)[name = string("op_192_cast_fp16")]; - bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; - bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_192_cast_fp16, y = var_190_cast_fp16)[name = string("attn_1_cast_fp16")]; - tensor var_195 = const()[name = string("op_195"), val = tensor([1, 1280, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_195, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; - string var_205_pad_type_0 = const()[name = string("op_205_pad_type_0"), val = string("valid")]; - tensor var_205_strides_0 = const()[name = string("op_205_strides_0"), val = tensor([1, 1])]; - tensor var_205_pad_0 = const()[name = string("op_205_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_205_dilations_0 = const()[name = string("op_205_dilations_0"), val = tensor([1, 1])]; - int32 var_205_groups_0 = const()[name = string("op_205_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137251584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138070848))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138070976)))]; - tensor var_205_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_205_dilations_0, groups = var_205_groups_0, pad = var_205_pad_0, pad_type = var_205_pad_type_0, strides = var_205_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_205_cast_fp16")]; - string var_211_pad_type_0 = const()[name = string("op_211_pad_type_0"), val = string("valid")]; - tensor var_211_strides_0 = const()[name = string("op_211_strides_0"), val = tensor([1, 1])]; - tensor var_211_pad_0 = const()[name = string("op_211_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_211_dilations_0 = const()[name = string("op_211_dilations_0"), val = tensor([1, 1])]; - int32 var_211_groups_0 = const()[name = string("op_211_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138130624))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138073600))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_211_cast_fp16 = conv(dilations = var_211_dilations_0, groups = var_211_groups_0, pad = var_211_pad_0, pad_type = var_211_pad_type_0, strides = var_211_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_211_cast_fp16")]; - tensor obj_11_cast_fp16 = add(x = var_205_cast_fp16, y = var_211_cast_fp16)[name = string("obj_11_cast_fp16")]; - tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_3_cast_fp16")]; - tensor out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor([1])]; - fp16 var_226_to_fp16 = const()[name = string("op_226_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_226_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")]; - tensor obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138335488)))]; - tensor obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138338112)))]; - fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("obj_13_cast_fp16")]; - string var_246_pad_type_0 = const()[name = string("op_246_pad_type_0"), val = string("valid")]; - tensor var_246_strides_0 = const()[name = string("op_246_strides_0"), val = tensor([1, 1])]; - tensor var_246_pad_0 = const()[name = string("op_246_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_246_dilations_0 = const()[name = string("op_246_dilations_0"), val = tensor([1, 1])]; - int32 var_246_groups_0 = const()[name = string("op_246_groups_0"), val = int32(1)]; - tensor layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138340736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139160000))))[name = string("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139160128)))]; - tensor var_246_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_246_dilations_0, groups = var_246_groups_0, pad = var_246_pad_0, pad_type = var_246_pad_type_0, strides = var_246_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_246_cast_fp16")]; - string var_252_pad_type_0 = const()[name = string("op_252_pad_type_0"), val = string("valid")]; - tensor var_252_strides_0 = const()[name = string("op_252_strides_0"), val = tensor([1, 1])]; - tensor var_252_pad_0 = const()[name = string("op_252_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_252_dilations_0 = const()[name = string("op_252_dilations_0"), val = tensor([1, 1])]; - int32 var_252_groups_0 = const()[name = string("op_252_groups_0"), val = int32(1)]; - tensor layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139188224))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139162752))))[name = string("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_252_cast_fp16 = conv(dilations = var_252_dilations_0, groups = var_252_groups_0, pad = var_252_pad_0, pad_type = var_252_pad_type_0, strides = var_252_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_252_cast_fp16")]; - tensor query_3_cast_fp16 = add(x = var_246_cast_fp16, y = var_252_cast_fp16)[name = string("query_3_cast_fp16")]; - tensor var_255 = const()[name = string("op_255"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_3_cast_fp16 = reshape(shape = var_255, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")]; - fp16 var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = fp16(0x1p-3)]; - tensor var_258_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_257_to_fp16)[name = string("op_258_cast_fp16")]; - tensor var_259 = const()[name = string("op_259"), val = tensor([1, 20, 64, -1])]; - tensor var_260_cast_fp16 = reshape(shape = var_259, x = obj_17_cast_fp16)[name = string("op_260_cast_fp16")]; - bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; - bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; - tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_258_cast_fp16, y = var_260_cast_fp16)[name = string("mh_w_5_cast_fp16")]; - tensor read_state_4 = read_state(input = encoder_attn_key_padding_mask)[name = string("read_state_4")]; - tensor var_264_axes_0 = const()[name = string("op_264_axes_0"), val = tensor([1])]; - tensor var_264_cast_fp16 = expand_dims(axes = var_264_axes_0, x = read_state_4)[name = string("op_264_cast_fp16")]; - tensor var_265_axes_0 = const()[name = string("op_265_axes_0"), val = tensor([2])]; - tensor var_265_cast_fp16 = expand_dims(axes = var_265_axes_0, x = var_264_cast_fp16)[name = string("op_265_cast_fp16")]; - tensor mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_7_cast_fp16")]; - tensor obj_23_cast_fp16 = softmax(axis = var_82, x = mh_w_7_cast_fp16)[name = string("obj_23_cast_fp16")]; - tensor var_269 = const()[name = string("op_269"), val = tensor([1, 20, 64, -1])]; - tensor var_270_cast_fp16 = reshape(shape = var_269, x = obj_19_cast_fp16)[name = string("op_270_cast_fp16")]; - bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; - bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_270_cast_fp16, y = obj_23_cast_fp16)[name = string("attn_3_cast_fp16")]; - tensor var_273 = const()[name = string("op_273"), val = tensor([1, 1280, 1, -1])]; - tensor input_3_cast_fp16 = reshape(shape = var_273, x = attn_3_cast_fp16)[name = string("input_3_cast_fp16")]; - string var_283_pad_type_0 = const()[name = string("op_283_pad_type_0"), val = string("valid")]; - tensor var_283_strides_0 = const()[name = string("op_283_strides_0"), val = tensor([1, 1])]; - tensor var_283_pad_0 = const()[name = string("op_283_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_283_dilations_0 = const()[name = string("op_283_dilations_0"), val = tensor([1, 1])]; - int32 var_283_groups_0 = const()[name = string("op_283_groups_0"), val = int32(1)]; - tensor layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139393088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140212352))))[name = string("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140212480)))]; - tensor var_283_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_283_dilations_0, groups = var_283_groups_0, pad = var_283_pad_0, pad_type = var_283_pad_type_0, strides = var_283_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_283_cast_fp16")]; - string var_289_pad_type_0 = const()[name = string("op_289_pad_type_0"), val = string("valid")]; - tensor var_289_strides_0 = const()[name = string("op_289_strides_0"), val = tensor([1, 1])]; - tensor var_289_pad_0 = const()[name = string("op_289_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_289_dilations_0 = const()[name = string("op_289_dilations_0"), val = tensor([1, 1])]; - int32 var_289_groups_0 = const()[name = string("op_289_groups_0"), val = int32(1)]; - tensor layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140227264))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140215104))))[name = string("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_289_cast_fp16 = conv(dilations = var_289_dilations_0, groups = var_289_groups_0, pad = var_289_pad_0, pad_type = var_289_pad_type_0, strides = var_289_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_289_cast_fp16")]; - tensor obj_21_cast_fp16 = add(x = var_283_cast_fp16, y = var_289_cast_fp16)[name = string("obj_21_cast_fp16")]; - tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_21_cast_fp16)[name = string("inputs_5_cast_fp16")]; - tensor out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor([1])]; - fp16 var_300_to_fp16 = const()[name = string("op_300_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_300_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")]; - tensor input_5_gamma_0_to_fp16 = const()[name = string("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140432128)))]; - tensor input_5_beta_0_to_fp16 = const()[name = string("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140434752)))]; - fp16 input_5_epsilon_0_to_fp16 = const()[name = string("input_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("input_5_cast_fp16")]; - string var_318_pad_type_0 = const()[name = string("op_318_pad_type_0"), val = string("valid")]; - tensor var_318_strides_0 = const()[name = string("op_318_strides_0"), val = tensor([1, 1])]; - tensor var_318_pad_0 = const()[name = string("op_318_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_318_dilations_0 = const()[name = string("op_318_dilations_0"), val = tensor([1, 1])]; - int32 var_318_groups_0 = const()[name = string("op_318_groups_0"), val = int32(1)]; - tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140437376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143714240))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143714368)))]; - tensor var_318_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_318_dilations_0, groups = var_318_groups_0, pad = var_318_pad_0, pad_type = var_318_pad_type_0, strides = var_318_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("op_318_cast_fp16")]; - string var_324_pad_type_0 = const()[name = string("op_324_pad_type_0"), val = string("valid")]; - tensor var_324_strides_0 = const()[name = string("op_324_strides_0"), val = tensor([1, 1])]; - tensor var_324_pad_0 = const()[name = string("op_324_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_324_dilations_0 = const()[name = string("op_324_dilations_0"), val = tensor([1, 1])]; - int32 var_324_groups_0 = const()[name = string("op_324_groups_0"), val = int32(1)]; - tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143826240))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143724672))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_324_cast_fp16 = conv(dilations = var_324_dilations_0, groups = var_324_groups_0, pad = var_324_pad_0, pad_type = var_324_pad_type_0, strides = var_324_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = string("op_324_cast_fp16")]; - tensor input_7_cast_fp16 = add(x = var_318_cast_fp16, y = var_324_cast_fp16)[name = string("input_7_cast_fp16")]; - string input_9_mode_0 = const()[name = string("input_9_mode_0"), val = string("EXACT")]; - tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = string("input_9_cast_fp16")]; - string var_335_pad_type_0 = const()[name = string("op_335_pad_type_0"), val = string("valid")]; - tensor var_335_strides_0 = const()[name = string("op_335_strides_0"), val = tensor([1, 1])]; - tensor var_335_pad_0 = const()[name = string("op_335_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_335_dilations_0 = const()[name = string("op_335_dilations_0"), val = tensor([1, 1])]; - int32 var_335_groups_0 = const()[name = string("op_335_groups_0"), val = int32(1)]; - tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144645504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922368))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922496)))]; - tensor var_335_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_335_dilations_0, groups = var_335_groups_0, pad = var_335_pad_0, pad_type = var_335_pad_type_0, strides = var_335_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_335_cast_fp16")]; - string var_341_pad_type_0 = const()[name = string("op_341_pad_type_0"), val = string("valid")]; - tensor var_341_strides_0 = const()[name = string("op_341_strides_0"), val = tensor([1, 1])]; - tensor var_341_pad_0 = const()[name = string("op_341_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_341_dilations_0 = const()[name = string("op_341_dilations_0"), val = tensor([1, 1])]; - int32 var_341_groups_0 = const()[name = string("op_341_groups_0"), val = int32(1)]; - tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148107648))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147925120))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_341_cast_fp16 = conv(dilations = var_341_dilations_0, groups = var_341_groups_0, pad = var_341_pad_0, pad_type = var_341_pad_type_0, strides = var_341_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_341_cast_fp16")]; - tensor hidden_states_3_cast_fp16 = add(x = var_335_cast_fp16, y = var_341_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; - tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = string("inputs_7_cast_fp16")]; - tensor obj_35_begin_0 = const()[name = string("obj_35_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor obj_35_end_0 = const()[name = string("obj_35_end_0"), val = tensor([2, 1280, 1, 1536])]; - tensor obj_35_end_mask_0 = const()[name = string("obj_35_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_35_cast_fp16 = slice_by_index(begin = obj_35_begin_0, end = obj_35_end_0, end_mask = obj_35_end_mask_0, x = read_state_2)[name = string("obj_35_cast_fp16")]; - tensor obj_37_begin_0 = const()[name = string("obj_37_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor obj_37_end_0 = const()[name = string("obj_37_end_0"), val = tensor([2, 1280, 1, 1536])]; - tensor obj_37_end_mask_0 = const()[name = string("obj_37_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_37_cast_fp16 = slice_by_index(begin = obj_37_begin_0, end = obj_37_end_0, end_mask = obj_37_end_mask_0, x = read_state_3)[name = string("obj_37_cast_fp16")]; - int32 var_363 = const()[name = string("op_363"), val = int32(3)]; - tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; - fp16 var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_388_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")]; - tensor obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148926912)))]; - tensor obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148929536)))]; - fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("obj_25_cast_fp16")]; - string var_410_pad_type_0 = const()[name = string("op_410_pad_type_0"), val = string("valid")]; - tensor var_410_strides_0 = const()[name = string("op_410_strides_0"), val = tensor([1, 1])]; - tensor var_410_pad_0 = const()[name = string("op_410_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_410_dilations_0 = const()[name = string("op_410_dilations_0"), val = tensor([1, 1])]; - int32 var_410_groups_0 = const()[name = string("op_410_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148932160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149751424))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149751552)))]; - tensor var_410_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_410_dilations_0, groups = var_410_groups_0, pad = var_410_pad_0, pad_type = var_410_pad_type_0, strides = var_410_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_410_cast_fp16")]; - string var_416_pad_type_0 = const()[name = string("op_416_pad_type_0"), val = string("valid")]; - tensor var_416_strides_0 = const()[name = string("op_416_strides_0"), val = tensor([1, 1])]; - tensor var_416_pad_0 = const()[name = string("op_416_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_416_dilations_0 = const()[name = string("op_416_dilations_0"), val = tensor([1, 1])]; - int32 var_416_groups_0 = const()[name = string("op_416_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149814272))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149754176))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_416_cast_fp16 = conv(dilations = var_416_dilations_0, groups = var_416_groups_0, pad = var_416_pad_0, pad_type = var_416_pad_type_0, strides = var_416_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_416_cast_fp16")]; - tensor query_5_cast_fp16 = add(x = var_410_cast_fp16, y = var_416_cast_fp16)[name = string("query_5_cast_fp16")]; - string var_425_pad_type_0 = const()[name = string("op_425_pad_type_0"), val = string("valid")]; - tensor var_425_strides_0 = const()[name = string("op_425_strides_0"), val = tensor([1, 1])]; - tensor var_425_pad_0 = const()[name = string("op_425_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_425_dilations_0 = const()[name = string("op_425_dilations_0"), val = tensor([1, 1])]; - int32 var_425_groups_0 = const()[name = string("op_425_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150019136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150838400))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_425_cast_fp16 = conv(dilations = var_425_dilations_0, groups = var_425_groups_0, pad = var_425_pad_0, pad_type = var_425_pad_type_0, strides = var_425_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_425_cast_fp16")]; - string var_431_pad_type_0 = const()[name = string("op_431_pad_type_0"), val = string("valid")]; - tensor var_431_strides_0 = const()[name = string("op_431_strides_0"), val = tensor([1, 1])]; - tensor var_431_pad_0 = const()[name = string("op_431_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_431_dilations_0 = const()[name = string("op_431_dilations_0"), val = tensor([1, 1])]; - int32 var_431_groups_0 = const()[name = string("op_431_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150885184))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150838528))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_431_cast_fp16 = conv(dilations = var_431_dilations_0, groups = var_431_groups_0, pad = var_431_pad_0, pad_type = var_431_pad_type_0, strides = var_431_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_431_cast_fp16")]; - tensor current_key_3_cast_fp16 = add(x = var_425_cast_fp16, y = var_431_cast_fp16)[name = string("current_key_3_cast_fp16")]; - string var_441_pad_type_0 = const()[name = string("op_441_pad_type_0"), val = string("valid")]; - tensor var_441_strides_0 = const()[name = string("op_441_strides_0"), val = tensor([1, 1])]; - tensor var_441_pad_0 = const()[name = string("op_441_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_441_dilations_0 = const()[name = string("op_441_dilations_0"), val = tensor([1, 1])]; - int32 var_441_groups_0 = const()[name = string("op_441_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151090048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151909312))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151909440)))]; - tensor var_441_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_441_dilations_0, groups = var_441_groups_0, pad = var_441_pad_0, pad_type = var_441_pad_type_0, strides = var_441_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_441_cast_fp16")]; - string var_447_pad_type_0 = const()[name = string("op_447_pad_type_0"), val = string("valid")]; - tensor var_447_strides_0 = const()[name = string("op_447_strides_0"), val = tensor([1, 1])]; - tensor var_447_pad_0 = const()[name = string("op_447_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_447_dilations_0 = const()[name = string("op_447_dilations_0"), val = tensor([1, 1])]; - int32 var_447_groups_0 = const()[name = string("op_447_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151934720))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151912064))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_447_cast_fp16 = conv(dilations = var_447_dilations_0, groups = var_447_groups_0, pad = var_447_pad_0, pad_type = var_447_pad_type_0, strides = var_447_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_447_cast_fp16")]; - tensor current_value_3_cast_fp16 = add(x = var_441_cast_fp16, y = var_447_cast_fp16)[name = string("current_value_3_cast_fp16")]; - tensor var_453_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_170_cast_fp16)[name = string("op_453_cast_fp16")]; - tensor key_3_cast_fp16 = add(x = var_55_cast_fp16_1, y = var_453_cast_fp16)[name = string("key_3_cast_fp16")]; - tensor var_455_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_170_cast_fp16)[name = string("op_455_cast_fp16")]; - tensor value_3_cast_fp16 = add(x = var_62_cast_fp16_1, y = var_455_cast_fp16)[name = string("value_3_cast_fp16")]; - tensor var_458 = const()[name = string("op_458"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_5_cast_fp16 = reshape(shape = var_458, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")]; - fp16 var_460_to_fp16 = const()[name = string("op_460_to_fp16"), val = fp16(0x1p-3)]; - tensor var_461_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_460_to_fp16)[name = string("op_461_cast_fp16")]; - tensor var_462 = const()[name = string("op_462"), val = tensor([1, 20, 64, -1])]; - tensor var_463_cast_fp16 = reshape(shape = var_462, x = key_3_cast_fp16)[name = string("op_463_cast_fp16")]; - bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; - bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; - tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_461_cast_fp16, y = var_463_cast_fp16)[name = string("mh_w_9_cast_fp16")]; - tensor mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_11_cast_fp16")]; - tensor var_471_cast_fp16 = softmax(axis = var_363, x = mh_w_11_cast_fp16)[name = string("op_471_cast_fp16")]; - tensor var_472 = const()[name = string("op_472"), val = tensor([1, 20, 64, -1])]; - tensor var_473_cast_fp16 = reshape(shape = var_472, x = value_3_cast_fp16)[name = string("op_473_cast_fp16")]; - bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; - bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_473_cast_fp16, y = var_471_cast_fp16)[name = string("attn_5_cast_fp16")]; - tensor var_476 = const()[name = string("op_476"), val = tensor([1, 1280, 1, -1])]; - tensor input_11_cast_fp16 = reshape(shape = var_476, x = attn_5_cast_fp16)[name = string("input_11_cast_fp16")]; - string var_486_pad_type_0 = const()[name = string("op_486_pad_type_0"), val = string("valid")]; - tensor var_486_strides_0 = const()[name = string("op_486_strides_0"), val = tensor([1, 1])]; - tensor var_486_pad_0 = const()[name = string("op_486_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_486_dilations_0 = const()[name = string("op_486_dilations_0"), val = tensor([1, 1])]; - int32 var_486_groups_0 = const()[name = string("op_486_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152139584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152958848))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152958976)))]; - tensor var_486_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_486_dilations_0, groups = var_486_groups_0, pad = var_486_pad_0, pad_type = var_486_pad_type_0, strides = var_486_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_486_cast_fp16")]; - string var_492_pad_type_0 = const()[name = string("op_492_pad_type_0"), val = string("valid")]; - tensor var_492_strides_0 = const()[name = string("op_492_strides_0"), val = tensor([1, 1])]; - tensor var_492_pad_0 = const()[name = string("op_492_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_492_dilations_0 = const()[name = string("op_492_dilations_0"), val = tensor([1, 1])]; - int32 var_492_groups_0 = const()[name = string("op_492_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152986048))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152961600))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_492_cast_fp16 = conv(dilations = var_492_dilations_0, groups = var_492_groups_0, pad = var_492_pad_0, pad_type = var_492_pad_type_0, strides = var_492_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_492_cast_fp16")]; - tensor obj_31_cast_fp16 = add(x = var_486_cast_fp16, y = var_492_cast_fp16)[name = string("obj_31_cast_fp16")]; - tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_9_cast_fp16")]; - tensor out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor([1])]; - fp16 var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_507_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")]; - tensor obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153190912)))]; - tensor obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153193536)))]; - fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_33_cast_fp16")]; - string var_527_pad_type_0 = const()[name = string("op_527_pad_type_0"), val = string("valid")]; - tensor var_527_strides_0 = const()[name = string("op_527_strides_0"), val = tensor([1, 1])]; - tensor var_527_pad_0 = const()[name = string("op_527_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_527_dilations_0 = const()[name = string("op_527_dilations_0"), val = tensor([1, 1])]; - int32 var_527_groups_0 = const()[name = string("op_527_groups_0"), val = int32(1)]; - tensor layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153196160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154015424))))[name = string("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154015552)))]; - tensor var_527_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_527_dilations_0, groups = var_527_groups_0, pad = var_527_pad_0, pad_type = var_527_pad_type_0, strides = var_527_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_527_cast_fp16")]; - string var_533_pad_type_0 = const()[name = string("op_533_pad_type_0"), val = string("valid")]; - tensor var_533_strides_0 = const()[name = string("op_533_strides_0"), val = tensor([1, 1])]; - tensor var_533_pad_0 = const()[name = string("op_533_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_533_dilations_0 = const()[name = string("op_533_dilations_0"), val = tensor([1, 1])]; - int32 var_533_groups_0 = const()[name = string("op_533_groups_0"), val = int32(1)]; - tensor layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154061248))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154018176))))[name = string("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_533_cast_fp16 = conv(dilations = var_533_dilations_0, groups = var_533_groups_0, pad = var_533_pad_0, pad_type = var_533_pad_type_0, strides = var_533_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_533_cast_fp16")]; - tensor query_7_cast_fp16 = add(x = var_527_cast_fp16, y = var_533_cast_fp16)[name = string("query_7_cast_fp16")]; - tensor var_536 = const()[name = string("op_536"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_7_cast_fp16 = reshape(shape = var_536, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")]; - fp16 var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = fp16(0x1p-3)]; - tensor var_539_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_538_to_fp16)[name = string("op_539_cast_fp16")]; - tensor var_540 = const()[name = string("op_540"), val = tensor([1, 20, 64, -1])]; - tensor var_541_cast_fp16 = reshape(shape = var_540, x = obj_35_cast_fp16)[name = string("op_541_cast_fp16")]; - bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; - bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; - tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_539_cast_fp16, y = var_541_cast_fp16)[name = string("mh_w_13_cast_fp16")]; - tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_15_cast_fp16")]; - tensor obj_41_cast_fp16 = softmax(axis = var_363, x = mh_w_15_cast_fp16)[name = string("obj_41_cast_fp16")]; - tensor var_550 = const()[name = string("op_550"), val = tensor([1, 20, 64, -1])]; - tensor var_551_cast_fp16 = reshape(shape = var_550, x = obj_37_cast_fp16)[name = string("op_551_cast_fp16")]; - bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; - bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; - tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_551_cast_fp16, y = obj_41_cast_fp16)[name = string("attn_7_cast_fp16")]; - tensor var_554 = const()[name = string("op_554"), val = tensor([1, 1280, 1, -1])]; - tensor input_13_cast_fp16 = reshape(shape = var_554, x = attn_7_cast_fp16)[name = string("input_13_cast_fp16")]; - string var_564_pad_type_0 = const()[name = string("op_564_pad_type_0"), val = string("valid")]; - tensor var_564_strides_0 = const()[name = string("op_564_strides_0"), val = tensor([1, 1])]; - tensor var_564_pad_0 = const()[name = string("op_564_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_564_dilations_0 = const()[name = string("op_564_dilations_0"), val = tensor([1, 1])]; - int32 var_564_groups_0 = const()[name = string("op_564_groups_0"), val = int32(1)]; - tensor layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154266112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155085376))))[name = string("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155085504)))]; - tensor var_564_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_564_dilations_0, groups = var_564_groups_0, pad = var_564_pad_0, pad_type = var_564_pad_type_0, strides = var_564_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("op_564_cast_fp16")]; - string var_570_pad_type_0 = const()[name = string("op_570_pad_type_0"), val = string("valid")]; - tensor var_570_strides_0 = const()[name = string("op_570_strides_0"), val = tensor([1, 1])]; - tensor var_570_pad_0 = const()[name = string("op_570_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_570_dilations_0 = const()[name = string("op_570_dilations_0"), val = tensor([1, 1])]; - int32 var_570_groups_0 = const()[name = string("op_570_groups_0"), val = int32(1)]; - tensor layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155098496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155088128))))[name = string("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_570_cast_fp16 = conv(dilations = var_570_dilations_0, groups = var_570_groups_0, pad = var_570_pad_0, pad_type = var_570_pad_type_0, strides = var_570_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = string("op_570_cast_fp16")]; - tensor obj_39_cast_fp16 = add(x = var_564_cast_fp16, y = var_570_cast_fp16)[name = string("obj_39_cast_fp16")]; - tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_11_cast_fp16")]; - tensor out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor([1])]; - fp16 var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_581_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")]; - tensor input_15_gamma_0_to_fp16 = const()[name = string("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155303360)))]; - tensor input_15_beta_0_to_fp16 = const()[name = string("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155305984)))]; - fp16 input_15_epsilon_0_to_fp16 = const()[name = string("input_15_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_15_cast_fp16")]; - string var_599_pad_type_0 = const()[name = string("op_599_pad_type_0"), val = string("valid")]; - tensor var_599_strides_0 = const()[name = string("op_599_strides_0"), val = tensor([1, 1])]; - tensor var_599_pad_0 = const()[name = string("op_599_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_599_dilations_0 = const()[name = string("op_599_dilations_0"), val = tensor([1, 1])]; - int32 var_599_groups_0 = const()[name = string("op_599_groups_0"), val = int32(1)]; - tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155308608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158585472))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158585600)))]; - tensor var_599_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_599_dilations_0, groups = var_599_groups_0, pad = var_599_pad_0, pad_type = var_599_pad_type_0, strides = var_599_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_599_cast_fp16")]; - string var_605_pad_type_0 = const()[name = string("op_605_pad_type_0"), val = string("valid")]; - tensor var_605_strides_0 = const()[name = string("op_605_strides_0"), val = tensor([1, 1])]; - tensor var_605_pad_0 = const()[name = string("op_605_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_605_dilations_0 = const()[name = string("op_605_dilations_0"), val = tensor([1, 1])]; - int32 var_605_groups_0 = const()[name = string("op_605_groups_0"), val = int32(1)]; - tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158681152))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158595904))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_605_cast_fp16 = conv(dilations = var_605_dilations_0, groups = var_605_groups_0, pad = var_605_pad_0, pad_type = var_605_pad_type_0, strides = var_605_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_605_cast_fp16")]; - tensor input_17_cast_fp16 = add(x = var_599_cast_fp16, y = var_605_cast_fp16)[name = string("input_17_cast_fp16")]; - string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")]; - tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")]; - string var_616_pad_type_0 = const()[name = string("op_616_pad_type_0"), val = string("valid")]; - tensor var_616_strides_0 = const()[name = string("op_616_strides_0"), val = tensor([1, 1])]; - tensor var_616_pad_0 = const()[name = string("op_616_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_616_dilations_0 = const()[name = string("op_616_dilations_0"), val = tensor([1, 1])]; - int32 var_616_groups_0 = const()[name = string("op_616_groups_0"), val = int32(1)]; - tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159500416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162777280))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162777408)))]; - tensor var_616_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_616_dilations_0, groups = var_616_groups_0, pad = var_616_pad_0, pad_type = var_616_pad_type_0, strides = var_616_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_616_cast_fp16")]; - string var_622_pad_type_0 = const()[name = string("op_622_pad_type_0"), val = string("valid")]; - tensor var_622_strides_0 = const()[name = string("op_622_strides_0"), val = tensor([1, 1])]; - tensor var_622_pad_0 = const()[name = string("op_622_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_622_dilations_0 = const()[name = string("op_622_dilations_0"), val = tensor([1, 1])]; - int32 var_622_groups_0 = const()[name = string("op_622_groups_0"), val = int32(1)]; - tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162868032))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162780032))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_622_cast_fp16 = conv(dilations = var_622_dilations_0, groups = var_622_groups_0, pad = var_622_pad_0, pad_type = var_622_pad_type_0, strides = var_622_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_622_cast_fp16")]; - tensor hidden_states_5_cast_fp16 = add(x = var_616_cast_fp16, y = var_622_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; - tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_13_cast_fp16")]; - tensor obj_53_begin_0 = const()[name = string("obj_53_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor obj_53_end_0 = const()[name = string("obj_53_end_0"), val = tensor([3, 1280, 1, 1536])]; - tensor obj_53_end_mask_0 = const()[name = string("obj_53_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_53_cast_fp16 = slice_by_index(begin = obj_53_begin_0, end = obj_53_end_0, end_mask = obj_53_end_mask_0, x = read_state_2)[name = string("obj_53_cast_fp16")]; - tensor obj_55_begin_0 = const()[name = string("obj_55_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor obj_55_end_0 = const()[name = string("obj_55_end_0"), val = tensor([3, 1280, 1, 1536])]; - tensor obj_55_end_mask_0 = const()[name = string("obj_55_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_55_cast_fp16 = slice_by_index(begin = obj_55_begin_0, end = obj_55_end_0, end_mask = obj_55_end_mask_0, x = read_state_3)[name = string("obj_55_cast_fp16")]; - int32 var_644 = const()[name = string("op_644"), val = int32(3)]; - tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; - fp16 var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_669_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")]; - tensor obj_43_gamma_0_to_fp16 = const()[name = string("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163687296)))]; - tensor obj_43_beta_0_to_fp16 = const()[name = string("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689920)))]; - fp16 obj_43_epsilon_0_to_fp16 = const()[name = string("obj_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_43_cast_fp16")]; - string var_691_pad_type_0 = const()[name = string("op_691_pad_type_0"), val = string("valid")]; - tensor var_691_strides_0 = const()[name = string("op_691_strides_0"), val = tensor([1, 1])]; - tensor var_691_pad_0 = const()[name = string("op_691_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_691_dilations_0 = const()[name = string("op_691_dilations_0"), val = tensor([1, 1])]; - int32 var_691_groups_0 = const()[name = string("op_691_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163692544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164511808))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164511936)))]; - tensor var_691_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_691_dilations_0, groups = var_691_groups_0, pad = var_691_pad_0, pad_type = var_691_pad_type_0, strides = var_691_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_691_cast_fp16")]; - string var_697_pad_type_0 = const()[name = string("op_697_pad_type_0"), val = string("valid")]; - tensor var_697_strides_0 = const()[name = string("op_697_strides_0"), val = tensor([1, 1])]; - tensor var_697_pad_0 = const()[name = string("op_697_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_697_dilations_0 = const()[name = string("op_697_dilations_0"), val = tensor([1, 1])]; - int32 var_697_groups_0 = const()[name = string("op_697_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164546816))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164514560))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_697_cast_fp16 = conv(dilations = var_697_dilations_0, groups = var_697_groups_0, pad = var_697_pad_0, pad_type = var_697_pad_type_0, strides = var_697_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_697_cast_fp16")]; - tensor query_9_cast_fp16 = add(x = var_691_cast_fp16, y = var_697_cast_fp16)[name = string("query_9_cast_fp16")]; - string var_706_pad_type_0 = const()[name = string("op_706_pad_type_0"), val = string("valid")]; - tensor var_706_strides_0 = const()[name = string("op_706_strides_0"), val = tensor([1, 1])]; - tensor var_706_pad_0 = const()[name = string("op_706_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_706_dilations_0 = const()[name = string("op_706_dilations_0"), val = tensor([1, 1])]; - int32 var_706_groups_0 = const()[name = string("op_706_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164751680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165570944))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_706_cast_fp16 = conv(dilations = var_706_dilations_0, groups = var_706_groups_0, pad = var_706_pad_0, pad_type = var_706_pad_type_0, strides = var_706_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_706_cast_fp16")]; - string var_712_pad_type_0 = const()[name = string("op_712_pad_type_0"), val = string("valid")]; - tensor var_712_strides_0 = const()[name = string("op_712_strides_0"), val = tensor([1, 1])]; - tensor var_712_pad_0 = const()[name = string("op_712_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_712_dilations_0 = const()[name = string("op_712_dilations_0"), val = tensor([1, 1])]; - int32 var_712_groups_0 = const()[name = string("op_712_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165608576))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165571072))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_712_cast_fp16 = conv(dilations = var_712_dilations_0, groups = var_712_groups_0, pad = var_712_pad_0, pad_type = var_712_pad_type_0, strides = var_712_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_712_cast_fp16")]; - tensor current_key_5_cast_fp16 = add(x = var_706_cast_fp16, y = var_712_cast_fp16)[name = string("current_key_5_cast_fp16")]; - string var_722_pad_type_0 = const()[name = string("op_722_pad_type_0"), val = string("valid")]; - tensor var_722_strides_0 = const()[name = string("op_722_strides_0"), val = tensor([1, 1])]; - tensor var_722_pad_0 = const()[name = string("op_722_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_722_dilations_0 = const()[name = string("op_722_dilations_0"), val = tensor([1, 1])]; - int32 var_722_groups_0 = const()[name = string("op_722_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165813440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166632704))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166632832)))]; - tensor var_722_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_722_dilations_0, groups = var_722_groups_0, pad = var_722_pad_0, pad_type = var_722_pad_type_0, strides = var_722_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_722_cast_fp16")]; - string var_728_pad_type_0 = const()[name = string("op_728_pad_type_0"), val = string("valid")]; - tensor var_728_strides_0 = const()[name = string("op_728_strides_0"), val = tensor([1, 1])]; - tensor var_728_pad_0 = const()[name = string("op_728_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_728_dilations_0 = const()[name = string("op_728_dilations_0"), val = tensor([1, 1])]; - int32 var_728_groups_0 = const()[name = string("op_728_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166648384))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166635456))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_728_cast_fp16 = conv(dilations = var_728_dilations_0, groups = var_728_groups_0, pad = var_728_pad_0, pad_type = var_728_pad_type_0, strides = var_728_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_728_cast_fp16")]; - tensor current_value_5_cast_fp16 = add(x = var_722_cast_fp16, y = var_728_cast_fp16)[name = string("current_value_5_cast_fp16")]; - tensor var_734_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_170_cast_fp16)[name = string("op_734_cast_fp16")]; - tensor key_5_cast_fp16 = add(x = var_55_cast_fp16_2, y = var_734_cast_fp16)[name = string("key_5_cast_fp16")]; - tensor var_736_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_170_cast_fp16)[name = string("op_736_cast_fp16")]; - tensor value_5_cast_fp16 = add(x = var_62_cast_fp16_2, y = var_736_cast_fp16)[name = string("value_5_cast_fp16")]; - tensor var_739 = const()[name = string("op_739"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_9_cast_fp16 = reshape(shape = var_739, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")]; - fp16 var_741_to_fp16 = const()[name = string("op_741_to_fp16"), val = fp16(0x1p-3)]; - tensor var_742_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_741_to_fp16)[name = string("op_742_cast_fp16")]; - tensor var_743 = const()[name = string("op_743"), val = tensor([1, 20, 64, -1])]; - tensor var_744_cast_fp16 = reshape(shape = var_743, x = key_5_cast_fp16)[name = string("op_744_cast_fp16")]; - bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; - bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; - tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_742_cast_fp16, y = var_744_cast_fp16)[name = string("mh_w_17_cast_fp16")]; - tensor mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_19_cast_fp16")]; - tensor var_752_cast_fp16 = softmax(axis = var_644, x = mh_w_19_cast_fp16)[name = string("op_752_cast_fp16")]; - tensor var_753 = const()[name = string("op_753"), val = tensor([1, 20, 64, -1])]; - tensor var_754_cast_fp16 = reshape(shape = var_753, x = value_5_cast_fp16)[name = string("op_754_cast_fp16")]; - bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)]; - bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)]; - tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_754_cast_fp16, y = var_752_cast_fp16)[name = string("attn_9_cast_fp16")]; - tensor var_757 = const()[name = string("op_757"), val = tensor([1, 1280, 1, -1])]; - tensor input_21_cast_fp16 = reshape(shape = var_757, x = attn_9_cast_fp16)[name = string("input_21_cast_fp16")]; - string var_767_pad_type_0 = const()[name = string("op_767_pad_type_0"), val = string("valid")]; - tensor var_767_strides_0 = const()[name = string("op_767_strides_0"), val = tensor([1, 1])]; - tensor var_767_pad_0 = const()[name = string("op_767_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_767_dilations_0 = const()[name = string("op_767_dilations_0"), val = tensor([1, 1])]; - int32 var_767_groups_0 = const()[name = string("op_767_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166853248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167672512))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167672640)))]; - tensor var_767_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_767_dilations_0, groups = var_767_groups_0, pad = var_767_pad_0, pad_type = var_767_pad_type_0, strides = var_767_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("op_767_cast_fp16")]; - string var_773_pad_type_0 = const()[name = string("op_773_pad_type_0"), val = string("valid")]; - tensor var_773_strides_0 = const()[name = string("op_773_strides_0"), val = tensor([1, 1])]; - tensor var_773_pad_0 = const()[name = string("op_773_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_773_dilations_0 = const()[name = string("op_773_dilations_0"), val = tensor([1, 1])]; - int32 var_773_groups_0 = const()[name = string("op_773_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167686720))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167675264))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_773_cast_fp16 = conv(dilations = var_773_dilations_0, groups = var_773_groups_0, pad = var_773_pad_0, pad_type = var_773_pad_type_0, strides = var_773_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = string("op_773_cast_fp16")]; - tensor obj_49_cast_fp16 = add(x = var_767_cast_fp16, y = var_773_cast_fp16)[name = string("obj_49_cast_fp16")]; - tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_49_cast_fp16)[name = string("inputs_15_cast_fp16")]; - tensor out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor([1])]; - fp16 var_788_to_fp16 = const()[name = string("op_788_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_788_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")]; - tensor obj_51_gamma_0_to_fp16 = const()[name = string("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167891584)))]; - tensor obj_51_beta_0_to_fp16 = const()[name = string("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167894208)))]; - fp16 obj_51_epsilon_0_to_fp16 = const()[name = string("obj_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("obj_51_cast_fp16")]; - string var_808_pad_type_0 = const()[name = string("op_808_pad_type_0"), val = string("valid")]; - tensor var_808_strides_0 = const()[name = string("op_808_strides_0"), val = tensor([1, 1])]; - tensor var_808_pad_0 = const()[name = string("op_808_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_808_dilations_0 = const()[name = string("op_808_dilations_0"), val = tensor([1, 1])]; - int32 var_808_groups_0 = const()[name = string("op_808_groups_0"), val = int32(1)]; - tensor layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167896832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168716096))))[name = string("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168716224)))]; - tensor var_808_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_808_dilations_0, groups = var_808_groups_0, pad = var_808_pad_0, pad_type = var_808_pad_type_0, strides = var_808_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = string("op_808_cast_fp16")]; - string var_814_pad_type_0 = const()[name = string("op_814_pad_type_0"), val = string("valid")]; - tensor var_814_strides_0 = const()[name = string("op_814_strides_0"), val = tensor([1, 1])]; - tensor var_814_pad_0 = const()[name = string("op_814_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_814_dilations_0 = const()[name = string("op_814_dilations_0"), val = tensor([1, 1])]; - int32 var_814_groups_0 = const()[name = string("op_814_groups_0"), val = int32(1)]; - tensor layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168746560))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168718848))))[name = string("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_814_cast_fp16 = conv(dilations = var_814_dilations_0, groups = var_814_groups_0, pad = var_814_pad_0, pad_type = var_814_pad_type_0, strides = var_814_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = string("op_814_cast_fp16")]; - tensor query_11_cast_fp16 = add(x = var_808_cast_fp16, y = var_814_cast_fp16)[name = string("query_11_cast_fp16")]; - tensor var_817 = const()[name = string("op_817"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_11_cast_fp16 = reshape(shape = var_817, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")]; - fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1p-3)]; - tensor var_820_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_819_to_fp16)[name = string("op_820_cast_fp16")]; - tensor var_821 = const()[name = string("op_821"), val = tensor([1, 20, 64, -1])]; - tensor var_822_cast_fp16 = reshape(shape = var_821, x = obj_53_cast_fp16)[name = string("op_822_cast_fp16")]; - bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)]; - bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)]; - tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_820_cast_fp16, y = var_822_cast_fp16)[name = string("mh_w_21_cast_fp16")]; - tensor mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_23_cast_fp16")]; - tensor obj_59_cast_fp16 = softmax(axis = var_644, x = mh_w_23_cast_fp16)[name = string("obj_59_cast_fp16")]; - tensor var_831 = const()[name = string("op_831"), val = tensor([1, 20, 64, -1])]; - tensor var_832_cast_fp16 = reshape(shape = var_831, x = obj_55_cast_fp16)[name = string("op_832_cast_fp16")]; - bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)]; - bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)]; - tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_832_cast_fp16, y = obj_59_cast_fp16)[name = string("attn_11_cast_fp16")]; - tensor var_835 = const()[name = string("op_835"), val = tensor([1, 1280, 1, -1])]; - tensor input_23_cast_fp16 = reshape(shape = var_835, x = attn_11_cast_fp16)[name = string("input_23_cast_fp16")]; - string var_845_pad_type_0 = const()[name = string("op_845_pad_type_0"), val = string("valid")]; - tensor var_845_strides_0 = const()[name = string("op_845_strides_0"), val = tensor([1, 1])]; - tensor var_845_pad_0 = const()[name = string("op_845_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_845_dilations_0 = const()[name = string("op_845_dilations_0"), val = tensor([1, 1])]; - int32 var_845_groups_0 = const()[name = string("op_845_groups_0"), val = int32(1)]; - tensor layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168951424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169770688))))[name = string("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169770816)))]; - tensor var_845_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_845_dilations_0, groups = var_845_groups_0, pad = var_845_pad_0, pad_type = var_845_pad_type_0, strides = var_845_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_845_cast_fp16")]; - string var_851_pad_type_0 = const()[name = string("op_851_pad_type_0"), val = string("valid")]; - tensor var_851_strides_0 = const()[name = string("op_851_strides_0"), val = tensor([1, 1])]; - tensor var_851_pad_0 = const()[name = string("op_851_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_851_dilations_0 = const()[name = string("op_851_dilations_0"), val = tensor([1, 1])]; - int32 var_851_groups_0 = const()[name = string("op_851_groups_0"), val = int32(1)]; - tensor layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169786432))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169773440))))[name = string("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_851_cast_fp16 = conv(dilations = var_851_dilations_0, groups = var_851_groups_0, pad = var_851_pad_0, pad_type = var_851_pad_type_0, strides = var_851_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_851_cast_fp16")]; - tensor obj_57_cast_fp16 = add(x = var_845_cast_fp16, y = var_851_cast_fp16)[name = string("obj_57_cast_fp16")]; - tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_57_cast_fp16)[name = string("inputs_17_cast_fp16")]; - tensor out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor([1])]; - fp16 var_865_to_fp16 = const()[name = string("op_865_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_865_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")]; - tensor input_25_gamma_0_to_fp16 = const()[name = string("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169991296)))]; - tensor input_25_beta_0_to_fp16 = const()[name = string("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169993920)))]; - fp16 input_25_epsilon_0_to_fp16 = const()[name = string("input_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("input_25_cast_fp16")]; - string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")]; - tensor var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor([1, 1])]; - tensor var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor([1, 1])]; - int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)]; - tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169996544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173273408))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173273536)))]; - tensor var_883_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_883_cast_fp16")]; - string var_889_pad_type_0 = const()[name = string("op_889_pad_type_0"), val = string("valid")]; - tensor var_889_strides_0 = const()[name = string("op_889_strides_0"), val = tensor([1, 1])]; - tensor var_889_pad_0 = const()[name = string("op_889_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_889_dilations_0 = const()[name = string("op_889_dilations_0"), val = tensor([1, 1])]; - int32 var_889_groups_0 = const()[name = string("op_889_groups_0"), val = int32(1)]; - tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173445760))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173283840))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_889_cast_fp16 = conv(dilations = var_889_dilations_0, groups = var_889_groups_0, pad = var_889_pad_0, pad_type = var_889_pad_type_0, strides = var_889_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_889_cast_fp16")]; - tensor input_27_cast_fp16 = add(x = var_883_cast_fp16, y = var_889_cast_fp16)[name = string("input_27_cast_fp16")]; - string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("EXACT")]; - tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")]; - string var_900_pad_type_0 = const()[name = string("op_900_pad_type_0"), val = string("valid")]; - tensor var_900_strides_0 = const()[name = string("op_900_strides_0"), val = tensor([1, 1])]; - tensor var_900_pad_0 = const()[name = string("op_900_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_900_dilations_0 = const()[name = string("op_900_dilations_0"), val = tensor([1, 1])]; - int32 var_900_groups_0 = const()[name = string("op_900_groups_0"), val = int32(1)]; - tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174265024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177541888))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177542016)))]; - tensor var_900_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_900_dilations_0, groups = var_900_groups_0, pad = var_900_pad_0, pad_type = var_900_pad_type_0, strides = var_900_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("op_900_cast_fp16")]; - string var_906_pad_type_0 = const()[name = string("op_906_pad_type_0"), val = string("valid")]; - tensor var_906_strides_0 = const()[name = string("op_906_strides_0"), val = tensor([1, 1])]; - tensor var_906_pad_0 = const()[name = string("op_906_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_906_dilations_0 = const()[name = string("op_906_dilations_0"), val = tensor([1, 1])]; - int32 var_906_groups_0 = const()[name = string("op_906_groups_0"), val = int32(1)]; - tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177624832))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177544640))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_906_cast_fp16 = conv(dilations = var_906_dilations_0, groups = var_906_groups_0, pad = var_906_pad_0, pad_type = var_906_pad_type_0, strides = var_906_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = string("op_906_cast_fp16")]; - tensor hidden_states_7_cast_fp16 = add(x = var_900_cast_fp16, y = var_906_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; - tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_19_cast_fp16")]; - tensor obj_71_begin_0 = const()[name = string("obj_71_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor obj_71_end_0 = const()[name = string("obj_71_end_0"), val = tensor([4, 1280, 1, 1536])]; - tensor obj_71_end_mask_0 = const()[name = string("obj_71_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_71_cast_fp16 = slice_by_index(begin = obj_71_begin_0, end = obj_71_end_0, end_mask = obj_71_end_mask_0, x = read_state_2)[name = string("obj_71_cast_fp16")]; - tensor obj_73_begin_0 = const()[name = string("obj_73_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor obj_73_end_0 = const()[name = string("obj_73_end_0"), val = tensor([4, 1280, 1, 1536])]; - tensor obj_73_end_mask_0 = const()[name = string("obj_73_end_mask_0"), val = tensor([false, true, true, true])]; - tensor obj_73_cast_fp16 = slice_by_index(begin = obj_73_begin_0, end = obj_73_end_0, end_mask = obj_73_end_mask_0, x = read_state_3)[name = string("obj_73_cast_fp16")]; - int32 var_929 = const()[name = string("op_929"), val = int32(3)]; - tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; - fp16 var_954_to_fp16 = const()[name = string("op_954_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_954_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")]; - tensor obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178444096)))]; - tensor obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178446720)))]; - fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("obj_61_cast_fp16")]; - string var_976_pad_type_0 = const()[name = string("op_976_pad_type_0"), val = string("valid")]; - tensor var_976_strides_0 = const()[name = string("op_976_strides_0"), val = tensor([1, 1])]; - tensor var_976_pad_0 = const()[name = string("op_976_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_976_dilations_0 = const()[name = string("op_976_dilations_0"), val = tensor([1, 1])]; - int32 var_976_groups_0 = const()[name = string("op_976_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178449344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179268608))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179268736)))]; - tensor var_976_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_976_dilations_0, groups = var_976_groups_0, pad = var_976_pad_0, pad_type = var_976_pad_type_0, strides = var_976_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_976_cast_fp16")]; - string var_982_pad_type_0 = const()[name = string("op_982_pad_type_0"), val = string("valid")]; - tensor var_982_strides_0 = const()[name = string("op_982_strides_0"), val = tensor([1, 1])]; - tensor var_982_pad_0 = const()[name = string("op_982_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_982_dilations_0 = const()[name = string("op_982_dilations_0"), val = tensor([1, 1])]; - int32 var_982_groups_0 = const()[name = string("op_982_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179292800))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179271360))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_982_cast_fp16 = conv(dilations = var_982_dilations_0, groups = var_982_groups_0, pad = var_982_pad_0, pad_type = var_982_pad_type_0, strides = var_982_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_982_cast_fp16")]; - tensor query_13_cast_fp16 = add(x = var_976_cast_fp16, y = var_982_cast_fp16)[name = string("query_13_cast_fp16")]; - string var_991_pad_type_0 = const()[name = string("op_991_pad_type_0"), val = string("valid")]; - tensor var_991_strides_0 = const()[name = string("op_991_strides_0"), val = tensor([1, 1])]; - tensor var_991_pad_0 = const()[name = string("op_991_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_991_dilations_0 = const()[name = string("op_991_dilations_0"), val = tensor([1, 1])]; - int32 var_991_groups_0 = const()[name = string("op_991_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179497664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180316928))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_991_cast_fp16 = conv(dilations = var_991_dilations_0, groups = var_991_groups_0, pad = var_991_pad_0, pad_type = var_991_pad_type_0, strides = var_991_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_991_cast_fp16")]; - string var_997_pad_type_0 = const()[name = string("op_997_pad_type_0"), val = string("valid")]; - tensor var_997_strides_0 = const()[name = string("op_997_strides_0"), val = tensor([1, 1])]; - tensor var_997_pad_0 = const()[name = string("op_997_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_997_dilations_0 = const()[name = string("op_997_dilations_0"), val = tensor([1, 1])]; - int32 var_997_groups_0 = const()[name = string("op_997_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180337920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180317056))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_997_cast_fp16 = conv(dilations = var_997_dilations_0, groups = var_997_groups_0, pad = var_997_pad_0, pad_type = var_997_pad_type_0, strides = var_997_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_997_cast_fp16")]; - tensor current_key_cast_fp16 = add(x = var_991_cast_fp16, y = var_997_cast_fp16)[name = string("current_key_cast_fp16")]; - string var_1007_pad_type_0 = const()[name = string("op_1007_pad_type_0"), val = string("valid")]; - tensor var_1007_strides_0 = const()[name = string("op_1007_strides_0"), val = tensor([1, 1])]; - tensor var_1007_pad_0 = const()[name = string("op_1007_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1007_dilations_0 = const()[name = string("op_1007_dilations_0"), val = tensor([1, 1])]; - int32 var_1007_groups_0 = const()[name = string("op_1007_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180542784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181362048))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181362176)))]; - tensor var_1007_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1007_dilations_0, groups = var_1007_groups_0, pad = var_1007_pad_0, pad_type = var_1007_pad_type_0, strides = var_1007_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1007_cast_fp16")]; - string var_1013_pad_type_0 = const()[name = string("op_1013_pad_type_0"), val = string("valid")]; - tensor var_1013_strides_0 = const()[name = string("op_1013_strides_0"), val = tensor([1, 1])]; - tensor var_1013_pad_0 = const()[name = string("op_1013_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1013_dilations_0 = const()[name = string("op_1013_dilations_0"), val = tensor([1, 1])]; - int32 var_1013_groups_0 = const()[name = string("op_1013_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181379584))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181364800))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1013_cast_fp16 = conv(dilations = var_1013_dilations_0, groups = var_1013_groups_0, pad = var_1013_pad_0, pad_type = var_1013_pad_type_0, strides = var_1013_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1013_cast_fp16")]; - tensor current_value_cast_fp16 = add(x = var_1007_cast_fp16, y = var_1013_cast_fp16)[name = string("current_value_cast_fp16")]; - tensor var_1019_cast_fp16 = mul(x = current_key_cast_fp16, y = var_170_cast_fp16)[name = string("op_1019_cast_fp16")]; - tensor key_cast_fp16 = add(x = var_55_cast_fp16_3, y = var_1019_cast_fp16)[name = string("key_cast_fp16")]; - tensor var_1021_cast_fp16 = mul(x = current_value_cast_fp16, y = var_170_cast_fp16)[name = string("op_1021_cast_fp16")]; - tensor value_cast_fp16 = add(x = var_62_cast_fp16_3, y = var_1021_cast_fp16)[name = string("value_cast_fp16")]; - tensor var_1024 = const()[name = string("op_1024"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_13_cast_fp16 = reshape(shape = var_1024, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")]; - fp16 var_1026_to_fp16 = const()[name = string("op_1026_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1027_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1026_to_fp16)[name = string("op_1027_cast_fp16")]; - tensor var_1028 = const()[name = string("op_1028"), val = tensor([1, 20, 64, -1])]; - tensor var_1029_cast_fp16 = reshape(shape = var_1028, x = key_cast_fp16)[name = string("op_1029_cast_fp16")]; - bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)]; - bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)]; - tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1027_cast_fp16, y = var_1029_cast_fp16)[name = string("mh_w_25_cast_fp16")]; - tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_27_cast_fp16")]; - tensor var_1037_cast_fp16 = softmax(axis = var_929, x = mh_w_27_cast_fp16)[name = string("op_1037_cast_fp16")]; - tensor var_1038 = const()[name = string("op_1038"), val = tensor([1, 20, 64, -1])]; - tensor var_1039_cast_fp16 = reshape(shape = var_1038, x = value_cast_fp16)[name = string("op_1039_cast_fp16")]; - bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)]; - bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)]; - tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1039_cast_fp16, y = var_1037_cast_fp16)[name = string("attn_13_cast_fp16")]; - tensor var_1042 = const()[name = string("op_1042"), val = tensor([1, 1280, 1, -1])]; - tensor input_31_cast_fp16 = reshape(shape = var_1042, x = attn_13_cast_fp16)[name = string("input_31_cast_fp16")]; - string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")]; - tensor var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor([1, 1])]; - tensor var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor([1, 1])]; - int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181584448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182403712))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182403840)))]; - tensor var_1052_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_1052_cast_fp16")]; - string var_1058_pad_type_0 = const()[name = string("op_1058_pad_type_0"), val = string("valid")]; - tensor var_1058_strides_0 = const()[name = string("op_1058_strides_0"), val = tensor([1, 1])]; - tensor var_1058_pad_0 = const()[name = string("op_1058_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1058_dilations_0 = const()[name = string("op_1058_dilations_0"), val = tensor([1, 1])]; - int32 var_1058_groups_0 = const()[name = string("op_1058_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182420992))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182406464))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1058_cast_fp16 = conv(dilations = var_1058_dilations_0, groups = var_1058_groups_0, pad = var_1058_pad_0, pad_type = var_1058_pad_type_0, strides = var_1058_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_1058_cast_fp16")]; - tensor obj_67_cast_fp16 = add(x = var_1052_cast_fp16, y = var_1058_cast_fp16)[name = string("obj_67_cast_fp16")]; - tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_21_cast_fp16")]; - tensor out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor([1])]; - fp16 var_1073_to_fp16 = const()[name = string("op_1073_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1073_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")]; - tensor obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182625856)))]; - tensor obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182628480)))]; - fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_69_cast_fp16")]; - string var_1093_pad_type_0 = const()[name = string("op_1093_pad_type_0"), val = string("valid")]; - tensor var_1093_strides_0 = const()[name = string("op_1093_strides_0"), val = tensor([1, 1])]; - tensor var_1093_pad_0 = const()[name = string("op_1093_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1093_dilations_0 = const()[name = string("op_1093_dilations_0"), val = tensor([1, 1])]; - int32 var_1093_groups_0 = const()[name = string("op_1093_groups_0"), val = int32(1)]; - tensor layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182631104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183450368))))[name = string("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183450496)))]; - tensor var_1093_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1093_dilations_0, groups = var_1093_groups_0, pad = var_1093_pad_0, pad_type = var_1093_pad_type_0, strides = var_1093_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_1093_cast_fp16")]; - string var_1099_pad_type_0 = const()[name = string("op_1099_pad_type_0"), val = string("valid")]; - tensor var_1099_strides_0 = const()[name = string("op_1099_strides_0"), val = tensor([1, 1])]; - tensor var_1099_pad_0 = const()[name = string("op_1099_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1099_dilations_0 = const()[name = string("op_1099_dilations_0"), val = tensor([1, 1])]; - int32 var_1099_groups_0 = const()[name = string("op_1099_groups_0"), val = int32(1)]; - tensor layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183468544))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183453120))))[name = string("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1099_cast_fp16 = conv(dilations = var_1099_dilations_0, groups = var_1099_groups_0, pad = var_1099_pad_0, pad_type = var_1099_pad_type_0, strides = var_1099_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_1099_cast_fp16")]; - tensor query_cast_fp16 = add(x = var_1093_cast_fp16, y = var_1099_cast_fp16)[name = string("query_cast_fp16")]; - tensor var_1102 = const()[name = string("op_1102"), val = tensor([1, 20, 64, -1])]; - tensor mh_q_cast_fp16 = reshape(shape = var_1102, x = query_cast_fp16)[name = string("mh_q_cast_fp16")]; - fp16 var_1104_to_fp16 = const()[name = string("op_1104_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1105_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_1104_to_fp16)[name = string("op_1105_cast_fp16")]; - tensor var_1106 = const()[name = string("op_1106"), val = tensor([1, 20, 64, -1])]; - tensor var_1107_cast_fp16 = reshape(shape = var_1106, x = obj_71_cast_fp16)[name = string("op_1107_cast_fp16")]; - bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)]; - bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)]; - tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1105_cast_fp16, y = var_1107_cast_fp16)[name = string("mh_w_29_cast_fp16")]; - tensor mh_w_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_cast_fp16")]; - tensor obj_77_cast_fp16 = softmax(axis = var_929, x = mh_w_cast_fp16)[name = string("obj_77_cast_fp16")]; - tensor var_1116 = const()[name = string("op_1116"), val = tensor([1, 20, 64, -1])]; - tensor var_1117_cast_fp16 = reshape(shape = var_1116, x = obj_73_cast_fp16)[name = string("op_1117_cast_fp16")]; - bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; - bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; - tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_1117_cast_fp16, y = obj_77_cast_fp16)[name = string("attn_cast_fp16")]; - tensor var_1120 = const()[name = string("op_1120"), val = tensor([1, 1280, 1, -1])]; - tensor input_33_cast_fp16 = reshape(shape = var_1120, x = attn_cast_fp16)[name = string("input_33_cast_fp16")]; - string var_1130_pad_type_0 = const()[name = string("op_1130_pad_type_0"), val = string("valid")]; - tensor var_1130_strides_0 = const()[name = string("op_1130_strides_0"), val = tensor([1, 1])]; - tensor var_1130_pad_0 = const()[name = string("op_1130_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1130_dilations_0 = const()[name = string("op_1130_dilations_0"), val = tensor([1, 1])]; - int32 var_1130_groups_0 = const()[name = string("op_1130_groups_0"), val = int32(1)]; - tensor layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183673408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184492672))))[name = string("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184492800)))]; - tensor var_1130_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1130_dilations_0, groups = var_1130_groups_0, pad = var_1130_pad_0, pad_type = var_1130_pad_type_0, strides = var_1130_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1130_cast_fp16")]; - string var_1136_pad_type_0 = const()[name = string("op_1136_pad_type_0"), val = string("valid")]; - tensor var_1136_strides_0 = const()[name = string("op_1136_strides_0"), val = tensor([1, 1])]; - tensor var_1136_pad_0 = const()[name = string("op_1136_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1136_dilations_0 = const()[name = string("op_1136_dilations_0"), val = tensor([1, 1])]; - int32 var_1136_groups_0 = const()[name = string("op_1136_groups_0"), val = int32(1)]; - tensor layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184507136))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184495424))))[name = string("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1136_cast_fp16 = conv(dilations = var_1136_dilations_0, groups = var_1136_groups_0, pad = var_1136_pad_0, pad_type = var_1136_pad_type_0, strides = var_1136_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1136_cast_fp16")]; - tensor obj_75_cast_fp16 = add(x = var_1130_cast_fp16, y = var_1136_cast_fp16)[name = string("obj_75_cast_fp16")]; - tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_23_cast_fp16")]; - tensor out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor([1])]; - fp16 var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1150_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")]; - tensor input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184712000)))]; - tensor input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184714624)))]; - fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_35_cast_fp16")]; - string var_1168_pad_type_0 = const()[name = string("op_1168_pad_type_0"), val = string("valid")]; - tensor var_1168_strides_0 = const()[name = string("op_1168_strides_0"), val = tensor([1, 1])]; - tensor var_1168_pad_0 = const()[name = string("op_1168_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1168_dilations_0 = const()[name = string("op_1168_dilations_0"), val = tensor([1, 1])]; - int32 var_1168_groups_0 = const()[name = string("op_1168_groups_0"), val = int32(1)]; - tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184717248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187994112))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187994240)))]; - tensor var_1168_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1168_dilations_0, groups = var_1168_groups_0, pad = var_1168_pad_0, pad_type = var_1168_pad_type_0, strides = var_1168_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1168_cast_fp16")]; - string var_1174_pad_type_0 = const()[name = string("op_1174_pad_type_0"), val = string("valid")]; - tensor var_1174_strides_0 = const()[name = string("op_1174_strides_0"), val = tensor([1, 1])]; - tensor var_1174_pad_0 = const()[name = string("op_1174_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1174_dilations_0 = const()[name = string("op_1174_dilations_0"), val = tensor([1, 1])]; - int32 var_1174_groups_0 = const()[name = string("op_1174_groups_0"), val = int32(1)]; - tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188057280))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188004544))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1174_cast_fp16 = conv(dilations = var_1174_dilations_0, groups = var_1174_groups_0, pad = var_1174_pad_0, pad_type = var_1174_pad_type_0, strides = var_1174_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1174_cast_fp16")]; - tensor input_37_cast_fp16 = add(x = var_1168_cast_fp16, y = var_1174_cast_fp16)[name = string("input_37_cast_fp16")]; - string input_mode_0 = const()[name = string("input_mode_0"), val = string("EXACT")]; - tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_37_cast_fp16)[name = string("input_cast_fp16")]; - string var_1185_pad_type_0 = const()[name = string("op_1185_pad_type_0"), val = string("valid")]; - tensor var_1185_strides_0 = const()[name = string("op_1185_strides_0"), val = tensor([1, 1])]; - tensor var_1185_pad_0 = const()[name = string("op_1185_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1185_dilations_0 = const()[name = string("op_1185_dilations_0"), val = tensor([1, 1])]; - int32 var_1185_groups_0 = const()[name = string("op_1185_groups_0"), val = int32(1)]; - tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188876544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192153408))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192153536)))]; - tensor var_1185_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1185_dilations_0, groups = var_1185_groups_0, pad = var_1185_pad_0, pad_type = var_1185_pad_type_0, strides = var_1185_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("op_1185_cast_fp16")]; - string var_1191_pad_type_0 = const()[name = string("op_1191_pad_type_0"), val = string("valid")]; - tensor var_1191_strides_0 = const()[name = string("op_1191_strides_0"), val = tensor([1, 1])]; - tensor var_1191_pad_0 = const()[name = string("op_1191_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1191_dilations_0 = const()[name = string("op_1191_dilations_0"), val = tensor([1, 1])]; - int32 var_1191_groups_0 = const()[name = string("op_1191_groups_0"), val = int32(1)]; - tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192226688))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192156160))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1191_cast_fp16 = conv(dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = string("op_1191_cast_fp16")]; - tensor hidden_states_9_cast_fp16 = add(x = var_1185_cast_fp16, y = var_1191_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; - tensor inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_cast_fp16")]; - tensor out_axes_0 = const()[name = string("out_axes_0"), val = tensor([1])]; - fp16 var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_1211_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")]; - tensor hidden_states_gamma_0_to_fp16 = const()[name = string("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193045952)))]; - tensor hidden_states_beta_0_to_fp16 = const()[name = string("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193048576)))]; - fp16 hidden_states_epsilon_0_to_fp16 = const()[name = string("hidden_states_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_cast_fp16)[name = string("hidden_states_cast_fp16")]; - tensor var_1222_axes_0 = const()[name = string("op_1222_axes_0"), val = tensor([2])]; - tensor var_1222_cast_fp16 = squeeze(axes = var_1222_axes_0, x = hidden_states_cast_fp16)[name = string("op_1222_cast_fp16")]; - tensor var_1225_perm_0 = const()[name = string("op_1225_perm_0"), val = tensor([0, 2, 1])]; - tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193051200)))]; - tensor var_1225_cast_fp16 = transpose(perm = var_1225_perm_0, x = var_1222_cast_fp16)[name = string("transpose_0")]; - tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_1225_cast_fp16)[name = string("linear_0_cast_fp16")]; - int32 var_1229 = const()[name = string("op_1229"), val = int32(1)]; - bool obj_81_interleave_0 = const()[name = string("obj_81_interleave_0"), val = bool(false)]; - tensor key_cache_updates = concat(axis = var_1229, interleave = obj_81_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_cast_fp16))[name = string("obj_81_cast_fp16")]; - int32 var_1232 = const()[name = string("op_1232"), val = int32(1)]; - bool obj_83_interleave_0 = const()[name = string("obj_83_interleave_0"), val = bool(false)]; - tensor value_cache_updates = concat(axis = var_1232, interleave = obj_83_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_cast_fp16))[name = string("obj_83_cast_fp16")]; - tensor var_1243_begin_0 = const()[name = string("op_1243_begin_0"), val = tensor([0, 4, 0, 0])]; - tensor var_1243_end_0 = const()[name = string("op_1243_end_0"), val = tensor([1, 5, 1, 1536])]; - tensor var_1243_end_mask_0 = const()[name = string("op_1243_end_mask_0"), val = tensor([true, false, true, true])]; - tensor var_1243_cast_fp16 = slice_by_index(begin = var_1243_begin_0, end = var_1243_end_0, end_mask = var_1243_end_mask_0, x = obj_59_cast_fp16)[name = string("op_1243_cast_fp16")]; - tensor var_1246_begin_0 = const()[name = string("op_1246_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1246_end_0 = const()[name = string("op_1246_end_0"), val = tensor([1, 1, 1, 1536])]; - tensor var_1246_end_mask_0 = const()[name = string("op_1246_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1246_squeeze_mask_0 = const()[name = string("op_1246_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, squeeze_mask = var_1246_squeeze_mask_0, x = var_1243_cast_fp16)[name = string("op_1246_cast_fp16")]; - tensor var_1261_begin_0 = const()[name = string("op_1261_begin_0"), val = tensor([0, 11, 0, 0])]; - tensor var_1261_end_0 = const()[name = string("op_1261_end_0"), val = tensor([1, 12, 1, 1536])]; - tensor var_1261_end_mask_0 = const()[name = string("op_1261_end_mask_0"), val = tensor([true, false, true, true])]; - tensor var_1261_cast_fp16 = slice_by_index(begin = var_1261_begin_0, end = var_1261_end_0, end_mask = var_1261_end_mask_0, x = obj_59_cast_fp16)[name = string("op_1261_cast_fp16")]; - tensor var_1264_begin_0 = const()[name = string("op_1264_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1264_end_0 = const()[name = string("op_1264_end_0"), val = tensor([1, 1, 1, 1536])]; - tensor var_1264_end_mask_0 = const()[name = string("op_1264_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1264_squeeze_mask_0 = const()[name = string("op_1264_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, squeeze_mask = var_1264_squeeze_mask_0, x = var_1261_cast_fp16)[name = string("op_1264_cast_fp16")]; - tensor var_1279_begin_0 = const()[name = string("op_1279_begin_0"), val = tensor([0, 3, 0, 0])]; - tensor var_1279_end_0 = const()[name = string("op_1279_end_0"), val = tensor([1, 4, 1, 1536])]; - tensor var_1279_end_mask_0 = const()[name = string("op_1279_end_mask_0"), val = tensor([true, false, true, true])]; - tensor var_1279_cast_fp16 = slice_by_index(begin = var_1279_begin_0, end = var_1279_end_0, end_mask = var_1279_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1279_cast_fp16")]; - tensor var_1282_begin_0 = const()[name = string("op_1282_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1282_end_0 = const()[name = string("op_1282_end_0"), val = tensor([1, 1, 1, 1536])]; - tensor var_1282_end_mask_0 = const()[name = string("op_1282_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1282_squeeze_mask_0 = const()[name = string("op_1282_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_1282_cast_fp16 = slice_by_index(begin = var_1282_begin_0, end = var_1282_end_0, end_mask = var_1282_end_mask_0, squeeze_mask = var_1282_squeeze_mask_0, x = var_1279_cast_fp16)[name = string("op_1282_cast_fp16")]; - tensor var_1297_begin_0 = const()[name = string("op_1297_begin_0"), val = tensor([0, 6, 0, 0])]; - tensor var_1297_end_0 = const()[name = string("op_1297_end_0"), val = tensor([1, 7, 1, 1536])]; - tensor var_1297_end_mask_0 = const()[name = string("op_1297_end_mask_0"), val = tensor([true, false, true, true])]; - tensor var_1297_cast_fp16 = slice_by_index(begin = var_1297_begin_0, end = var_1297_end_0, end_mask = var_1297_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1297_cast_fp16")]; - tensor var_1300_begin_0 = const()[name = string("op_1300_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1300_end_0 = const()[name = string("op_1300_end_0"), val = tensor([1, 1, 1, 1536])]; - tensor var_1300_end_mask_0 = const()[name = string("op_1300_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1300_squeeze_mask_0 = const()[name = string("op_1300_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, squeeze_mask = var_1300_squeeze_mask_0, x = var_1297_cast_fp16)[name = string("op_1300_cast_fp16")]; - tensor var_1315_begin_0 = const()[name = string("op_1315_begin_0"), val = tensor([0, 11, 0, 0])]; - tensor var_1315_end_0 = const()[name = string("op_1315_end_0"), val = tensor([1, 12, 1, 1536])]; - tensor var_1315_end_mask_0 = const()[name = string("op_1315_end_mask_0"), val = tensor([true, false, true, true])]; - tensor var_1315_cast_fp16 = slice_by_index(begin = var_1315_begin_0, end = var_1315_end_0, end_mask = var_1315_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1315_cast_fp16")]; - tensor var_1318_begin_0 = const()[name = string("op_1318_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1318_end_0 = const()[name = string("op_1318_end_0"), val = tensor([1, 1, 1, 1536])]; - tensor var_1318_end_mask_0 = const()[name = string("op_1318_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1318_squeeze_mask_0 = const()[name = string("op_1318_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_1318_cast_fp16 = slice_by_index(begin = var_1318_begin_0, end = var_1318_end_0, end_mask = var_1318_end_mask_0, squeeze_mask = var_1318_squeeze_mask_0, x = var_1315_cast_fp16)[name = string("op_1318_cast_fp16")]; - tensor var_1333_begin_0 = const()[name = string("op_1333_begin_0"), val = tensor([0, 14, 0, 0])]; - tensor var_1333_end_0 = const()[name = string("op_1333_end_0"), val = tensor([1, 15, 1, 1536])]; - tensor var_1333_end_mask_0 = const()[name = string("op_1333_end_mask_0"), val = tensor([true, false, true, true])]; - tensor var_1333_cast_fp16 = slice_by_index(begin = var_1333_begin_0, end = var_1333_end_0, end_mask = var_1333_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1333_cast_fp16")]; - tensor var_1336_begin_0 = const()[name = string("op_1336_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1336_end_0 = const()[name = string("op_1336_end_0"), val = tensor([1, 1, 1, 1536])]; - tensor var_1336_end_mask_0 = const()[name = string("op_1336_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1336_squeeze_mask_0 = const()[name = string("op_1336_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, squeeze_mask = var_1336_squeeze_mask_0, x = var_1333_cast_fp16)[name = string("op_1336_cast_fp16")]; - int32 var_1343 = const()[name = string("op_1343"), val = int32(1)]; - bool var_1344_interleave_0 = const()[name = string("op_1344_interleave_0"), val = bool(false)]; - tensor var_1344_cast_fp16 = concat(axis = var_1343, interleave = var_1344_interleave_0, values = (var_1246_cast_fp16, var_1264_cast_fp16, var_1282_cast_fp16, var_1300_cast_fp16, var_1318_cast_fp16, var_1336_cast_fp16))[name = string("op_1344_cast_fp16")]; - bool var_1347 = const()[name = string("op_1347"), val = bool(false)]; - tensor obj_axes_0 = const()[name = string("obj_axes_0"), val = tensor([1])]; - tensor alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_1347, x = var_1344_cast_fp16)[name = string("obj_cast_fp16")]; - } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); -} \ No newline at end of file