diff --git "a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil" "b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil" deleted file mode 100644--- "a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil" +++ /dev/null @@ -1,2011 +0,0 @@ -program(1.3) -[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] -{ - func main(tensor melspectrogram_features) { - string var_100_pad_type_0 = const()[name = string("op_100_pad_type_0"), val = string("custom")]; - tensor var_100_pad_0 = const()[name = string("op_100_pad_0"), val = tensor([0, 0, 1, 1])]; - tensor var_100_strides_0 = const()[name = string("op_100_strides_0"), val = tensor([1, 1])]; - tensor var_100_dilations_0 = const()[name = string("op_100_dilations_0"), val = tensor([1, 1])]; - int32 var_100_groups_0 = const()[name = string("op_100_groups_0"), val = int32(1)]; - tensor var_69_to_fp16 = const()[name = string("op_69_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; - tensor var_81_to_fp16 = const()[name = string("op_81_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368768)))]; - tensor var_100_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_100_dilations_0, groups = var_100_groups_0, pad = var_100_pad_0, pad_type = var_100_pad_type_0, strides = var_100_strides_0, weight = var_69_to_fp16, x = melspectrogram_features)[name = string("op_100_cast_fp16")]; - string var_138_pad_type_0 = const()[name = string("op_138_pad_type_0"), val = string("custom")]; - tensor var_138_pad_0 = const()[name = string("op_138_pad_0"), val = tensor([0, 0, 1, 1])]; - tensor var_138_strides_0 = const()[name = string("op_138_strides_0"), val = tensor([1, 1])]; - tensor var_138_dilations_0 = const()[name = string("op_138_dilations_0"), val = tensor([1, 1])]; - int32 var_138_groups_0 = const()[name = string("op_138_groups_0"), val = int32(1)]; - tensor op_113_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462592))))[name = string("op_113_to_fp16_palettized")]; - tensor var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462720)))]; - tensor var_138_cast_fp16 = conv(bias = var_119_to_fp16, dilations = var_138_dilations_0, groups = var_138_groups_0, pad = var_138_pad_0, pad_type = var_138_pad_type_0, strides = var_138_strides_0, weight = op_113_to_fp16_palettized, x = melspectrogram_features)[name = string("op_138_cast_fp16")]; - tensor var_140_cast_fp16 = add(x = var_100_cast_fp16, y = var_138_cast_fp16)[name = string("op_140_cast_fp16")]; - string hidden_states_1_mode_0 = const()[name = string("hidden_states_1_mode_0"), val = string("EXACT")]; - tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_140_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; - string var_186_pad_type_0 = const()[name = string("op_186_pad_type_0"), val = string("custom")]; - tensor var_186_pad_0 = const()[name = string("op_186_pad_0"), val = tensor([0, 0, 1, 1])]; - tensor var_186_strides_0 = const()[name = string("op_186_strides_0"), val = tensor([2, 2])]; - tensor var_186_dilations_0 = const()[name = string("op_186_dilations_0"), val = tensor([1, 1])]; - int32 var_186_groups_0 = const()[name = 
string("op_186_groups_0"), val = int32(1)]; - tensor var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464320)))]; - tensor var_186_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_186_dilations_0, groups = var_186_groups_0, pad = var_186_pad_0, pad_type = var_186_pad_type_0, strides = var_186_strides_0, weight = var_155_to_fp16, x = hidden_states_1_cast_fp16)[name = string("op_186_cast_fp16")]; - string var_224_pad_type_0 = const()[name = string("op_224_pad_type_0"), val = string("custom")]; - tensor var_224_pad_0 = const()[name = string("op_224_pad_0"), val = tensor([0, 0, 1, 1])]; - tensor var_224_strides_0 = const()[name = string("op_224_strides_0"), val = tensor([2, 2])]; - tensor var_224_dilations_0 = const()[name = string("op_224_dilations_0"), val = tensor([1, 1])]; - int32 var_224_groups_0 = const()[name = string("op_224_groups_0"), val = int32(1)]; - tensor op_199_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4003328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888128))))[name = string("op_199_to_fp16_palettized")]; - tensor var_205_to_fp16 = const()[name = string("op_205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888256)))]; - tensor var_224_cast_fp16 = conv(bias = var_205_to_fp16, dilations = var_224_dilations_0, groups = var_224_groups_0, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_224_strides_0, weight = op_199_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = string("op_224_cast_fp16")]; - tensor var_226_cast_fp16 = add(x = var_186_cast_fp16, y = var_224_cast_fp16)[name = string("op_226_cast_fp16")]; - string hidden_states_3_mode_0 = const()[name = string("hidden_states_3_mode_0"), val = string("EXACT")]; - tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_226_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; - tensor var_246_to_fp16 = const()[name = string("op_246_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4889856)))]; - tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_246_to_fp16)[name = string("inputs_1_cast_fp16")]; - int32 var_260 = const()[name = string("op_260"), val = int32(3)]; - tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; - fp16 var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_279_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")]; - tensor obj_1_variance_0_to_fp16 = const()[name = string("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7193920)))]; - tensor obj_1_gamma_0_to_fp16 = const()[name = string("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7195520)))]; - tensor obj_1_beta_0_to_fp16 = const()[name = string("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7197120)))]; - fp16 obj_1_epsilon_0_to_fp16 = const()[name = string("obj_1_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma 
= obj_1_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_1_cast_fp16")]; - string var_301_pad_type_0 = const()[name = string("op_301_pad_type_0"), val = string("valid")]; - tensor var_301_strides_0 = const()[name = string("op_301_strides_0"), val = tensor([1, 1])]; - tensor var_301_pad_0 = const()[name = string("op_301_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_301_dilations_0 = const()[name = string("op_301_dilations_0"), val = tensor([1, 1])]; - int32 var_301_groups_0 = const()[name = string("op_301_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7198720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493696))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493824)))]; - tensor var_301_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_301_dilations_0, groups = var_301_groups_0, pad = var_301_pad_0, pad_type = var_301_pad_type_0, strides = var_301_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_301_cast_fp16")]; - string var_307_pad_type_0 = const()[name = string("op_307_pad_type_0"), val = string("valid")]; - tensor var_307_strides_0 = const()[name = string("op_307_strides_0"), val = tensor([1, 1])]; - tensor var_307_pad_0 = const()[name = string("op_307_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_307_dilations_0 = const()[name = string("op_307_dilations_0"), val = tensor([1, 1])]; - int32 var_307_groups_0 = const()[name = string("op_307_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7518208))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7495424))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_307_cast_fp16 = conv(dilations = var_307_dilations_0, groups = var_307_groups_0, pad = var_307_pad_0, pad_type = var_307_pad_type_0, strides = var_307_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_307_cast_fp16")]; - tensor query_1_cast_fp16 = add(x = var_301_cast_fp16, y = var_307_cast_fp16)[name = string("query_1_cast_fp16")]; - string var_316_pad_type_0 = const()[name = string("op_316_pad_type_0"), val = string("valid")]; - tensor var_316_strides_0 = const()[name = string("op_316_strides_0"), val = tensor([1, 1])]; - tensor var_316_pad_0 = const()[name = string("op_316_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_316_dilations_0 = const()[name = string("op_316_dilations_0"), val = tensor([1, 1])]; - int32 var_316_groups_0 = const()[name = string("op_316_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(7592000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7886976))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_316_cast_fp16 = conv(dilations = var_316_dilations_0, groups = var_316_groups_0, pad = var_316_pad_0, pad_type = var_316_pad_type_0, strides = var_316_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_316_cast_fp16")]; - string var_322_pad_type_0 = const()[name = string("op_322_pad_type_0"), val = string("valid")]; - tensor var_322_strides_0 = const()[name = string("op_322_strides_0"), val = tensor([1, 1])]; - tensor var_322_pad_0 = const()[name = string("op_322_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_322_dilations_0 = const()[name = string("op_322_dilations_0"), val = tensor([1, 1])]; - int32 var_322_groups_0 = const()[name = string("op_322_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7908352))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7887104))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_322_cast_fp16 = conv(dilations = var_322_dilations_0, groups = var_322_groups_0, pad = var_322_pad_0, pad_type = var_322_pad_type_0, strides = var_322_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_322_cast_fp16")]; - tensor key_1_cast_fp16 = add(x = var_316_cast_fp16, y = var_322_cast_fp16)[name = string("key_1_cast_fp16")]; - string var_332_pad_type_0 = const()[name = string("op_332_pad_type_0"), val = string("valid")]; - tensor var_332_strides_0 = const()[name = string("op_332_strides_0"), val = tensor([1, 1])]; - tensor var_332_pad_0 = const()[name = string("op_332_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_332_dilations_0 = const()[name = string("op_332_dilations_0"), val = tensor([1, 1])]; - int32 var_332_groups_0 = const()[name = string("op_332_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7982144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8277120))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8277248)))]; - tensor var_332_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_332_dilations_0, groups = var_332_groups_0, pad = var_332_pad_0, pad_type = var_332_pad_type_0, strides = var_332_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_332_cast_fp16")]; - string var_338_pad_type_0 = const()[name = string("op_338_pad_type_0"), val = string("valid")]; - tensor var_338_strides_0 = const()[name = string("op_338_strides_0"), val = tensor([1, 1])]; - tensor var_338_pad_0 = const()[name = string("op_338_pad_0"), val = tensor([0, 
0, 0, 0])]; - tensor var_338_dilations_0 = const()[name = string("op_338_dilations_0"), val = tensor([1, 1])]; - int32 var_338_groups_0 = const()[name = string("op_338_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8302400))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8278848))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_338_cast_fp16 = conv(dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_338_cast_fp16")]; - tensor value_1_cast_fp16 = add(x = var_332_cast_fp16, y = var_338_cast_fp16)[name = string("value_1_cast_fp16")]; - tensor var_341 = const()[name = string("op_341"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_1_cast_fp16 = reshape(shape = var_341, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; - fp16 var_343_to_fp16 = const()[name = string("op_343_to_fp16"), val = fp16(0x1p-3)]; - tensor var_344_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_343_to_fp16)[name = string("op_344_cast_fp16")]; - tensor var_345 = const()[name = string("op_345"), val = tensor([1, 12, 64, -1])]; - tensor var_346_cast_fp16 = reshape(shape = var_345, x = key_1_cast_fp16)[name = string("op_346_cast_fp16")]; - bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; - bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; - tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_344_cast_fp16, y = var_346_cast_fp16)[name = string("mh_w_1_cast_fp16")]; - tensor var_349_cast_fp16 = softmax(axis = var_260, x = mh_w_1_cast_fp16)[name = string("op_349_cast_fp16")]; - tensor var_350 = const()[name = string("op_350"), val = tensor([1, 12, 64, -1])]; - tensor var_351_cast_fp16 = reshape(shape = var_350, x = value_1_cast_fp16)[name = string("op_351_cast_fp16")]; - bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; - bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_351_cast_fp16, y = var_349_cast_fp16)[name = string("attn_1_cast_fp16")]; - tensor var_354 = const()[name = string("op_354"), val = tensor([1, 768, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_354, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; - string var_364_pad_type_0 = const()[name = string("op_364_pad_type_0"), val = string("valid")]; - tensor var_364_strides_0 = const()[name = string("op_364_strides_0"), val = tensor([1, 1])]; - tensor var_364_pad_0 = const()[name = string("op_364_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_364_dilations_0 = const()[name = string("op_364_dilations_0"), val = tensor([1, 1])]; - int32 var_364_groups_0 = const()[name = string("op_364_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8376192))), 
lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8671168))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8671296)))]; - tensor var_364_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_364_dilations_0, groups = var_364_groups_0, pad = var_364_pad_0, pad_type = var_364_pad_type_0, strides = var_364_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_364_cast_fp16")]; - string var_370_pad_type_0 = const()[name = string("op_370_pad_type_0"), val = string("valid")]; - tensor var_370_strides_0 = const()[name = string("op_370_strides_0"), val = tensor([1, 1])]; - tensor var_370_pad_0 = const()[name = string("op_370_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_370_dilations_0 = const()[name = string("op_370_dilations_0"), val = tensor([1, 1])]; - int32 var_370_groups_0 = const()[name = string("op_370_groups_0"), val = int32(1)]; - tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8692480))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8672896))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_370_cast_fp16 = conv(dilations = var_370_dilations_0, groups = var_370_groups_0, pad = var_370_pad_0, pad_type = var_370_pad_type_0, strides = var_370_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_370_cast_fp16")]; - tensor obj_3_cast_fp16 = add(x = var_364_cast_fp16, y = var_370_cast_fp16)[name = string("obj_3_cast_fp16")]; - tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = string("inputs_3_cast_fp16")]; - tensor out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor([1])]; - fp16 var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_381_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")]; - tensor input_3_gamma_0_to_fp16 = const()[name = string("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8766272)))]; - tensor input_3_beta_0_to_fp16 = const()[name = string("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8767872)))]; - fp16 input_3_epsilon_0_to_fp16 = const()[name = string("input_3_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("input_3_cast_fp16")]; - string var_399_pad_type_0 = const()[name = string("op_399_pad_type_0"), val = string("valid")]; - tensor var_399_strides_0 = const()[name = string("op_399_strides_0"), val = tensor([1, 1])]; - tensor var_399_pad_0 = const()[name = string("op_399_pad_0"), val = tensor([0, 0, 0, 0])]; - 
tensor var_399_dilations_0 = const()[name = string("op_399_dilations_0"), val = tensor([1, 1])]; - int32 var_399_groups_0 = const()[name = string("op_399_groups_0"), val = int32(1)]; - tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8769472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9949184))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9949312)))]; - tensor var_399_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_399_dilations_0, groups = var_399_groups_0, pad = var_399_pad_0, pad_type = var_399_pad_type_0, strides = var_399_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_399_cast_fp16")]; - string var_405_pad_type_0 = const()[name = string("op_405_pad_type_0"), val = string("valid")]; - tensor var_405_strides_0 = const()[name = string("op_405_strides_0"), val = tensor([1, 1])]; - tensor var_405_pad_0 = const()[name = string("op_405_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_405_dilations_0 = const()[name = string("op_405_dilations_0"), val = tensor([1, 1])]; - int32 var_405_groups_0 = const()[name = string("op_405_groups_0"), val = int32(1)]; - tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10044736))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9955520))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_405_cast_fp16 = conv(dilations = var_405_dilations_0, groups = var_405_groups_0, pad = var_405_pad_0, pad_type = var_405_pad_type_0, strides = var_405_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_405_cast_fp16")]; - tensor input_5_cast_fp16 = add(x = var_399_cast_fp16, y = var_405_cast_fp16)[name = string("input_5_cast_fp16")]; - string input_7_mode_0 = const()[name = string("input_7_mode_0"), val = string("EXACT")]; - tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")]; - string var_416_pad_type_0 = const()[name = string("op_416_pad_type_0"), val = string("valid")]; - tensor var_416_strides_0 = const()[name = string("op_416_strides_0"), val = tensor([1, 1])]; - tensor var_416_pad_0 = const()[name = string("op_416_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_416_dilations_0 = const()[name = string("op_416_dilations_0"), val = tensor([1, 1])]; - int32 var_416_groups_0 = const()[name = string("op_416_groups_0"), val = int32(1)]; - tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10339712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11519424))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(11519552)))]; - tensor var_416_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_416_dilations_0, groups = var_416_groups_0, pad = var_416_pad_0, pad_type = var_416_pad_type_0, strides = var_416_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("op_416_cast_fp16")]; - string var_422_pad_type_0 = const()[name = string("op_422_pad_type_0"), val = string("valid")]; - tensor var_422_strides_0 = const()[name = string("op_422_strides_0"), val = tensor([1, 1])]; - tensor var_422_pad_0 = const()[name = string("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_dilations_0 = const()[name = string("op_422_dilations_0"), val = tensor([1, 1])]; - int32 var_422_groups_0 = const()[name = string("op_422_groups_0"), val = int32(1)]; - tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11594560))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11521152))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_422_cast_fp16 = conv(dilations = var_422_dilations_0, groups = var_422_groups_0, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_422_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = string("op_422_cast_fp16")]; - tensor hidden_states_5_cast_fp16 = add(x = var_416_cast_fp16, y = var_422_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; - tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_5_cast_fp16")]; - int32 var_432 = const()[name = string("op_432"), val = int32(3)]; - tensor out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor([1])]; - fp16 var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_451_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")]; - tensor obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11889536)))]; - tensor obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11891136)))]; - fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("obj_5_cast_fp16")]; - string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")]; - tensor var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor([1, 1])]; - tensor var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor([1, 1])]; - int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(11892736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12187712))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12187840)))]; - tensor var_473_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_473_cast_fp16")]; - string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")]; - tensor var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor([1, 1])]; - tensor var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor([1, 1])]; - int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12206656))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12189440))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_479_cast_fp16")]; - tensor query_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("query_3_cast_fp16")]; - string var_488_pad_type_0 = const()[name = string("op_488_pad_type_0"), val = string("valid")]; - tensor var_488_strides_0 = const()[name = string("op_488_strides_0"), val = tensor([1, 1])]; - tensor var_488_pad_0 = const()[name = string("op_488_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_488_dilations_0 = const()[name = string("op_488_dilations_0"), val = tensor([1, 1])]; - int32 var_488_groups_0 = const()[name = string("op_488_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12280448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12575424))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_488_cast_fp16 = conv(dilations = var_488_dilations_0, groups = var_488_groups_0, pad = var_488_pad_0, pad_type = var_488_pad_type_0, strides = var_488_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_488_cast_fp16")]; - string var_494_pad_type_0 = const()[name = string("op_494_pad_type_0"), val = string("valid")]; - tensor var_494_strides_0 = const()[name = string("op_494_strides_0"), val = tensor([1, 1])]; - tensor var_494_pad_0 = 
const()[name = string("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_dilations_0 = const()[name = string("op_494_dilations_0"), val = tensor([1, 1])]; - int32 var_494_groups_0 = const()[name = string("op_494_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12591360))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12575552))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_494_cast_fp16 = conv(dilations = var_494_dilations_0, groups = var_494_groups_0, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_494_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_494_cast_fp16")]; - tensor key_3_cast_fp16 = add(x = var_488_cast_fp16, y = var_494_cast_fp16)[name = string("key_3_cast_fp16")]; - string var_504_pad_type_0 = const()[name = string("op_504_pad_type_0"), val = string("valid")]; - tensor var_504_strides_0 = const()[name = string("op_504_strides_0"), val = tensor([1, 1])]; - tensor var_504_pad_0 = const()[name = string("op_504_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_504_dilations_0 = const()[name = string("op_504_dilations_0"), val = tensor([1, 1])]; - int32 var_504_groups_0 = const()[name = string("op_504_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12665152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12960128))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12960256)))]; - tensor var_504_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_504_dilations_0, groups = var_504_groups_0, pad = var_504_pad_0, pad_type = var_504_pad_type_0, strides = var_504_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_504_cast_fp16")]; - string var_510_pad_type_0 = const()[name = string("op_510_pad_type_0"), val = string("valid")]; - tensor var_510_strides_0 = const()[name = string("op_510_strides_0"), val = tensor([1, 1])]; - tensor var_510_pad_0 = const()[name = string("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_dilations_0 = const()[name = string("op_510_dilations_0"), val = tensor([1, 1])]; - int32 var_510_groups_0 = const()[name = string("op_510_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12975872))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12961856))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_510_cast_fp16 = conv(dilations = var_510_dilations_0, groups = var_510_groups_0, pad = var_510_pad_0, pad_type = 
var_510_pad_type_0, strides = var_510_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_510_cast_fp16")]; - tensor value_3_cast_fp16 = add(x = var_504_cast_fp16, y = var_510_cast_fp16)[name = string("value_3_cast_fp16")]; - tensor var_513 = const()[name = string("op_513"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_3_cast_fp16 = reshape(shape = var_513, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")]; - fp16 var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = fp16(0x1p-3)]; - tensor var_516_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_515_to_fp16)[name = string("op_516_cast_fp16")]; - tensor var_517 = const()[name = string("op_517"), val = tensor([1, 12, 64, -1])]; - tensor var_518_cast_fp16 = reshape(shape = var_517, x = key_3_cast_fp16)[name = string("op_518_cast_fp16")]; - bool mh_w_3_transpose_x_0 = const()[name = string("mh_w_3_transpose_x_0"), val = bool(true)]; - bool mh_w_3_transpose_y_0 = const()[name = string("mh_w_3_transpose_y_0"), val = bool(false)]; - tensor mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_516_cast_fp16, y = var_518_cast_fp16)[name = string("mh_w_3_cast_fp16")]; - tensor var_521_cast_fp16 = softmax(axis = var_432, x = mh_w_3_cast_fp16)[name = string("op_521_cast_fp16")]; - tensor var_522 = const()[name = string("op_522"), val = tensor([1, 12, 64, -1])]; - tensor var_523_cast_fp16 = reshape(shape = var_522, x = value_3_cast_fp16)[name = string("op_523_cast_fp16")]; - bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; - bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_523_cast_fp16, y = var_521_cast_fp16)[name = string("attn_3_cast_fp16")]; - tensor var_526 = const()[name = string("op_526"), val = tensor([1, 768, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_526, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")]; - string var_536_pad_type_0 = const()[name = string("op_536_pad_type_0"), val = string("valid")]; - tensor var_536_strides_0 = const()[name = string("op_536_strides_0"), val = tensor([1, 1])]; - tensor var_536_pad_0 = const()[name = string("op_536_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_536_dilations_0 = const()[name = string("op_536_dilations_0"), val = tensor([1, 1])]; - int32 var_536_groups_0 = const()[name = string("op_536_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13049664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13344640))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13344768)))]; - tensor var_536_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_536_dilations_0, groups = var_536_groups_0, pad = var_536_pad_0, pad_type = var_536_pad_type_0, strides = var_536_strides_0, weight = 
layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_536_cast_fp16")]; - string var_542_pad_type_0 = const()[name = string("op_542_pad_type_0"), val = string("valid")]; - tensor var_542_strides_0 = const()[name = string("op_542_strides_0"), val = tensor([1, 1])]; - tensor var_542_pad_0 = const()[name = string("op_542_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_542_dilations_0 = const()[name = string("op_542_dilations_0"), val = tensor([1, 1])]; - int32 var_542_groups_0 = const()[name = string("op_542_groups_0"), val = int32(1)]; - tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13357056))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13346368))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_542_cast_fp16 = conv(dilations = var_542_dilations_0, groups = var_542_groups_0, pad = var_542_pad_0, pad_type = var_542_pad_type_0, strides = var_542_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_542_cast_fp16")]; - tensor obj_7_cast_fp16 = add(x = var_536_cast_fp16, y = var_542_cast_fp16)[name = string("obj_7_cast_fp16")]; - tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = string("inputs_7_cast_fp16")]; - tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; - fp16 var_553_to_fp16 = const()[name = string("op_553_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_553_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")]; - tensor input_11_gamma_0_to_fp16 = const()[name = string("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13430848)))]; - tensor input_11_beta_0_to_fp16 = const()[name = string("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13432448)))]; - fp16 input_11_epsilon_0_to_fp16 = const()[name = string("input_11_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("input_11_cast_fp16")]; - string var_571_pad_type_0 = const()[name = string("op_571_pad_type_0"), val = string("valid")]; - tensor var_571_strides_0 = const()[name = string("op_571_strides_0"), val = tensor([1, 1])]; - tensor var_571_pad_0 = const()[name = string("op_571_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_571_dilations_0 = const()[name = string("op_571_dilations_0"), val = tensor([1, 1])]; - int32 var_571_groups_0 = const()[name = string("op_571_groups_0"), val = int32(1)]; - tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13434048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14613760))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = 
string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14613888)))]; - tensor var_571_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_571_dilations_0, groups = var_571_groups_0, pad = var_571_pad_0, pad_type = var_571_pad_type_0, strides = var_571_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_571_cast_fp16")]; - string var_577_pad_type_0 = const()[name = string("op_577_pad_type_0"), val = string("valid")]; - tensor var_577_strides_0 = const()[name = string("op_577_strides_0"), val = tensor([1, 1])]; - tensor var_577_pad_0 = const()[name = string("op_577_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_577_dilations_0 = const()[name = string("op_577_dilations_0"), val = tensor([1, 1])]; - int32 var_577_groups_0 = const()[name = string("op_577_groups_0"), val = int32(1)]; - tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14688448))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14620096))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_577_cast_fp16 = conv(dilations = var_577_dilations_0, groups = var_577_groups_0, pad = var_577_pad_0, pad_type = var_577_pad_type_0, strides = var_577_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_577_cast_fp16")]; - tensor input_13_cast_fp16 = add(x = var_571_cast_fp16, y = var_577_cast_fp16)[name = string("input_13_cast_fp16")]; - string input_15_mode_0 = const()[name = string("input_15_mode_0"), val = string("EXACT")]; - tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = string("input_15_cast_fp16")]; - string var_588_pad_type_0 = const()[name = string("op_588_pad_type_0"), val = string("valid")]; - tensor var_588_strides_0 = const()[name = string("op_588_strides_0"), val = tensor([1, 1])]; - tensor var_588_pad_0 = const()[name = string("op_588_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_588_dilations_0 = const()[name = string("op_588_dilations_0"), val = tensor([1, 1])]; - int32 var_588_groups_0 = const()[name = string("op_588_groups_0"), val = int32(1)]; - tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14983424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16163136))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16163264)))]; - tensor var_588_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_588_dilations_0, groups = var_588_groups_0, pad = var_588_pad_0, pad_type = var_588_pad_type_0, strides = var_588_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_588_cast_fp16")]; - string var_594_pad_type_0 = const()[name = string("op_594_pad_type_0"), val = string("valid")]; - tensor var_594_strides_0 = const()[name = string("op_594_strides_0"), val = 
tensor([1, 1])]; - tensor var_594_pad_0 = const()[name = string("op_594_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_594_dilations_0 = const()[name = string("op_594_dilations_0"), val = tensor([1, 1])]; - int32 var_594_groups_0 = const()[name = string("op_594_groups_0"), val = int32(1)]; - tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16225152))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16164864))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_594_cast_fp16 = conv(dilations = var_594_dilations_0, groups = var_594_groups_0, pad = var_594_pad_0, pad_type = var_594_pad_type_0, strides = var_594_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_594_cast_fp16")]; - tensor hidden_states_7_cast_fp16 = add(x = var_588_cast_fp16, y = var_594_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; - tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_9_cast_fp16")]; - int32 var_604 = const()[name = string("op_604"), val = int32(3)]; - tensor out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor([1])]; - fp16 var_623_to_fp16 = const()[name = string("op_623_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_623_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")]; - tensor obj_9_gamma_0_to_fp16 = const()[name = string("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16520128)))]; - tensor obj_9_beta_0_to_fp16 = const()[name = string("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16521728)))]; - fp16 obj_9_epsilon_0_to_fp16 = const()[name = string("obj_9_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_9_cast_fp16")]; - string var_645_pad_type_0 = const()[name = string("op_645_pad_type_0"), val = string("valid")]; - tensor var_645_strides_0 = const()[name = string("op_645_strides_0"), val = tensor([1, 1])]; - tensor var_645_pad_0 = const()[name = string("op_645_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_645_dilations_0 = const()[name = string("op_645_dilations_0"), val = tensor([1, 1])]; - int32 var_645_groups_0 = const()[name = string("op_645_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16523328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16818304))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16818432)))]; - tensor var_645_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = 
var_645_dilations_0, groups = var_645_groups_0, pad = var_645_pad_0, pad_type = var_645_pad_type_0, strides = var_645_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_645_cast_fp16")]; - string var_651_pad_type_0 = const()[name = string("op_651_pad_type_0"), val = string("valid")]; - tensor var_651_strides_0 = const()[name = string("op_651_strides_0"), val = tensor([1, 1])]; - tensor var_651_pad_0 = const()[name = string("op_651_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_651_dilations_0 = const()[name = string("op_651_dilations_0"), val = tensor([1, 1])]; - int32 var_651_groups_0 = const()[name = string("op_651_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16834112))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16820032))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_651_cast_fp16 = conv(dilations = var_651_dilations_0, groups = var_651_groups_0, pad = var_651_pad_0, pad_type = var_651_pad_type_0, strides = var_651_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_651_cast_fp16")]; - tensor query_5_cast_fp16 = add(x = var_645_cast_fp16, y = var_651_cast_fp16)[name = string("query_5_cast_fp16")]; - string var_660_pad_type_0 = const()[name = string("op_660_pad_type_0"), val = string("valid")]; - tensor var_660_strides_0 = const()[name = string("op_660_strides_0"), val = tensor([1, 1])]; - tensor var_660_pad_0 = const()[name = string("op_660_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_660_dilations_0 = const()[name = string("op_660_dilations_0"), val = tensor([1, 1])]; - int32 var_660_groups_0 = const()[name = string("op_660_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16907904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17202880))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_660_cast_fp16 = conv(dilations = var_660_dilations_0, groups = var_660_groups_0, pad = var_660_pad_0, pad_type = var_660_pad_type_0, strides = var_660_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_660_cast_fp16")]; - string var_666_pad_type_0 = const()[name = string("op_666_pad_type_0"), val = string("valid")]; - tensor var_666_strides_0 = const()[name = string("op_666_strides_0"), val = tensor([1, 1])]; - tensor var_666_pad_0 = const()[name = string("op_666_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_666_dilations_0 = const()[name = string("op_666_dilations_0"), val = tensor([1, 1])]; - int32 var_666_groups_0 = const()[name = string("op_666_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17216384))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17203008))))[name = 
string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_666_cast_fp16 = conv(dilations = var_666_dilations_0, groups = var_666_groups_0, pad = var_666_pad_0, pad_type = var_666_pad_type_0, strides = var_666_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_666_cast_fp16")]; - tensor key_5_cast_fp16 = add(x = var_660_cast_fp16, y = var_666_cast_fp16)[name = string("key_5_cast_fp16")]; - string var_676_pad_type_0 = const()[name = string("op_676_pad_type_0"), val = string("valid")]; - tensor var_676_strides_0 = const()[name = string("op_676_strides_0"), val = tensor([1, 1])]; - tensor var_676_pad_0 = const()[name = string("op_676_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_676_dilations_0 = const()[name = string("op_676_dilations_0"), val = tensor([1, 1])]; - int32 var_676_groups_0 = const()[name = string("op_676_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17290176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17585152))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17585280)))]; - tensor var_676_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_676_dilations_0, groups = var_676_groups_0, pad = var_676_pad_0, pad_type = var_676_pad_type_0, strides = var_676_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_676_cast_fp16")]; - string var_682_pad_type_0 = const()[name = string("op_682_pad_type_0"), val = string("valid")]; - tensor var_682_strides_0 = const()[name = string("op_682_strides_0"), val = tensor([1, 1])]; - tensor var_682_pad_0 = const()[name = string("op_682_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_682_dilations_0 = const()[name = string("op_682_dilations_0"), val = tensor([1, 1])]; - int32 var_682_groups_0 = const()[name = string("op_682_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17596992))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17586880))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_682_cast_fp16 = conv(dilations = var_682_dilations_0, groups = var_682_groups_0, pad = var_682_pad_0, pad_type = var_682_pad_type_0, strides = var_682_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_682_cast_fp16")]; - tensor value_5_cast_fp16 = add(x = var_676_cast_fp16, y = var_682_cast_fp16)[name = string("value_5_cast_fp16")]; - tensor var_685 = const()[name = string("op_685"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_5_cast_fp16 = reshape(shape = var_685, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")]; - fp16 var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = fp16(0x1p-3)]; - 
tensor var_688_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_687_to_fp16)[name = string("op_688_cast_fp16")]; - tensor var_689 = const()[name = string("op_689"), val = tensor([1, 12, 64, -1])]; - tensor var_690_cast_fp16 = reshape(shape = var_689, x = key_5_cast_fp16)[name = string("op_690_cast_fp16")]; - bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; - bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; - tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_688_cast_fp16, y = var_690_cast_fp16)[name = string("mh_w_5_cast_fp16")]; - tensor var_693_cast_fp16 = softmax(axis = var_604, x = mh_w_5_cast_fp16)[name = string("op_693_cast_fp16")]; - tensor var_694 = const()[name = string("op_694"), val = tensor([1, 12, 64, -1])]; - tensor var_695_cast_fp16 = reshape(shape = var_694, x = value_5_cast_fp16)[name = string("op_695_cast_fp16")]; - bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; - bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_695_cast_fp16, y = var_693_cast_fp16)[name = string("attn_5_cast_fp16")]; - tensor var_698 = const()[name = string("op_698"), val = tensor([1, 768, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_698, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")]; - string var_708_pad_type_0 = const()[name = string("op_708_pad_type_0"), val = string("valid")]; - tensor var_708_strides_0 = const()[name = string("op_708_strides_0"), val = tensor([1, 1])]; - tensor var_708_pad_0 = const()[name = string("op_708_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_708_dilations_0 = const()[name = string("op_708_dilations_0"), val = tensor([1, 1])]; - int32 var_708_groups_0 = const()[name = string("op_708_groups_0"), val = int32(1)]; - tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17670784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17965760))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17965888)))]; - tensor var_708_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_708_dilations_0, groups = var_708_groups_0, pad = var_708_pad_0, pad_type = var_708_pad_type_0, strides = var_708_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("op_708_cast_fp16")]; - string var_714_pad_type_0 = const()[name = string("op_714_pad_type_0"), val = string("valid")]; - tensor var_714_strides_0 = const()[name = string("op_714_strides_0"), val = tensor([1, 1])]; - tensor var_714_pad_0 = const()[name = string("op_714_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_714_dilations_0 = const()[name = string("op_714_dilations_0"), val = tensor([1, 1])]; - int32 var_714_groups_0 = const()[name = string("op_714_groups_0"), val = int32(1)]; - tensor 
layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17976064))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17967488))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_714_cast_fp16 = conv(dilations = var_714_dilations_0, groups = var_714_groups_0, pad = var_714_pad_0, pad_type = var_714_pad_type_0, strides = var_714_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = string("op_714_cast_fp16")]; - tensor obj_11_cast_fp16 = add(x = var_708_cast_fp16, y = var_714_cast_fp16)[name = string("obj_11_cast_fp16")]; - tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_11_cast_fp16")]; - tensor out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor([1])]; - fp16 var_725_to_fp16 = const()[name = string("op_725_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_725_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")]; - tensor input_19_gamma_0_to_fp16 = const()[name = string("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18049856)))]; - tensor input_19_beta_0_to_fp16 = const()[name = string("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18051456)))]; - fp16 input_19_epsilon_0_to_fp16 = const()[name = string("input_19_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_19_cast_fp16")]; - string var_743_pad_type_0 = const()[name = string("op_743_pad_type_0"), val = string("valid")]; - tensor var_743_strides_0 = const()[name = string("op_743_strides_0"), val = tensor([1, 1])]; - tensor var_743_pad_0 = const()[name = string("op_743_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_743_dilations_0 = const()[name = string("op_743_dilations_0"), val = tensor([1, 1])]; - int32 var_743_groups_0 = const()[name = string("op_743_groups_0"), val = int32(1)]; - tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18053056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19232768))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19232896)))]; - tensor var_743_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_743_dilations_0, groups = var_743_groups_0, pad = var_743_pad_0, pad_type = var_743_pad_type_0, strides = var_743_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_743_cast_fp16")]; - string var_749_pad_type_0 = const()[name = string("op_749_pad_type_0"), val = string("valid")]; - tensor var_749_strides_0 = const()[name = 
string("op_749_strides_0"), val = tensor([1, 1])]; - tensor var_749_pad_0 = const()[name = string("op_749_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_749_dilations_0 = const()[name = string("op_749_dilations_0"), val = tensor([1, 1])]; - int32 var_749_groups_0 = const()[name = string("op_749_groups_0"), val = int32(1)]; - tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19298752))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19239104))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_749_cast_fp16 = conv(dilations = var_749_dilations_0, groups = var_749_groups_0, pad = var_749_pad_0, pad_type = var_749_pad_type_0, strides = var_749_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_749_cast_fp16")]; - tensor input_21_cast_fp16 = add(x = var_743_cast_fp16, y = var_749_cast_fp16)[name = string("input_21_cast_fp16")]; - string input_23_mode_0 = const()[name = string("input_23_mode_0"), val = string("EXACT")]; - tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = string("input_23_cast_fp16")]; - string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")]; - tensor var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor([1, 1])]; - tensor var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor([1, 1])]; - int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)]; - tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19593728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20773440))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20773568)))]; - tensor var_760_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_760_cast_fp16")]; - string var_766_pad_type_0 = const()[name = string("op_766_pad_type_0"), val = string("valid")]; - tensor var_766_strides_0 = const()[name = string("op_766_strides_0"), val = tensor([1, 1])]; - tensor var_766_pad_0 = const()[name = string("op_766_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_766_dilations_0 = const()[name = string("op_766_dilations_0"), val = tensor([1, 1])]; - int32 var_766_groups_0 = const()[name = string("op_766_groups_0"), val = int32(1)]; - tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20834944))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20775168))))[name = 
string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_766_cast_fp16 = conv(dilations = var_766_dilations_0, groups = var_766_groups_0, pad = var_766_pad_0, pad_type = var_766_pad_type_0, strides = var_766_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_766_cast_fp16")]; - tensor hidden_states_9_cast_fp16 = add(x = var_760_cast_fp16, y = var_766_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; - tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_13_cast_fp16")]; - int32 var_776 = const()[name = string("op_776"), val = int32(3)]; - tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; - fp16 var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_795_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")]; - tensor obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21129920)))]; - tensor obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21131520)))]; - fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_13_cast_fp16")]; - string var_817_pad_type_0 = const()[name = string("op_817_pad_type_0"), val = string("valid")]; - tensor var_817_strides_0 = const()[name = string("op_817_strides_0"), val = tensor([1, 1])]; - tensor var_817_pad_0 = const()[name = string("op_817_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_817_dilations_0 = const()[name = string("op_817_dilations_0"), val = tensor([1, 1])]; - int32 var_817_groups_0 = const()[name = string("op_817_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21133120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21428096))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21428224)))]; - tensor var_817_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_817_dilations_0, groups = var_817_groups_0, pad = var_817_pad_0, pad_type = var_817_pad_type_0, strides = var_817_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_817_cast_fp16")]; - string var_823_pad_type_0 = const()[name = string("op_823_pad_type_0"), val = string("valid")]; - tensor var_823_strides_0 = const()[name = string("op_823_strides_0"), val = tensor([1, 1])]; - tensor var_823_pad_0 = const()[name = string("op_823_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_823_dilations_0 = const()[name = 
string("op_823_dilations_0"), val = tensor([1, 1])]; - int32 var_823_groups_0 = const()[name = string("op_823_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21440384))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21429824))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_823_cast_fp16 = conv(dilations = var_823_dilations_0, groups = var_823_groups_0, pad = var_823_pad_0, pad_type = var_823_pad_type_0, strides = var_823_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_823_cast_fp16")]; - tensor query_7_cast_fp16 = add(x = var_817_cast_fp16, y = var_823_cast_fp16)[name = string("query_7_cast_fp16")]; - string var_832_pad_type_0 = const()[name = string("op_832_pad_type_0"), val = string("valid")]; - tensor var_832_strides_0 = const()[name = string("op_832_strides_0"), val = tensor([1, 1])]; - tensor var_832_pad_0 = const()[name = string("op_832_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_832_dilations_0 = const()[name = string("op_832_dilations_0"), val = tensor([1, 1])]; - int32 var_832_groups_0 = const()[name = string("op_832_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21514176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21809152))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_832_cast_fp16 = conv(dilations = var_832_dilations_0, groups = var_832_groups_0, pad = var_832_pad_0, pad_type = var_832_pad_type_0, strides = var_832_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_832_cast_fp16")]; - string var_838_pad_type_0 = const()[name = string("op_838_pad_type_0"), val = string("valid")]; - tensor var_838_strides_0 = const()[name = string("op_838_strides_0"), val = tensor([1, 1])]; - tensor var_838_pad_0 = const()[name = string("op_838_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_838_dilations_0 = const()[name = string("op_838_dilations_0"), val = tensor([1, 1])]; - int32 var_838_groups_0 = const()[name = string("op_838_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21819776))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21809280))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_838_cast_fp16 = conv(dilations = var_838_dilations_0, groups = var_838_groups_0, pad = var_838_pad_0, pad_type = var_838_pad_type_0, strides = var_838_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_838_cast_fp16")]; - tensor key_7_cast_fp16 = add(x = var_832_cast_fp16, y = var_838_cast_fp16)[name = string("key_7_cast_fp16")]; - string var_848_pad_type_0 = const()[name = string("op_848_pad_type_0"), val = string("valid")]; - tensor var_848_strides_0 = 
const()[name = string("op_848_strides_0"), val = tensor([1, 1])]; - tensor var_848_pad_0 = const()[name = string("op_848_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_848_dilations_0 = const()[name = string("op_848_dilations_0"), val = tensor([1, 1])]; - int32 var_848_groups_0 = const()[name = string("op_848_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21893568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22188544))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22188672)))]; - tensor var_848_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_848_dilations_0, groups = var_848_groups_0, pad = var_848_pad_0, pad_type = var_848_pad_type_0, strides = var_848_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_848_cast_fp16")]; - string var_854_pad_type_0 = const()[name = string("op_854_pad_type_0"), val = string("valid")]; - tensor var_854_strides_0 = const()[name = string("op_854_strides_0"), val = tensor([1, 1])]; - tensor var_854_pad_0 = const()[name = string("op_854_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_854_dilations_0 = const()[name = string("op_854_dilations_0"), val = tensor([1, 1])]; - int32 var_854_groups_0 = const()[name = string("op_854_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22198784))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22190272))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_854_cast_fp16 = conv(dilations = var_854_dilations_0, groups = var_854_groups_0, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_854_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_854_cast_fp16")]; - tensor value_7_cast_fp16 = add(x = var_848_cast_fp16, y = var_854_cast_fp16)[name = string("value_7_cast_fp16")]; - tensor var_857 = const()[name = string("op_857"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_7_cast_fp16 = reshape(shape = var_857, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")]; - fp16 var_859_to_fp16 = const()[name = string("op_859_to_fp16"), val = fp16(0x1p-3)]; - tensor var_860_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_859_to_fp16)[name = string("op_860_cast_fp16")]; - tensor var_861 = const()[name = string("op_861"), val = tensor([1, 12, 64, -1])]; - tensor var_862_cast_fp16 = reshape(shape = var_861, x = key_7_cast_fp16)[name = string("op_862_cast_fp16")]; - bool mh_w_7_transpose_x_0 = const()[name = string("mh_w_7_transpose_x_0"), val = bool(true)]; - bool mh_w_7_transpose_y_0 = const()[name = string("mh_w_7_transpose_y_0"), val = bool(false)]; - tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = 
var_860_cast_fp16, y = var_862_cast_fp16)[name = string("mh_w_7_cast_fp16")]; - tensor var_865_cast_fp16 = softmax(axis = var_776, x = mh_w_7_cast_fp16)[name = string("op_865_cast_fp16")]; - tensor var_866 = const()[name = string("op_866"), val = tensor([1, 12, 64, -1])]; - tensor var_867_cast_fp16 = reshape(shape = var_866, x = value_7_cast_fp16)[name = string("op_867_cast_fp16")]; - bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; - bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; - tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_867_cast_fp16, y = var_865_cast_fp16)[name = string("attn_7_cast_fp16")]; - tensor var_870 = const()[name = string("op_870"), val = tensor([1, 768, 1, -1])]; - tensor input_25_cast_fp16 = reshape(shape = var_870, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")]; - string var_880_pad_type_0 = const()[name = string("op_880_pad_type_0"), val = string("valid")]; - tensor var_880_strides_0 = const()[name = string("op_880_strides_0"), val = tensor([1, 1])]; - tensor var_880_pad_0 = const()[name = string("op_880_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_880_dilations_0 = const()[name = string("op_880_dilations_0"), val = tensor([1, 1])]; - int32 var_880_groups_0 = const()[name = string("op_880_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22272576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22567552))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22567680)))]; - tensor var_880_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_880_dilations_0, groups = var_880_groups_0, pad = var_880_pad_0, pad_type = var_880_pad_type_0, strides = var_880_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_880_cast_fp16")]; - string var_886_pad_type_0 = const()[name = string("op_886_pad_type_0"), val = string("valid")]; - tensor var_886_strides_0 = const()[name = string("op_886_strides_0"), val = tensor([1, 1])]; - tensor var_886_pad_0 = const()[name = string("op_886_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_886_dilations_0 = const()[name = string("op_886_dilations_0"), val = tensor([1, 1])]; - int32 var_886_groups_0 = const()[name = string("op_886_groups_0"), val = int32(1)]; - tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22576640))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22569280))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_886_cast_fp16 = conv(dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = 
layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_886_cast_fp16")]; - tensor obj_15_cast_fp16 = add(x = var_880_cast_fp16, y = var_886_cast_fp16)[name = string("obj_15_cast_fp16")]; - tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = string("inputs_15_cast_fp16")]; - tensor out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor([1])]; - fp16 var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_897_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")]; - tensor input_27_gamma_0_to_fp16 = const()[name = string("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22650432)))]; - tensor input_27_beta_0_to_fp16 = const()[name = string("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22652032)))]; - fp16 input_27_epsilon_0_to_fp16 = const()[name = string("input_27_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("input_27_cast_fp16")]; - string var_915_pad_type_0 = const()[name = string("op_915_pad_type_0"), val = string("valid")]; - tensor var_915_strides_0 = const()[name = string("op_915_strides_0"), val = tensor([1, 1])]; - tensor var_915_pad_0 = const()[name = string("op_915_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_915_dilations_0 = const()[name = string("op_915_dilations_0"), val = tensor([1, 1])]; - int32 var_915_groups_0 = const()[name = string("op_915_groups_0"), val = int32(1)]; - tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22653632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23833344))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23833472)))]; - tensor var_915_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_915_dilations_0, groups = var_915_groups_0, pad = var_915_pad_0, pad_type = var_915_pad_type_0, strides = var_915_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_915_cast_fp16")]; - string var_921_pad_type_0 = const()[name = string("op_921_pad_type_0"), val = string("valid")]; - tensor var_921_strides_0 = const()[name = string("op_921_strides_0"), val = tensor([1, 1])]; - tensor var_921_pad_0 = const()[name = string("op_921_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_921_dilations_0 = const()[name = string("op_921_dilations_0"), val = tensor([1, 1])]; - int32 var_921_groups_0 = const()[name = string("op_921_groups_0"), val = int32(1)]; - tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23883648))), nonzero_data = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(23839680))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_921_cast_fp16 = conv(dilations = var_921_dilations_0, groups = var_921_groups_0, pad = var_921_pad_0, pad_type = var_921_pad_type_0, strides = var_921_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = string("op_921_cast_fp16")]; - tensor input_29_cast_fp16 = add(x = var_915_cast_fp16, y = var_921_cast_fp16)[name = string("input_29_cast_fp16")]; - string input_31_mode_0 = const()[name = string("input_31_mode_0"), val = string("EXACT")]; - tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = string("input_31_cast_fp16")]; - string var_932_pad_type_0 = const()[name = string("op_932_pad_type_0"), val = string("valid")]; - tensor var_932_strides_0 = const()[name = string("op_932_strides_0"), val = tensor([1, 1])]; - tensor var_932_pad_0 = const()[name = string("op_932_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_932_dilations_0 = const()[name = string("op_932_dilations_0"), val = tensor([1, 1])]; - int32 var_932_groups_0 = const()[name = string("op_932_groups_0"), val = int32(1)]; - tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24178624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25358336))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25358464)))]; - tensor var_932_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_932_dilations_0, groups = var_932_groups_0, pad = var_932_pad_0, pad_type = var_932_pad_type_0, strides = var_932_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_932_cast_fp16")]; - string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")]; - tensor var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor([1, 1])]; - tensor var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor([1, 1])]; - int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)]; - tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25407808))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25360064))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_938_cast_fp16")]; - tensor hidden_states_11_cast_fp16 = add(x = var_932_cast_fp16, y = var_938_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; - tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = 
string("inputs_17_cast_fp16")]; - int32 var_948 = const()[name = string("op_948"), val = int32(3)]; - tensor out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor([1])]; - fp16 var_967_to_fp16 = const()[name = string("op_967_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_967_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")]; - tensor obj_17_gamma_0_to_fp16 = const()[name = string("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25702784)))]; - tensor obj_17_beta_0_to_fp16 = const()[name = string("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25704384)))]; - fp16 obj_17_epsilon_0_to_fp16 = const()[name = string("obj_17_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("obj_17_cast_fp16")]; - string var_989_pad_type_0 = const()[name = string("op_989_pad_type_0"), val = string("valid")]; - tensor var_989_strides_0 = const()[name = string("op_989_strides_0"), val = tensor([1, 1])]; - tensor var_989_pad_0 = const()[name = string("op_989_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_989_dilations_0 = const()[name = string("op_989_dilations_0"), val = tensor([1, 1])]; - int32 var_989_groups_0 = const()[name = string("op_989_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25705984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26000960))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26001088)))]; - tensor var_989_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_989_dilations_0, groups = var_989_groups_0, pad = var_989_pad_0, pad_type = var_989_pad_type_0, strides = var_989_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_989_cast_fp16")]; - string var_995_pad_type_0 = const()[name = string("op_995_pad_type_0"), val = string("valid")]; - tensor var_995_strides_0 = const()[name = string("op_995_strides_0"), val = tensor([1, 1])]; - tensor var_995_pad_0 = const()[name = string("op_995_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_995_dilations_0 = const()[name = string("op_995_dilations_0"), val = tensor([1, 1])]; - int32 var_995_groups_0 = const()[name = string("op_995_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26012480))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26002688))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_995_cast_fp16 = conv(dilations = 
var_995_dilations_0, groups = var_995_groups_0, pad = var_995_pad_0, pad_type = var_995_pad_type_0, strides = var_995_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_995_cast_fp16")]; - tensor query_9_cast_fp16 = add(x = var_989_cast_fp16, y = var_995_cast_fp16)[name = string("query_9_cast_fp16")]; - string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")]; - tensor var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor([1, 1])]; - tensor var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor([1, 1])]; - int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26086272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26381248))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_1004_cast_fp16 = conv(dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1004_cast_fp16")]; - string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")]; - tensor var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor([1, 1])]; - tensor var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor([1, 1])]; - int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26392192))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26381376))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1010_cast_fp16")]; - tensor key_9_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("key_9_cast_fp16")]; - string var_1020_pad_type_0 = const()[name = string("op_1020_pad_type_0"), val = string("valid")]; - tensor var_1020_strides_0 = const()[name = string("op_1020_strides_0"), val = tensor([1, 1])]; - tensor var_1020_pad_0 = const()[name = string("op_1020_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1020_dilations_0 = const()[name = string("op_1020_dilations_0"), val = tensor([1, 1])]; - int32 var_1020_groups_0 = const()[name = string("op_1020_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(26465984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26760960))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26761088)))]; - tensor var_1020_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1020_dilations_0, groups = var_1020_groups_0, pad = var_1020_pad_0, pad_type = var_1020_pad_type_0, strides = var_1020_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1020_cast_fp16")]; - string var_1026_pad_type_0 = const()[name = string("op_1026_pad_type_0"), val = string("valid")]; - tensor var_1026_strides_0 = const()[name = string("op_1026_strides_0"), val = tensor([1, 1])]; - tensor var_1026_pad_0 = const()[name = string("op_1026_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1026_dilations_0 = const()[name = string("op_1026_dilations_0"), val = tensor([1, 1])]; - int32 var_1026_groups_0 = const()[name = string("op_1026_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26769984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26762688))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1026_cast_fp16 = conv(dilations = var_1026_dilations_0, groups = var_1026_groups_0, pad = var_1026_pad_0, pad_type = var_1026_pad_type_0, strides = var_1026_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1026_cast_fp16")]; - tensor value_9_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1026_cast_fp16)[name = string("value_9_cast_fp16")]; - tensor var_1029 = const()[name = string("op_1029"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_9_cast_fp16 = reshape(shape = var_1029, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")]; - fp16 var_1031_to_fp16 = const()[name = string("op_1031_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1032_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1031_to_fp16)[name = string("op_1032_cast_fp16")]; - tensor var_1033 = const()[name = string("op_1033"), val = tensor([1, 12, 64, -1])]; - tensor var_1034_cast_fp16 = reshape(shape = var_1033, x = key_9_cast_fp16)[name = string("op_1034_cast_fp16")]; - bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; - bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; - tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1032_cast_fp16, y = var_1034_cast_fp16)[name = string("mh_w_9_cast_fp16")]; - tensor var_1037_cast_fp16 = softmax(axis = var_948, x = mh_w_9_cast_fp16)[name = string("op_1037_cast_fp16")]; - tensor var_1038 = const()[name = string("op_1038"), val = tensor([1, 12, 64, -1])]; - tensor var_1039_cast_fp16 = reshape(shape = var_1038, x = value_9_cast_fp16)[name = string("op_1039_cast_fp16")]; - bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)]; - bool 
attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)]; - tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1039_cast_fp16, y = var_1037_cast_fp16)[name = string("attn_9_cast_fp16")]; - tensor var_1042 = const()[name = string("op_1042"), val = tensor([1, 768, 1, -1])]; - tensor input_33_cast_fp16 = reshape(shape = var_1042, x = attn_9_cast_fp16)[name = string("input_33_cast_fp16")]; - string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")]; - tensor var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor([1, 1])]; - tensor var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor([1, 1])]; - int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26843776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27138752))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27138880)))]; - tensor var_1052_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1052_cast_fp16")]; - string var_1058_pad_type_0 = const()[name = string("op_1058_pad_type_0"), val = string("valid")]; - tensor var_1058_strides_0 = const()[name = string("op_1058_strides_0"), val = tensor([1, 1])]; - tensor var_1058_pad_0 = const()[name = string("op_1058_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1058_dilations_0 = const()[name = string("op_1058_dilations_0"), val = tensor([1, 1])]; - int32 var_1058_groups_0 = const()[name = string("op_1058_groups_0"), val = int32(1)]; - tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27146816))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27140480))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1058_cast_fp16 = conv(dilations = var_1058_dilations_0, groups = var_1058_groups_0, pad = var_1058_pad_0, pad_type = var_1058_pad_type_0, strides = var_1058_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1058_cast_fp16")]; - tensor obj_19_cast_fp16 = add(x = var_1052_cast_fp16, y = var_1058_cast_fp16)[name = string("obj_19_cast_fp16")]; - tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_19_cast_fp16")]; - tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; - fp16 var_1069_to_fp16 = const()[name = 
string("op_1069_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1069_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")]; - tensor input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27220608)))]; - tensor input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27222208)))]; - fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("input_35_cast_fp16")]; - string var_1087_pad_type_0 = const()[name = string("op_1087_pad_type_0"), val = string("valid")]; - tensor var_1087_strides_0 = const()[name = string("op_1087_strides_0"), val = tensor([1, 1])]; - tensor var_1087_pad_0 = const()[name = string("op_1087_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1087_dilations_0 = const()[name = string("op_1087_dilations_0"), val = tensor([1, 1])]; - int32 var_1087_groups_0 = const()[name = string("op_1087_groups_0"), val = int32(1)]; - tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27223808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28403520))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28403648)))]; - tensor var_1087_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1087_dilations_0, groups = var_1087_groups_0, pad = var_1087_pad_0, pad_type = var_1087_pad_type_0, strides = var_1087_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1087_cast_fp16")]; - string var_1093_pad_type_0 = const()[name = string("op_1093_pad_type_0"), val = string("valid")]; - tensor var_1093_strides_0 = const()[name = string("op_1093_strides_0"), val = tensor([1, 1])]; - tensor var_1093_pad_0 = const()[name = string("op_1093_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1093_dilations_0 = const()[name = string("op_1093_dilations_0"), val = tensor([1, 1])]; - int32 var_1093_groups_0 = const()[name = string("op_1093_groups_0"), val = int32(1)]; - tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28452288))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28409856))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1093_cast_fp16 = conv(dilations = var_1093_dilations_0, groups = var_1093_groups_0, pad = var_1093_pad_0, pad_type = var_1093_pad_type_0, strides = var_1093_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1093_cast_fp16")]; - tensor 
input_37_cast_fp16 = add(x = var_1087_cast_fp16, y = var_1093_cast_fp16)[name = string("input_37_cast_fp16")]; - string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")]; - tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")]; - string var_1104_pad_type_0 = const()[name = string("op_1104_pad_type_0"), val = string("valid")]; - tensor var_1104_strides_0 = const()[name = string("op_1104_strides_0"), val = tensor([1, 1])]; - tensor var_1104_pad_0 = const()[name = string("op_1104_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1104_dilations_0 = const()[name = string("op_1104_dilations_0"), val = tensor([1, 1])]; - int32 var_1104_groups_0 = const()[name = string("op_1104_groups_0"), val = int32(1)]; - tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28747264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29926976))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29927104)))]; - tensor var_1104_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1104_dilations_0, groups = var_1104_groups_0, pad = var_1104_pad_0, pad_type = var_1104_pad_type_0, strides = var_1104_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1104_cast_fp16")]; - string var_1110_pad_type_0 = const()[name = string("op_1110_pad_type_0"), val = string("valid")]; - tensor var_1110_strides_0 = const()[name = string("op_1110_strides_0"), val = tensor([1, 1])]; - tensor var_1110_pad_0 = const()[name = string("op_1110_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1110_dilations_0 = const()[name = string("op_1110_dilations_0"), val = tensor([1, 1])]; - int32 var_1110_groups_0 = const()[name = string("op_1110_groups_0"), val = int32(1)]; - tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29977152))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29928704))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1110_cast_fp16 = conv(dilations = var_1110_dilations_0, groups = var_1110_groups_0, pad = var_1110_pad_0, pad_type = var_1110_pad_type_0, strides = var_1110_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1110_cast_fp16")]; - tensor hidden_states_13_cast_fp16 = add(x = var_1104_cast_fp16, y = var_1110_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; - tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_21_cast_fp16")]; - int32 var_1120 = const()[name = string("op_1120"), val = int32(3)]; - tensor out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor([1])]; - fp16 var_1139_to_fp16 = const()[name = string("op_1139_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1139_to_fp16, x = inputs_21_cast_fp16)[name = 
string("out_21_cast_fp16")]; - tensor obj_21_gamma_0_to_fp16 = const()[name = string("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30272128)))]; - tensor obj_21_beta_0_to_fp16 = const()[name = string("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30273728)))]; - fp16 obj_21_epsilon_0_to_fp16 = const()[name = string("obj_21_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_21_cast_fp16")]; - string var_1161_pad_type_0 = const()[name = string("op_1161_pad_type_0"), val = string("valid")]; - tensor var_1161_strides_0 = const()[name = string("op_1161_strides_0"), val = tensor([1, 1])]; - tensor var_1161_pad_0 = const()[name = string("op_1161_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1161_dilations_0 = const()[name = string("op_1161_dilations_0"), val = tensor([1, 1])]; - int32 var_1161_groups_0 = const()[name = string("op_1161_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30275328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30570304))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30570432)))]; - tensor var_1161_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1161_dilations_0, groups = var_1161_groups_0, pad = var_1161_pad_0, pad_type = var_1161_pad_type_0, strides = var_1161_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1161_cast_fp16")]; - string var_1167_pad_type_0 = const()[name = string("op_1167_pad_type_0"), val = string("valid")]; - tensor var_1167_strides_0 = const()[name = string("op_1167_strides_0"), val = tensor([1, 1])]; - tensor var_1167_pad_0 = const()[name = string("op_1167_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1167_dilations_0 = const()[name = string("op_1167_dilations_0"), val = tensor([1, 1])]; - int32 var_1167_groups_0 = const()[name = string("op_1167_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30579968))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30572032))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1167_cast_fp16 = conv(dilations = var_1167_dilations_0, groups = var_1167_groups_0, pad = var_1167_pad_0, pad_type = var_1167_pad_type_0, strides = var_1167_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1167_cast_fp16")]; - tensor query_11_cast_fp16 = add(x = var_1161_cast_fp16, y = var_1167_cast_fp16)[name = 
string("query_11_cast_fp16")]; - string var_1176_pad_type_0 = const()[name = string("op_1176_pad_type_0"), val = string("valid")]; - tensor var_1176_strides_0 = const()[name = string("op_1176_strides_0"), val = tensor([1, 1])]; - tensor var_1176_pad_0 = const()[name = string("op_1176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1176_dilations_0 = const()[name = string("op_1176_dilations_0"), val = tensor([1, 1])]; - int32 var_1176_groups_0 = const()[name = string("op_1176_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30653760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30948736))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_1176_cast_fp16 = conv(dilations = var_1176_dilations_0, groups = var_1176_groups_0, pad = var_1176_pad_0, pad_type = var_1176_pad_type_0, strides = var_1176_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1176_cast_fp16")]; - string var_1182_pad_type_0 = const()[name = string("op_1182_pad_type_0"), val = string("valid")]; - tensor var_1182_strides_0 = const()[name = string("op_1182_strides_0"), val = tensor([1, 1])]; - tensor var_1182_pad_0 = const()[name = string("op_1182_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1182_dilations_0 = const()[name = string("op_1182_dilations_0"), val = tensor([1, 1])]; - int32 var_1182_groups_0 = const()[name = string("op_1182_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30957056))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30948864))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1182_cast_fp16 = conv(dilations = var_1182_dilations_0, groups = var_1182_groups_0, pad = var_1182_pad_0, pad_type = var_1182_pad_type_0, strides = var_1182_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1182_cast_fp16")]; - tensor key_11_cast_fp16 = add(x = var_1176_cast_fp16, y = var_1182_cast_fp16)[name = string("key_11_cast_fp16")]; - string var_1192_pad_type_0 = const()[name = string("op_1192_pad_type_0"), val = string("valid")]; - tensor var_1192_strides_0 = const()[name = string("op_1192_strides_0"), val = tensor([1, 1])]; - tensor var_1192_pad_0 = const()[name = string("op_1192_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1192_dilations_0 = const()[name = string("op_1192_dilations_0"), val = tensor([1, 1])]; - int32 var_1192_groups_0 = const()[name = string("op_1192_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31325824))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31325952)))]; - tensor var_1192_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1192_dilations_0, groups = var_1192_groups_0, pad = var_1192_pad_0, pad_type = var_1192_pad_type_0, strides = var_1192_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1192_cast_fp16")]; - string var_1198_pad_type_0 = const()[name = string("op_1198_pad_type_0"), val = string("valid")]; - tensor var_1198_strides_0 = const()[name = string("op_1198_strides_0"), val = tensor([1, 1])]; - tensor var_1198_pad_0 = const()[name = string("op_1198_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1198_dilations_0 = const()[name = string("op_1198_dilations_0"), val = tensor([1, 1])]; - int32 var_1198_groups_0 = const()[name = string("op_1198_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31334976))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31327552))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1198_cast_fp16 = conv(dilations = var_1198_dilations_0, groups = var_1198_groups_0, pad = var_1198_pad_0, pad_type = var_1198_pad_type_0, strides = var_1198_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1198_cast_fp16")]; - tensor value_11_cast_fp16 = add(x = var_1192_cast_fp16, y = var_1198_cast_fp16)[name = string("value_11_cast_fp16")]; - tensor var_1201 = const()[name = string("op_1201"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_11_cast_fp16 = reshape(shape = var_1201, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")]; - fp16 var_1203_to_fp16 = const()[name = string("op_1203_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1204_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1203_to_fp16)[name = string("op_1204_cast_fp16")]; - tensor var_1205 = const()[name = string("op_1205"), val = tensor([1, 12, 64, -1])]; - tensor var_1206_cast_fp16 = reshape(shape = var_1205, x = key_11_cast_fp16)[name = string("op_1206_cast_fp16")]; - bool mh_w_11_transpose_x_0 = const()[name = string("mh_w_11_transpose_x_0"), val = bool(true)]; - bool mh_w_11_transpose_y_0 = const()[name = string("mh_w_11_transpose_y_0"), val = bool(false)]; - tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1204_cast_fp16, y = var_1206_cast_fp16)[name = string("mh_w_11_cast_fp16")]; - tensor var_1209_cast_fp16 = softmax(axis = var_1120, x = mh_w_11_cast_fp16)[name = string("op_1209_cast_fp16")]; - tensor var_1210 = const()[name = string("op_1210"), val = tensor([1, 12, 64, -1])]; - tensor var_1211_cast_fp16 = reshape(shape = var_1210, x = value_11_cast_fp16)[name = string("op_1211_cast_fp16")]; - bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)]; - bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)]; - tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1211_cast_fp16, y = var_1209_cast_fp16)[name = string("attn_11_cast_fp16")]; - tensor var_1214 = const()[name = string("op_1214"), 
val = tensor([1, 768, 1, -1])]; - tensor input_41_cast_fp16 = reshape(shape = var_1214, x = attn_11_cast_fp16)[name = string("input_41_cast_fp16")]; - string var_1224_pad_type_0 = const()[name = string("op_1224_pad_type_0"), val = string("valid")]; - tensor var_1224_strides_0 = const()[name = string("op_1224_strides_0"), val = tensor([1, 1])]; - tensor var_1224_pad_0 = const()[name = string("op_1224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1224_dilations_0 = const()[name = string("op_1224_dilations_0"), val = tensor([1, 1])]; - int32 var_1224_groups_0 = const()[name = string("op_1224_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31408768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31703744))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31703872)))]; - tensor var_1224_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1224_dilations_0, groups = var_1224_groups_0, pad = var_1224_pad_0, pad_type = var_1224_pad_type_0, strides = var_1224_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1224_cast_fp16")]; - string var_1230_pad_type_0 = const()[name = string("op_1230_pad_type_0"), val = string("valid")]; - tensor var_1230_strides_0 = const()[name = string("op_1230_strides_0"), val = tensor([1, 1])]; - tensor var_1230_pad_0 = const()[name = string("op_1230_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1230_dilations_0 = const()[name = string("op_1230_dilations_0"), val = tensor([1, 1])]; - int32 var_1230_groups_0 = const()[name = string("op_1230_groups_0"), val = int32(1)]; - tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31713792))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31705472))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1230_cast_fp16 = conv(dilations = var_1230_dilations_0, groups = var_1230_groups_0, pad = var_1230_pad_0, pad_type = var_1230_pad_type_0, strides = var_1230_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1230_cast_fp16")]; - tensor obj_23_cast_fp16 = add(x = var_1224_cast_fp16, y = var_1230_cast_fp16)[name = string("obj_23_cast_fp16")]; - tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = string("inputs_23_cast_fp16")]; - tensor out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor([1])]; - fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1241_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")]; - tensor input_43_gamma_0_to_fp16 = const()[name = string("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(31787584)))]; - tensor input_43_beta_0_to_fp16 = const()[name = string("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31789184)))]; - fp16 input_43_epsilon_0_to_fp16 = const()[name = string("input_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_43_cast_fp16")]; - string var_1259_pad_type_0 = const()[name = string("op_1259_pad_type_0"), val = string("valid")]; - tensor var_1259_strides_0 = const()[name = string("op_1259_strides_0"), val = tensor([1, 1])]; - tensor var_1259_pad_0 = const()[name = string("op_1259_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1259_dilations_0 = const()[name = string("op_1259_dilations_0"), val = tensor([1, 1])]; - int32 var_1259_groups_0 = const()[name = string("op_1259_groups_0"), val = int32(1)]; - tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31790784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32970496))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32970624)))]; - tensor var_1259_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1259_dilations_0, groups = var_1259_groups_0, pad = var_1259_pad_0, pad_type = var_1259_pad_type_0, strides = var_1259_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1259_cast_fp16")]; - string var_1265_pad_type_0 = const()[name = string("op_1265_pad_type_0"), val = string("valid")]; - tensor var_1265_strides_0 = const()[name = string("op_1265_strides_0"), val = tensor([1, 1])]; - tensor var_1265_pad_0 = const()[name = string("op_1265_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1265_dilations_0 = const()[name = string("op_1265_dilations_0"), val = tensor([1, 1])]; - int32 var_1265_groups_0 = const()[name = string("op_1265_groups_0"), val = int32(1)]; - tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33018432))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32976832))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1265_cast_fp16 = conv(dilations = var_1265_dilations_0, groups = var_1265_groups_0, pad = var_1265_pad_0, pad_type = var_1265_pad_type_0, strides = var_1265_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1265_cast_fp16")]; - tensor input_45_cast_fp16 = add(x = var_1259_cast_fp16, y = var_1265_cast_fp16)[name = string("input_45_cast_fp16")]; - string input_47_mode_0 = const()[name = string("input_47_mode_0"), val = string("EXACT")]; - tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")]; - string 
var_1276_pad_type_0 = const()[name = string("op_1276_pad_type_0"), val = string("valid")]; - tensor var_1276_strides_0 = const()[name = string("op_1276_strides_0"), val = tensor([1, 1])]; - tensor var_1276_pad_0 = const()[name = string("op_1276_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1276_dilations_0 = const()[name = string("op_1276_dilations_0"), val = tensor([1, 1])]; - int32 var_1276_groups_0 = const()[name = string("op_1276_groups_0"), val = int32(1)]; - tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33313408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34493120))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34493248)))]; - tensor var_1276_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1276_dilations_0, groups = var_1276_groups_0, pad = var_1276_pad_0, pad_type = var_1276_pad_type_0, strides = var_1276_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = string("op_1276_cast_fp16")]; - string var_1282_pad_type_0 = const()[name = string("op_1282_pad_type_0"), val = string("valid")]; - tensor var_1282_strides_0 = const()[name = string("op_1282_strides_0"), val = tensor([1, 1])]; - tensor var_1282_pad_0 = const()[name = string("op_1282_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1282_dilations_0 = const()[name = string("op_1282_dilations_0"), val = tensor([1, 1])]; - int32 var_1282_groups_0 = const()[name = string("op_1282_groups_0"), val = int32(1)]; - tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34539520))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34494848))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1282_cast_fp16 = conv(dilations = var_1282_dilations_0, groups = var_1282_groups_0, pad = var_1282_pad_0, pad_type = var_1282_pad_type_0, strides = var_1282_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = string("op_1282_cast_fp16")]; - tensor hidden_states_15_cast_fp16 = add(x = var_1276_cast_fp16, y = var_1282_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; - tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_25_cast_fp16")]; - int32 var_1292 = const()[name = string("op_1292"), val = int32(3)]; - tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; - fp16 var_1311_to_fp16 = const()[name = string("op_1311_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1311_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")]; - tensor obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34834496)))]; - tensor obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(34836096)))]; - fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_25_cast_fp16")]; - string var_1333_pad_type_0 = const()[name = string("op_1333_pad_type_0"), val = string("valid")]; - tensor var_1333_strides_0 = const()[name = string("op_1333_strides_0"), val = tensor([1, 1])]; - tensor var_1333_pad_0 = const()[name = string("op_1333_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1333_dilations_0 = const()[name = string("op_1333_dilations_0"), val = tensor([1, 1])]; - int32 var_1333_groups_0 = const()[name = string("op_1333_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34837696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35132672))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35132800)))]; - tensor var_1333_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1333_dilations_0, groups = var_1333_groups_0, pad = var_1333_pad_0, pad_type = var_1333_pad_type_0, strides = var_1333_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1333_cast_fp16")]; - string var_1339_pad_type_0 = const()[name = string("op_1339_pad_type_0"), val = string("valid")]; - tensor var_1339_strides_0 = const()[name = string("op_1339_strides_0"), val = tensor([1, 1])]; - tensor var_1339_pad_0 = const()[name = string("op_1339_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1339_dilations_0 = const()[name = string("op_1339_dilations_0"), val = tensor([1, 1])]; - int32 var_1339_groups_0 = const()[name = string("op_1339_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35142208))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35134400))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1339_cast_fp16 = conv(dilations = var_1339_dilations_0, groups = var_1339_groups_0, pad = var_1339_pad_0, pad_type = var_1339_pad_type_0, strides = var_1339_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1339_cast_fp16")]; - tensor query_13_cast_fp16 = add(x = var_1333_cast_fp16, y = var_1339_cast_fp16)[name = string("query_13_cast_fp16")]; - string var_1348_pad_type_0 = const()[name = string("op_1348_pad_type_0"), val = string("valid")]; - tensor var_1348_strides_0 = const()[name = string("op_1348_strides_0"), val = tensor([1, 1])]; - tensor var_1348_pad_0 = const()[name = string("op_1348_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor 
var_1348_dilations_0 = const()[name = string("op_1348_dilations_0"), val = tensor([1, 1])]; - int32 var_1348_groups_0 = const()[name = string("op_1348_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35216000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35510976))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_1348_cast_fp16 = conv(dilations = var_1348_dilations_0, groups = var_1348_groups_0, pad = var_1348_pad_0, pad_type = var_1348_pad_type_0, strides = var_1348_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1348_cast_fp16")]; - string var_1354_pad_type_0 = const()[name = string("op_1354_pad_type_0"), val = string("valid")]; - tensor var_1354_strides_0 = const()[name = string("op_1354_strides_0"), val = tensor([1, 1])]; - tensor var_1354_pad_0 = const()[name = string("op_1354_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1354_dilations_0 = const()[name = string("op_1354_dilations_0"), val = tensor([1, 1])]; - int32 var_1354_groups_0 = const()[name = string("op_1354_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35518848))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35511104))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1354_cast_fp16 = conv(dilations = var_1354_dilations_0, groups = var_1354_groups_0, pad = var_1354_pad_0, pad_type = var_1354_pad_type_0, strides = var_1354_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1354_cast_fp16")]; - tensor key_13_cast_fp16 = add(x = var_1348_cast_fp16, y = var_1354_cast_fp16)[name = string("key_13_cast_fp16")]; - string var_1364_pad_type_0 = const()[name = string("op_1364_pad_type_0"), val = string("valid")]; - tensor var_1364_strides_0 = const()[name = string("op_1364_strides_0"), val = tensor([1, 1])]; - tensor var_1364_pad_0 = const()[name = string("op_1364_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1364_dilations_0 = const()[name = string("op_1364_dilations_0"), val = tensor([1, 1])]; - int32 var_1364_groups_0 = const()[name = string("op_1364_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35592640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35887616))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35887744)))]; - tensor var_1364_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1364_dilations_0, groups = var_1364_groups_0, pad = var_1364_pad_0, pad_type = var_1364_pad_type_0, strides = var_1364_strides_0, 
weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1364_cast_fp16")]; - string var_1370_pad_type_0 = const()[name = string("op_1370_pad_type_0"), val = string("valid")]; - tensor var_1370_strides_0 = const()[name = string("op_1370_strides_0"), val = tensor([1, 1])]; - tensor var_1370_pad_0 = const()[name = string("op_1370_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1370_dilations_0 = const()[name = string("op_1370_dilations_0"), val = tensor([1, 1])]; - int32 var_1370_groups_0 = const()[name = string("op_1370_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35895296))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35889344))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1370_cast_fp16 = conv(dilations = var_1370_dilations_0, groups = var_1370_groups_0, pad = var_1370_pad_0, pad_type = var_1370_pad_type_0, strides = var_1370_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1370_cast_fp16")]; - tensor value_13_cast_fp16 = add(x = var_1364_cast_fp16, y = var_1370_cast_fp16)[name = string("value_13_cast_fp16")]; - tensor var_1373 = const()[name = string("op_1373"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_13_cast_fp16 = reshape(shape = var_1373, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")]; - fp16 var_1375_to_fp16 = const()[name = string("op_1375_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1376_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1375_to_fp16)[name = string("op_1376_cast_fp16")]; - tensor var_1377 = const()[name = string("op_1377"), val = tensor([1, 12, 64, -1])]; - tensor var_1378_cast_fp16 = reshape(shape = var_1377, x = key_13_cast_fp16)[name = string("op_1378_cast_fp16")]; - bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; - bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; - tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1376_cast_fp16, y = var_1378_cast_fp16)[name = string("mh_w_13_cast_fp16")]; - tensor var_1381_cast_fp16 = softmax(axis = var_1292, x = mh_w_13_cast_fp16)[name = string("op_1381_cast_fp16")]; - tensor var_1382 = const()[name = string("op_1382"), val = tensor([1, 12, 64, -1])]; - tensor var_1383_cast_fp16 = reshape(shape = var_1382, x = value_13_cast_fp16)[name = string("op_1383_cast_fp16")]; - bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)]; - bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)]; - tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1383_cast_fp16, y = var_1381_cast_fp16)[name = string("attn_13_cast_fp16")]; - tensor var_1386 = const()[name = string("op_1386"), val = tensor([1, 768, 1, -1])]; - tensor input_49_cast_fp16 = reshape(shape = var_1386, x = attn_13_cast_fp16)[name = string("input_49_cast_fp16")]; - string var_1396_pad_type_0 = const()[name = string("op_1396_pad_type_0"), val = string("valid")]; - tensor var_1396_strides_0 = const()[name = string("op_1396_strides_0"), val = 
tensor([1, 1])]; - tensor var_1396_pad_0 = const()[name = string("op_1396_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1396_dilations_0 = const()[name = string("op_1396_dilations_0"), val = tensor([1, 1])]; - int32 var_1396_groups_0 = const()[name = string("op_1396_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35969088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36264064))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36264192)))]; - tensor var_1396_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1396_dilations_0, groups = var_1396_groups_0, pad = var_1396_pad_0, pad_type = var_1396_pad_type_0, strides = var_1396_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1396_cast_fp16")]; - string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")]; - tensor var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor([1, 1])]; - tensor var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor([1, 1])]; - int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)]; - tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36270720))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36265792))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1402_cast_fp16 = conv(dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1402_cast_fp16")]; - tensor obj_27_cast_fp16 = add(x = var_1396_cast_fp16, y = var_1402_cast_fp16)[name = string("obj_27_cast_fp16")]; - tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_27_cast_fp16")]; - tensor out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor([1])]; - fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1413_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")]; - tensor input_51_gamma_0_to_fp16 = const()[name = string("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36344512)))]; - tensor input_51_beta_0_to_fp16 = const()[name = string("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36346112)))]; - fp16 input_51_epsilon_0_to_fp16 = const()[name = string("input_51_epsilon_0_to_fp16"), val = 
fp16(0x1.5p-17)]; - tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("input_51_cast_fp16")]; - string var_1431_pad_type_0 = const()[name = string("op_1431_pad_type_0"), val = string("valid")]; - tensor var_1431_strides_0 = const()[name = string("op_1431_strides_0"), val = tensor([1, 1])]; - tensor var_1431_pad_0 = const()[name = string("op_1431_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1431_dilations_0 = const()[name = string("op_1431_dilations_0"), val = tensor([1, 1])]; - int32 var_1431_groups_0 = const()[name = string("op_1431_groups_0"), val = int32(1)]; - tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36347712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37527424))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37527552)))]; - tensor var_1431_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1431_dilations_0, groups = var_1431_groups_0, pad = var_1431_pad_0, pad_type = var_1431_pad_type_0, strides = var_1431_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1431_cast_fp16")]; - string var_1437_pad_type_0 = const()[name = string("op_1437_pad_type_0"), val = string("valid")]; - tensor var_1437_strides_0 = const()[name = string("op_1437_strides_0"), val = tensor([1, 1])]; - tensor var_1437_pad_0 = const()[name = string("op_1437_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1437_dilations_0 = const()[name = string("op_1437_dilations_0"), val = tensor([1, 1])]; - int32 var_1437_groups_0 = const()[name = string("op_1437_groups_0"), val = int32(1)]; - tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37566848))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37533760))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1437_cast_fp16 = conv(dilations = var_1437_dilations_0, groups = var_1437_groups_0, pad = var_1437_pad_0, pad_type = var_1437_pad_type_0, strides = var_1437_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1437_cast_fp16")]; - tensor input_53_cast_fp16 = add(x = var_1431_cast_fp16, y = var_1437_cast_fp16)[name = string("input_53_cast_fp16")]; - string input_55_mode_0 = const()[name = string("input_55_mode_0"), val = string("EXACT")]; - tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = string("input_55_cast_fp16")]; - string var_1448_pad_type_0 = const()[name = string("op_1448_pad_type_0"), val = string("valid")]; - tensor var_1448_strides_0 = const()[name = string("op_1448_strides_0"), val = tensor([1, 1])]; - tensor var_1448_pad_0 = const()[name = string("op_1448_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1448_dilations_0 = const()[name = 
string("op_1448_dilations_0"), val = tensor([1, 1])]; - int32 var_1448_groups_0 = const()[name = string("op_1448_groups_0"), val = int32(1)]; - tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37861824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39041536))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39041664)))]; - tensor var_1448_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1448_dilations_0, groups = var_1448_groups_0, pad = var_1448_pad_0, pad_type = var_1448_pad_type_0, strides = var_1448_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1448_cast_fp16")]; - string var_1454_pad_type_0 = const()[name = string("op_1454_pad_type_0"), val = string("valid")]; - tensor var_1454_strides_0 = const()[name = string("op_1454_strides_0"), val = tensor([1, 1])]; - tensor var_1454_pad_0 = const()[name = string("op_1454_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1454_dilations_0 = const()[name = string("op_1454_dilations_0"), val = tensor([1, 1])]; - int32 var_1454_groups_0 = const()[name = string("op_1454_groups_0"), val = int32(1)]; - tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39079168))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39043264))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1454_cast_fp16 = conv(dilations = var_1454_dilations_0, groups = var_1454_groups_0, pad = var_1454_pad_0, pad_type = var_1454_pad_type_0, strides = var_1454_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1454_cast_fp16")]; - tensor hidden_states_17_cast_fp16 = add(x = var_1448_cast_fp16, y = var_1454_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; - tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_29_cast_fp16")]; - int32 var_1464 = const()[name = string("op_1464"), val = int32(3)]; - tensor out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor([1])]; - fp16 var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1483_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")]; - tensor obj_29_gamma_0_to_fp16 = const()[name = string("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39374144)))]; - tensor obj_29_beta_0_to_fp16 = const()[name = string("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39375744)))]; - fp16 obj_29_epsilon_0_to_fp16 = const()[name = string("obj_29_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_81_to_fp16, variance = 
obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("obj_29_cast_fp16")]; - string var_1505_pad_type_0 = const()[name = string("op_1505_pad_type_0"), val = string("valid")]; - tensor var_1505_strides_0 = const()[name = string("op_1505_strides_0"), val = tensor([1, 1])]; - tensor var_1505_pad_0 = const()[name = string("op_1505_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1505_dilations_0 = const()[name = string("op_1505_dilations_0"), val = tensor([1, 1])]; - int32 var_1505_groups_0 = const()[name = string("op_1505_groups_0"), val = int32(1)]; - tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39377344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39672320))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39672448)))]; - tensor var_1505_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1505_dilations_0, groups = var_1505_groups_0, pad = var_1505_pad_0, pad_type = var_1505_pad_type_0, strides = var_1505_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1505_cast_fp16")]; - string var_1511_pad_type_0 = const()[name = string("op_1511_pad_type_0"), val = string("valid")]; - tensor var_1511_strides_0 = const()[name = string("op_1511_strides_0"), val = tensor([1, 1])]; - tensor var_1511_pad_0 = const()[name = string("op_1511_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1511_dilations_0 = const()[name = string("op_1511_dilations_0"), val = tensor([1, 1])]; - int32 var_1511_groups_0 = const()[name = string("op_1511_groups_0"), val = int32(1)]; - tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39680704))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39674048))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1511_cast_fp16 = conv(dilations = var_1511_dilations_0, groups = var_1511_groups_0, pad = var_1511_pad_0, pad_type = var_1511_pad_type_0, strides = var_1511_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1511_cast_fp16")]; - tensor query_15_cast_fp16 = add(x = var_1505_cast_fp16, y = var_1511_cast_fp16)[name = string("query_15_cast_fp16")]; - string var_1520_pad_type_0 = const()[name = string("op_1520_pad_type_0"), val = string("valid")]; - tensor var_1520_strides_0 = const()[name = string("op_1520_strides_0"), val = tensor([1, 1])]; - tensor var_1520_pad_0 = const()[name = string("op_1520_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1520_dilations_0 = const()[name = string("op_1520_dilations_0"), val = tensor([1, 1])]; - int32 var_1520_groups_0 = const()[name = string("op_1520_groups_0"), val = int32(1)]; - tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(39754496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40049472))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_1520_cast_fp16 = conv(dilations = var_1520_dilations_0, groups = var_1520_groups_0, pad = var_1520_pad_0, pad_type = var_1520_pad_type_0, strides = var_1520_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1520_cast_fp16")]; - string var_1526_pad_type_0 = const()[name = string("op_1526_pad_type_0"), val = string("valid")]; - tensor var_1526_strides_0 = const()[name = string("op_1526_strides_0"), val = tensor([1, 1])]; - tensor var_1526_pad_0 = const()[name = string("op_1526_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1526_dilations_0 = const()[name = string("op_1526_dilations_0"), val = tensor([1, 1])]; - int32 var_1526_groups_0 = const()[name = string("op_1526_groups_0"), val = int32(1)]; - tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40056832))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40049600))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1526_cast_fp16 = conv(dilations = var_1526_dilations_0, groups = var_1526_groups_0, pad = var_1526_pad_0, pad_type = var_1526_pad_type_0, strides = var_1526_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1526_cast_fp16")]; - tensor key_15_cast_fp16 = add(x = var_1520_cast_fp16, y = var_1526_cast_fp16)[name = string("key_15_cast_fp16")]; - string var_1536_pad_type_0 = const()[name = string("op_1536_pad_type_0"), val = string("valid")]; - tensor var_1536_strides_0 = const()[name = string("op_1536_strides_0"), val = tensor([1, 1])]; - tensor var_1536_pad_0 = const()[name = string("op_1536_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1536_dilations_0 = const()[name = string("op_1536_dilations_0"), val = tensor([1, 1])]; - int32 var_1536_groups_0 = const()[name = string("op_1536_groups_0"), val = int32(1)]; - tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40130624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40425600))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40425728)))]; - tensor var_1536_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1536_dilations_0, groups = var_1536_groups_0, pad = var_1536_pad_0, pad_type = var_1536_pad_type_0, strides = var_1536_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1536_cast_fp16")]; - string var_1542_pad_type_0 = const()[name = string("op_1542_pad_type_0"), val = string("valid")]; - tensor var_1542_strides_0 = const()[name = string("op_1542_strides_0"), val = tensor([1, 1])]; - tensor 
var_1542_pad_0 = const()[name = string("op_1542_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1542_dilations_0 = const()[name = string("op_1542_dilations_0"), val = tensor([1, 1])]; - int32 var_1542_groups_0 = const()[name = string("op_1542_groups_0"), val = int32(1)]; - tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40433728))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40427328))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1542_cast_fp16 = conv(dilations = var_1542_dilations_0, groups = var_1542_groups_0, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1542_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1542_cast_fp16")]; - tensor value_15_cast_fp16 = add(x = var_1536_cast_fp16, y = var_1542_cast_fp16)[name = string("value_15_cast_fp16")]; - tensor var_1545 = const()[name = string("op_1545"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_15_cast_fp16 = reshape(shape = var_1545, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")]; - fp16 var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1548_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1547_to_fp16)[name = string("op_1548_cast_fp16")]; - tensor var_1549 = const()[name = string("op_1549"), val = tensor([1, 12, 64, -1])]; - tensor var_1550_cast_fp16 = reshape(shape = var_1549, x = key_15_cast_fp16)[name = string("op_1550_cast_fp16")]; - bool mh_w_15_transpose_x_0 = const()[name = string("mh_w_15_transpose_x_0"), val = bool(true)]; - bool mh_w_15_transpose_y_0 = const()[name = string("mh_w_15_transpose_y_0"), val = bool(false)]; - tensor mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1548_cast_fp16, y = var_1550_cast_fp16)[name = string("mh_w_15_cast_fp16")]; - tensor var_1553_cast_fp16 = softmax(axis = var_1464, x = mh_w_15_cast_fp16)[name = string("op_1553_cast_fp16")]; - tensor var_1554 = const()[name = string("op_1554"), val = tensor([1, 12, 64, -1])]; - tensor var_1555_cast_fp16 = reshape(shape = var_1554, x = value_15_cast_fp16)[name = string("op_1555_cast_fp16")]; - bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)]; - bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)]; - tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1555_cast_fp16, y = var_1553_cast_fp16)[name = string("attn_15_cast_fp16")]; - tensor var_1558 = const()[name = string("op_1558"), val = tensor([1, 768, 1, -1])]; - tensor input_57_cast_fp16 = reshape(shape = var_1558, x = attn_15_cast_fp16)[name = string("input_57_cast_fp16")]; - string var_1568_pad_type_0 = const()[name = string("op_1568_pad_type_0"), val = string("valid")]; - tensor var_1568_strides_0 = const()[name = string("op_1568_strides_0"), val = tensor([1, 1])]; - tensor var_1568_pad_0 = const()[name = string("op_1568_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1568_dilations_0 = const()[name = string("op_1568_dilations_0"), val = tensor([1, 1])]; - int32 var_1568_groups_0 = const()[name = string("op_1568_groups_0"), val = int32(1)]; - tensor 
layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40507520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40802496))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40802624)))]; - tensor var_1568_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1568_dilations_0, groups = var_1568_groups_0, pad = var_1568_pad_0, pad_type = var_1568_pad_type_0, strides = var_1568_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = string("op_1568_cast_fp16")]; - string var_1574_pad_type_0 = const()[name = string("op_1574_pad_type_0"), val = string("valid")]; - tensor var_1574_strides_0 = const()[name = string("op_1574_strides_0"), val = tensor([1, 1])]; - tensor var_1574_pad_0 = const()[name = string("op_1574_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1574_dilations_0 = const()[name = string("op_1574_dilations_0"), val = tensor([1, 1])]; - int32 var_1574_groups_0 = const()[name = string("op_1574_groups_0"), val = int32(1)]; - tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40810112))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40804224))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1574_cast_fp16 = conv(dilations = var_1574_dilations_0, groups = var_1574_groups_0, pad = var_1574_pad_0, pad_type = var_1574_pad_type_0, strides = var_1574_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = string("op_1574_cast_fp16")]; - tensor obj_31_cast_fp16 = add(x = var_1568_cast_fp16, y = var_1574_cast_fp16)[name = string("obj_31_cast_fp16")]; - tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_31_cast_fp16")]; - tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; - fp16 var_1585_to_fp16 = const()[name = string("op_1585_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1585_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")]; - tensor input_59_gamma_0_to_fp16 = const()[name = string("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40883904)))]; - tensor input_59_beta_0_to_fp16 = const()[name = string("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40885504)))]; - fp16 input_59_epsilon_0_to_fp16 = const()[name = string("input_59_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("input_59_cast_fp16")]; - string var_1603_pad_type_0 = 
const()[name = string("op_1603_pad_type_0"), val = string("valid")]; - tensor var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor([1, 1])]; - tensor var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor([1, 1])]; - int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)]; - tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40887104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42066816))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42066944)))]; - tensor var_1603_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1603_cast_fp16")]; - string var_1609_pad_type_0 = const()[name = string("op_1609_pad_type_0"), val = string("valid")]; - tensor var_1609_strides_0 = const()[name = string("op_1609_strides_0"), val = tensor([1, 1])]; - tensor var_1609_pad_0 = const()[name = string("op_1609_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1609_dilations_0 = const()[name = string("op_1609_dilations_0"), val = tensor([1, 1])]; - int32 var_1609_groups_0 = const()[name = string("op_1609_groups_0"), val = int32(1)]; - tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42110976))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42073152))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1609_cast_fp16 = conv(dilations = var_1609_dilations_0, groups = var_1609_groups_0, pad = var_1609_pad_0, pad_type = var_1609_pad_type_0, strides = var_1609_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1609_cast_fp16")]; - tensor input_61_cast_fp16 = add(x = var_1603_cast_fp16, y = var_1609_cast_fp16)[name = string("input_61_cast_fp16")]; - string input_63_mode_0 = const()[name = string("input_63_mode_0"), val = string("EXACT")]; - tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = string("input_63_cast_fp16")]; - string var_1620_pad_type_0 = const()[name = string("op_1620_pad_type_0"), val = string("valid")]; - tensor var_1620_strides_0 = const()[name = string("op_1620_strides_0"), val = tensor([1, 1])]; - tensor var_1620_pad_0 = const()[name = string("op_1620_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1620_dilations_0 = const()[name = string("op_1620_dilations_0"), val = tensor([1, 1])]; - int32 var_1620_groups_0 = const()[name = string("op_1620_groups_0"), val = int32(1)]; - tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42405952))), lut 
= tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43585664))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43585792)))]; - tensor var_1620_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1620_dilations_0, groups = var_1620_groups_0, pad = var_1620_pad_0, pad_type = var_1620_pad_type_0, strides = var_1620_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_1620_cast_fp16")]; - string var_1626_pad_type_0 = const()[name = string("op_1626_pad_type_0"), val = string("valid")]; - tensor var_1626_strides_0 = const()[name = string("op_1626_strides_0"), val = tensor([1, 1])]; - tensor var_1626_pad_0 = const()[name = string("op_1626_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1626_dilations_0 = const()[name = string("op_1626_dilations_0"), val = tensor([1, 1])]; - int32 var_1626_groups_0 = const()[name = string("op_1626_groups_0"), val = int32(1)]; - tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43616960))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43587392))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1626_cast_fp16 = conv(dilations = var_1626_dilations_0, groups = var_1626_groups_0, pad = var_1626_pad_0, pad_type = var_1626_pad_type_0, strides = var_1626_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_1626_cast_fp16")]; - tensor hidden_states_19_cast_fp16 = add(x = var_1620_cast_fp16, y = var_1626_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; - tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_33_cast_fp16")]; - int32 var_1636 = const()[name = string("op_1636"), val = int32(3)]; - tensor out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor([1])]; - fp16 var_1655_to_fp16 = const()[name = string("op_1655_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1655_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")]; - tensor obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43911936)))]; - tensor obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43913536)))]; - fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_33_cast_fp16")]; - string var_1677_pad_type_0 = const()[name = string("op_1677_pad_type_0"), val = string("valid")]; - tensor var_1677_strides_0 = const()[name = string("op_1677_strides_0"), val = tensor([1, 1])]; - tensor var_1677_pad_0 = const()[name = 
string("op_1677_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1677_dilations_0 = const()[name = string("op_1677_dilations_0"), val = tensor([1, 1])]; - int32 var_1677_groups_0 = const()[name = string("op_1677_groups_0"), val = int32(1)]; - tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43915136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44210112))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44210240)))]; - tensor var_1677_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1677_dilations_0, groups = var_1677_groups_0, pad = var_1677_pad_0, pad_type = var_1677_pad_type_0, strides = var_1677_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1677_cast_fp16")]; - string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")]; - tensor var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor([1, 1])]; - tensor var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor([1, 1])]; - int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)]; - tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44219008))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44211840))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1683_cast_fp16 = conv(dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1683_cast_fp16")]; - tensor query_17_cast_fp16 = add(x = var_1677_cast_fp16, y = var_1683_cast_fp16)[name = string("query_17_cast_fp16")]; - string var_1692_pad_type_0 = const()[name = string("op_1692_pad_type_0"), val = string("valid")]; - tensor var_1692_strides_0 = const()[name = string("op_1692_strides_0"), val = tensor([1, 1])]; - tensor var_1692_pad_0 = const()[name = string("op_1692_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1692_dilations_0 = const()[name = string("op_1692_dilations_0"), val = tensor([1, 1])]; - int32 var_1692_groups_0 = const()[name = string("op_1692_groups_0"), val = int32(1)]; - tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44292800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44587776))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_1692_cast_fp16 = conv(dilations = var_1692_dilations_0, groups = var_1692_groups_0, pad = 
var_1692_pad_0, pad_type = var_1692_pad_type_0, strides = var_1692_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1692_cast_fp16")]; - string var_1698_pad_type_0 = const()[name = string("op_1698_pad_type_0"), val = string("valid")]; - tensor var_1698_strides_0 = const()[name = string("op_1698_strides_0"), val = tensor([1, 1])]; - tensor var_1698_pad_0 = const()[name = string("op_1698_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1698_dilations_0 = const()[name = string("op_1698_dilations_0"), val = tensor([1, 1])]; - int32 var_1698_groups_0 = const()[name = string("op_1698_groups_0"), val = int32(1)]; - tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44595136))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44587904))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1698_cast_fp16 = conv(dilations = var_1698_dilations_0, groups = var_1698_groups_0, pad = var_1698_pad_0, pad_type = var_1698_pad_type_0, strides = var_1698_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1698_cast_fp16")]; - tensor key_17_cast_fp16 = add(x = var_1692_cast_fp16, y = var_1698_cast_fp16)[name = string("key_17_cast_fp16")]; - string var_1708_pad_type_0 = const()[name = string("op_1708_pad_type_0"), val = string("valid")]; - tensor var_1708_strides_0 = const()[name = string("op_1708_strides_0"), val = tensor([1, 1])]; - tensor var_1708_pad_0 = const()[name = string("op_1708_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1708_dilations_0 = const()[name = string("op_1708_dilations_0"), val = tensor([1, 1])]; - int32 var_1708_groups_0 = const()[name = string("op_1708_groups_0"), val = int32(1)]; - tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44668928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44963904))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44964032)))]; - tensor var_1708_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1708_dilations_0, groups = var_1708_groups_0, pad = var_1708_pad_0, pad_type = var_1708_pad_type_0, strides = var_1708_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1708_cast_fp16")]; - string var_1714_pad_type_0 = const()[name = string("op_1714_pad_type_0"), val = string("valid")]; - tensor var_1714_strides_0 = const()[name = string("op_1714_strides_0"), val = tensor([1, 1])]; - tensor var_1714_pad_0 = const()[name = string("op_1714_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1714_dilations_0 = const()[name = string("op_1714_dilations_0"), val = tensor([1, 1])]; - int32 var_1714_groups_0 = const()[name = string("op_1714_groups_0"), val = int32(1)]; - tensor 
layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44971200))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44965632))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1714_cast_fp16 = conv(dilations = var_1714_dilations_0, groups = var_1714_groups_0, pad = var_1714_pad_0, pad_type = var_1714_pad_type_0, strides = var_1714_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1714_cast_fp16")]; - tensor value_17_cast_fp16 = add(x = var_1708_cast_fp16, y = var_1714_cast_fp16)[name = string("value_17_cast_fp16")]; - tensor var_1717 = const()[name = string("op_1717"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_17_cast_fp16 = reshape(shape = var_1717, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")]; - fp16 var_1719_to_fp16 = const()[name = string("op_1719_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1720_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1719_to_fp16)[name = string("op_1720_cast_fp16")]; - tensor var_1721 = const()[name = string("op_1721"), val = tensor([1, 12, 64, -1])]; - tensor var_1722_cast_fp16 = reshape(shape = var_1721, x = key_17_cast_fp16)[name = string("op_1722_cast_fp16")]; - bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; - bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; - tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1720_cast_fp16, y = var_1722_cast_fp16)[name = string("mh_w_17_cast_fp16")]; - tensor var_1725_cast_fp16 = softmax(axis = var_1636, x = mh_w_17_cast_fp16)[name = string("op_1725_cast_fp16")]; - tensor var_1726 = const()[name = string("op_1726"), val = tensor([1, 12, 64, -1])]; - tensor var_1727_cast_fp16 = reshape(shape = var_1726, x = value_17_cast_fp16)[name = string("op_1727_cast_fp16")]; - bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)]; - bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)]; - tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1727_cast_fp16, y = var_1725_cast_fp16)[name = string("attn_17_cast_fp16")]; - tensor var_1730 = const()[name = string("op_1730"), val = tensor([1, 768, 1, -1])]; - tensor input_65_cast_fp16 = reshape(shape = var_1730, x = attn_17_cast_fp16)[name = string("input_65_cast_fp16")]; - string var_1740_pad_type_0 = const()[name = string("op_1740_pad_type_0"), val = string("valid")]; - tensor var_1740_strides_0 = const()[name = string("op_1740_strides_0"), val = tensor([1, 1])]; - tensor var_1740_pad_0 = const()[name = string("op_1740_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1740_dilations_0 = const()[name = string("op_1740_dilations_0"), val = tensor([1, 1])]; - int32 var_1740_groups_0 = const()[name = string("op_1740_groups_0"), val = int32(1)]; - tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45044992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45339968))))[name = 
string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45340096)))]; - tensor var_1740_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1740_dilations_0, groups = var_1740_groups_0, pad = var_1740_pad_0, pad_type = var_1740_pad_type_0, strides = var_1740_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_1740_cast_fp16")]; - string var_1746_pad_type_0 = const()[name = string("op_1746_pad_type_0"), val = string("valid")]; - tensor var_1746_strides_0 = const()[name = string("op_1746_strides_0"), val = tensor([1, 1])]; - tensor var_1746_pad_0 = const()[name = string("op_1746_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1746_dilations_0 = const()[name = string("op_1746_dilations_0"), val = tensor([1, 1])]; - int32 var_1746_groups_0 = const()[name = string("op_1746_groups_0"), val = int32(1)]; - tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45347776))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45341696))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1746_cast_fp16 = conv(dilations = var_1746_dilations_0, groups = var_1746_groups_0, pad = var_1746_pad_0, pad_type = var_1746_pad_type_0, strides = var_1746_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_1746_cast_fp16")]; - tensor obj_35_cast_fp16 = add(x = var_1740_cast_fp16, y = var_1746_cast_fp16)[name = string("obj_35_cast_fp16")]; - tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_35_cast_fp16")]; - tensor out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor([1])]; - fp16 var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1757_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")]; - tensor input_67_gamma_0_to_fp16 = const()[name = string("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45421568)))]; - tensor input_67_beta_0_to_fp16 = const()[name = string("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45423168)))]; - fp16 input_67_epsilon_0_to_fp16 = const()[name = string("input_67_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_67_cast_fp16")]; - string var_1775_pad_type_0 = const()[name = string("op_1775_pad_type_0"), val = string("valid")]; - tensor var_1775_strides_0 = const()[name = string("op_1775_strides_0"), val = tensor([1, 1])]; - tensor var_1775_pad_0 = const()[name = string("op_1775_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1775_dilations_0 = const()[name 
= string("op_1775_dilations_0"), val = tensor([1, 1])]; - int32 var_1775_groups_0 = const()[name = string("op_1775_groups_0"), val = int32(1)]; - tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45424768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46604480))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46604608)))]; - tensor var_1775_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = string("op_1775_cast_fp16")]; - string var_1781_pad_type_0 = const()[name = string("op_1781_pad_type_0"), val = string("valid")]; - tensor var_1781_strides_0 = const()[name = string("op_1781_strides_0"), val = tensor([1, 1])]; - tensor var_1781_pad_0 = const()[name = string("op_1781_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1781_dilations_0 = const()[name = string("op_1781_dilations_0"), val = tensor([1, 1])]; - int32 var_1781_groups_0 = const()[name = string("op_1781_groups_0"), val = int32(1)]; - tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46641984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46610816))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1781_cast_fp16 = conv(dilations = var_1781_dilations_0, groups = var_1781_groups_0, pad = var_1781_pad_0, pad_type = var_1781_pad_type_0, strides = var_1781_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = string("op_1781_cast_fp16")]; - tensor input_69_cast_fp16 = add(x = var_1775_cast_fp16, y = var_1781_cast_fp16)[name = string("input_69_cast_fp16")]; - string input_71_mode_0 = const()[name = string("input_71_mode_0"), val = string("EXACT")]; - tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = string("input_71_cast_fp16")]; - string var_1792_pad_type_0 = const()[name = string("op_1792_pad_type_0"), val = string("valid")]; - tensor var_1792_strides_0 = const()[name = string("op_1792_strides_0"), val = tensor([1, 1])]; - tensor var_1792_pad_0 = const()[name = string("op_1792_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1792_dilations_0 = const()[name = string("op_1792_dilations_0"), val = tensor([1, 1])]; - int32 var_1792_groups_0 = const()[name = string("op_1792_groups_0"), val = int32(1)]; - tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46936960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48116672))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48116800)))]; - tensor var_1792_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1792_dilations_0, groups = var_1792_groups_0, pad = var_1792_pad_0, pad_type = var_1792_pad_type_0, strides = var_1792_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_1792_cast_fp16")]; - string var_1798_pad_type_0 = const()[name = string("op_1798_pad_type_0"), val = string("valid")]; - tensor var_1798_strides_0 = const()[name = string("op_1798_strides_0"), val = tensor([1, 1])]; - tensor var_1798_pad_0 = const()[name = string("op_1798_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1798_dilations_0 = const()[name = string("op_1798_dilations_0"), val = tensor([1, 1])]; - int32 var_1798_groups_0 = const()[name = string("op_1798_groups_0"), val = int32(1)]; - tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48148992))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48118400))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1798_cast_fp16 = conv(dilations = var_1798_dilations_0, groups = var_1798_groups_0, pad = var_1798_pad_0, pad_type = var_1798_pad_type_0, strides = var_1798_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_1798_cast_fp16")]; - tensor hidden_states_21_cast_fp16 = add(x = var_1792_cast_fp16, y = var_1798_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; - tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_37_cast_fp16")]; - int32 var_1808 = const()[name = string("op_1808"), val = int32(3)]; - tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; - fp16 var_1827_to_fp16 = const()[name = string("op_1827_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1827_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")]; - tensor obj_37_gamma_0_to_fp16 = const()[name = string("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48443968)))]; - tensor obj_37_beta_0_to_fp16 = const()[name = string("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48445568)))]; - fp16 obj_37_epsilon_0_to_fp16 = const()[name = string("obj_37_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_37_cast_fp16")]; - string var_1849_pad_type_0 = const()[name = string("op_1849_pad_type_0"), val = string("valid")]; - tensor var_1849_strides_0 = const()[name = string("op_1849_strides_0"), val = tensor([1, 1])]; - tensor var_1849_pad_0 = const()[name = string("op_1849_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1849_dilations_0 = const()[name = string("op_1849_dilations_0"), val = tensor([1, 1])]; - int32 var_1849_groups_0 = const()[name = string("op_1849_groups_0"), val = int32(1)]; - tensor 
layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48447168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48742144))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48742272)))]; - tensor var_1849_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1849_dilations_0, groups = var_1849_groups_0, pad = var_1849_pad_0, pad_type = var_1849_pad_type_0, strides = var_1849_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1849_cast_fp16")]; - string var_1855_pad_type_0 = const()[name = string("op_1855_pad_type_0"), val = string("valid")]; - tensor var_1855_strides_0 = const()[name = string("op_1855_strides_0"), val = tensor([1, 1])]; - tensor var_1855_pad_0 = const()[name = string("op_1855_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1855_dilations_0 = const()[name = string("op_1855_dilations_0"), val = tensor([1, 1])]; - int32 var_1855_groups_0 = const()[name = string("op_1855_groups_0"), val = int32(1)]; - tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48750080))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48743872))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1855_cast_fp16 = conv(dilations = var_1855_dilations_0, groups = var_1855_groups_0, pad = var_1855_pad_0, pad_type = var_1855_pad_type_0, strides = var_1855_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1855_cast_fp16")]; - tensor query_19_cast_fp16 = add(x = var_1849_cast_fp16, y = var_1855_cast_fp16)[name = string("query_19_cast_fp16")]; - string var_1864_pad_type_0 = const()[name = string("op_1864_pad_type_0"), val = string("valid")]; - tensor var_1864_strides_0 = const()[name = string("op_1864_strides_0"), val = tensor([1, 1])]; - tensor var_1864_pad_0 = const()[name = string("op_1864_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1864_dilations_0 = const()[name = string("op_1864_dilations_0"), val = tensor([1, 1])]; - int32 var_1864_groups_0 = const()[name = string("op_1864_groups_0"), val = int32(1)]; - tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48823872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49118848))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_1864_cast_fp16 = conv(dilations = var_1864_dilations_0, groups = var_1864_groups_0, pad = var_1864_pad_0, pad_type = var_1864_pad_type_0, strides = var_1864_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1864_cast_fp16")]; - string var_1870_pad_type_0 = const()[name = 
string("op_1870_pad_type_0"), val = string("valid")]; - tensor var_1870_strides_0 = const()[name = string("op_1870_strides_0"), val = tensor([1, 1])]; - tensor var_1870_pad_0 = const()[name = string("op_1870_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1870_dilations_0 = const()[name = string("op_1870_dilations_0"), val = tensor([1, 1])]; - int32 var_1870_groups_0 = const()[name = string("op_1870_groups_0"), val = int32(1)]; - tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49125760))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49118976))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1870_cast_fp16 = conv(dilations = var_1870_dilations_0, groups = var_1870_groups_0, pad = var_1870_pad_0, pad_type = var_1870_pad_type_0, strides = var_1870_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1870_cast_fp16")]; - tensor key_19_cast_fp16 = add(x = var_1864_cast_fp16, y = var_1870_cast_fp16)[name = string("key_19_cast_fp16")]; - string var_1880_pad_type_0 = const()[name = string("op_1880_pad_type_0"), val = string("valid")]; - tensor var_1880_strides_0 = const()[name = string("op_1880_strides_0"), val = tensor([1, 1])]; - tensor var_1880_pad_0 = const()[name = string("op_1880_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1880_dilations_0 = const()[name = string("op_1880_dilations_0"), val = tensor([1, 1])]; - int32 var_1880_groups_0 = const()[name = string("op_1880_groups_0"), val = int32(1)]; - tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49199552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49494528))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49494656)))]; - tensor var_1880_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1880_dilations_0, groups = var_1880_groups_0, pad = var_1880_pad_0, pad_type = var_1880_pad_type_0, strides = var_1880_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1880_cast_fp16")]; - string var_1886_pad_type_0 = const()[name = string("op_1886_pad_type_0"), val = string("valid")]; - tensor var_1886_strides_0 = const()[name = string("op_1886_strides_0"), val = tensor([1, 1])]; - tensor var_1886_pad_0 = const()[name = string("op_1886_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1886_dilations_0 = const()[name = string("op_1886_dilations_0"), val = tensor([1, 1])]; - int32 var_1886_groups_0 = const()[name = string("op_1886_groups_0"), val = int32(1)]; - tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49501760))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(49496256))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1886_cast_fp16 = conv(dilations = var_1886_dilations_0, groups = var_1886_groups_0, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1886_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1886_cast_fp16")]; - tensor value_19_cast_fp16 = add(x = var_1880_cast_fp16, y = var_1886_cast_fp16)[name = string("value_19_cast_fp16")]; - tensor var_1889 = const()[name = string("op_1889"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_19_cast_fp16 = reshape(shape = var_1889, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")]; - fp16 var_1891_to_fp16 = const()[name = string("op_1891_to_fp16"), val = fp16(0x1p-3)]; - tensor var_1892_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1891_to_fp16)[name = string("op_1892_cast_fp16")]; - tensor var_1893 = const()[name = string("op_1893"), val = tensor([1, 12, 64, -1])]; - tensor var_1894_cast_fp16 = reshape(shape = var_1893, x = key_19_cast_fp16)[name = string("op_1894_cast_fp16")]; - bool mh_w_19_transpose_x_0 = const()[name = string("mh_w_19_transpose_x_0"), val = bool(true)]; - bool mh_w_19_transpose_y_0 = const()[name = string("mh_w_19_transpose_y_0"), val = bool(false)]; - tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1892_cast_fp16, y = var_1894_cast_fp16)[name = string("mh_w_19_cast_fp16")]; - tensor var_1897_cast_fp16 = softmax(axis = var_1808, x = mh_w_19_cast_fp16)[name = string("op_1897_cast_fp16")]; - tensor var_1898 = const()[name = string("op_1898"), val = tensor([1, 12, 64, -1])]; - tensor var_1899_cast_fp16 = reshape(shape = var_1898, x = value_19_cast_fp16)[name = string("op_1899_cast_fp16")]; - bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)]; - bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)]; - tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1899_cast_fp16, y = var_1897_cast_fp16)[name = string("attn_19_cast_fp16")]; - tensor var_1902 = const()[name = string("op_1902"), val = tensor([1, 768, 1, -1])]; - tensor input_73_cast_fp16 = reshape(shape = var_1902, x = attn_19_cast_fp16)[name = string("input_73_cast_fp16")]; - string var_1912_pad_type_0 = const()[name = string("op_1912_pad_type_0"), val = string("valid")]; - tensor var_1912_strides_0 = const()[name = string("op_1912_strides_0"), val = tensor([1, 1])]; - tensor var_1912_pad_0 = const()[name = string("op_1912_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1912_dilations_0 = const()[name = string("op_1912_dilations_0"), val = tensor([1, 1])]; - int32 var_1912_groups_0 = const()[name = string("op_1912_groups_0"), val = int32(1)]; - tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49575552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49870528))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(49870656)))]; - tensor var_1912_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1912_dilations_0, groups = var_1912_groups_0, pad = var_1912_pad_0, pad_type = var_1912_pad_type_0, strides = var_1912_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_1912_cast_fp16")]; - string var_1918_pad_type_0 = const()[name = string("op_1918_pad_type_0"), val = string("valid")]; - tensor var_1918_strides_0 = const()[name = string("op_1918_strides_0"), val = tensor([1, 1])]; - tensor var_1918_pad_0 = const()[name = string("op_1918_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1918_dilations_0 = const()[name = string("op_1918_dilations_0"), val = tensor([1, 1])]; - int32 var_1918_groups_0 = const()[name = string("op_1918_groups_0"), val = int32(1)]; - tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49877760))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49872256))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1918_cast_fp16 = conv(dilations = var_1918_dilations_0, groups = var_1918_groups_0, pad = var_1918_pad_0, pad_type = var_1918_pad_type_0, strides = var_1918_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_1918_cast_fp16")]; - tensor obj_39_cast_fp16 = add(x = var_1912_cast_fp16, y = var_1918_cast_fp16)[name = string("obj_39_cast_fp16")]; - tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_39_cast_fp16")]; - tensor out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor([1])]; - fp16 var_1929_to_fp16 = const()[name = string("op_1929_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1929_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")]; - tensor input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49951552)))]; - tensor input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49953152)))]; - fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("input_75_cast_fp16")]; - string var_1947_pad_type_0 = const()[name = string("op_1947_pad_type_0"), val = string("valid")]; - tensor var_1947_strides_0 = const()[name = string("op_1947_strides_0"), val = tensor([1, 1])]; - tensor var_1947_pad_0 = const()[name = string("op_1947_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1947_dilations_0 = const()[name = string("op_1947_dilations_0"), val = tensor([1, 1])]; - int32 var_1947_groups_0 = const()[name = string("op_1947_groups_0"), val = int32(1)]; - tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(49954752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51134464))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51134592)))]; - tensor var_1947_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1947_dilations_0, groups = var_1947_groups_0, pad = var_1947_pad_0, pad_type = var_1947_pad_type_0, strides = var_1947_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_1947_cast_fp16")]; - string var_1953_pad_type_0 = const()[name = string("op_1953_pad_type_0"), val = string("valid")]; - tensor var_1953_strides_0 = const()[name = string("op_1953_strides_0"), val = tensor([1, 1])]; - tensor var_1953_pad_0 = const()[name = string("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1953_dilations_0 = const()[name = string("op_1953_dilations_0"), val = tensor([1, 1])]; - int32 var_1953_groups_0 = const()[name = string("op_1953_groups_0"), val = int32(1)]; - tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51171648))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51140800))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1953_cast_fp16 = conv(dilations = var_1953_dilations_0, groups = var_1953_groups_0, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1953_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_1953_cast_fp16")]; - tensor input_77_cast_fp16 = add(x = var_1947_cast_fp16, y = var_1953_cast_fp16)[name = string("input_77_cast_fp16")]; - string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")]; - tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")]; - string var_1964_pad_type_0 = const()[name = string("op_1964_pad_type_0"), val = string("valid")]; - tensor var_1964_strides_0 = const()[name = string("op_1964_strides_0"), val = tensor([1, 1])]; - tensor var_1964_pad_0 = const()[name = string("op_1964_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1964_dilations_0 = const()[name = string("op_1964_dilations_0"), val = tensor([1, 1])]; - int32 var_1964_groups_0 = const()[name = string("op_1964_groups_0"), val = int32(1)]; - tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51466624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52646336))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52646464)))]; - tensor var_1964_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1964_dilations_0, groups = var_1964_groups_0, pad = var_1964_pad_0, pad_type = 
var_1964_pad_type_0, strides = var_1964_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_1964_cast_fp16")]; - string var_1970_pad_type_0 = const()[name = string("op_1970_pad_type_0"), val = string("valid")]; - tensor var_1970_strides_0 = const()[name = string("op_1970_strides_0"), val = tensor([1, 1])]; - tensor var_1970_pad_0 = const()[name = string("op_1970_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1970_dilations_0 = const()[name = string("op_1970_dilations_0"), val = tensor([1, 1])]; - int32 var_1970_groups_0 = const()[name = string("op_1970_groups_0"), val = int32(1)]; - tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52680576))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52648064))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_1970_cast_fp16 = conv(dilations = var_1970_dilations_0, groups = var_1970_groups_0, pad = var_1970_pad_0, pad_type = var_1970_pad_type_0, strides = var_1970_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_1970_cast_fp16")]; - tensor hidden_states_23_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1970_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; - tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_41_cast_fp16")]; - int32 var_1980 = const()[name = string("op_1980"), val = int32(3)]; - tensor out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor([1])]; - fp16 var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1999_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")]; - tensor obj_41_gamma_0_to_fp16 = const()[name = string("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52975552)))]; - tensor obj_41_beta_0_to_fp16 = const()[name = string("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52977152)))]; - fp16 obj_41_epsilon_0_to_fp16 = const()[name = string("obj_41_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("obj_41_cast_fp16")]; - string var_2021_pad_type_0 = const()[name = string("op_2021_pad_type_0"), val = string("valid")]; - tensor var_2021_strides_0 = const()[name = string("op_2021_strides_0"), val = tensor([1, 1])]; - tensor var_2021_pad_0 = const()[name = string("op_2021_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2021_dilations_0 = const()[name = string("op_2021_dilations_0"), val = tensor([1, 1])]; - int32 var_2021_groups_0 = const()[name = string("op_2021_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52978752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53273728))))[name = 
string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53273856)))]; - tensor var_2021_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2021_dilations_0, groups = var_2021_groups_0, pad = var_2021_pad_0, pad_type = var_2021_pad_type_0, strides = var_2021_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2021_cast_fp16")]; - string var_2027_pad_type_0 = const()[name = string("op_2027_pad_type_0"), val = string("valid")]; - tensor var_2027_strides_0 = const()[name = string("op_2027_strides_0"), val = tensor([1, 1])]; - tensor var_2027_pad_0 = const()[name = string("op_2027_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2027_dilations_0 = const()[name = string("op_2027_dilations_0"), val = tensor([1, 1])]; - int32 var_2027_groups_0 = const()[name = string("op_2027_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53281536))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53275456))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2027_cast_fp16 = conv(dilations = var_2027_dilations_0, groups = var_2027_groups_0, pad = var_2027_pad_0, pad_type = var_2027_pad_type_0, strides = var_2027_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2027_cast_fp16")]; - tensor query_21_cast_fp16 = add(x = var_2021_cast_fp16, y = var_2027_cast_fp16)[name = string("query_21_cast_fp16")]; - string var_2036_pad_type_0 = const()[name = string("op_2036_pad_type_0"), val = string("valid")]; - tensor var_2036_strides_0 = const()[name = string("op_2036_strides_0"), val = tensor([1, 1])]; - tensor var_2036_pad_0 = const()[name = string("op_2036_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2036_dilations_0 = const()[name = string("op_2036_dilations_0"), val = tensor([1, 1])]; - int32 var_2036_groups_0 = const()[name = string("op_2036_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53355328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53650304))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2036_cast_fp16 = conv(dilations = var_2036_dilations_0, groups = var_2036_groups_0, pad = var_2036_pad_0, pad_type = var_2036_pad_type_0, strides = var_2036_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2036_cast_fp16")]; - string var_2042_pad_type_0 = const()[name = string("op_2042_pad_type_0"), val = string("valid")]; - tensor var_2042_strides_0 = const()[name = string("op_2042_strides_0"), val = tensor([1, 1])]; - tensor var_2042_pad_0 = const()[name = string("op_2042_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2042_dilations_0 = const()[name = 
string("op_2042_dilations_0"), val = tensor([1, 1])]; - int32 var_2042_groups_0 = const()[name = string("op_2042_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53657216))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53650432))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2042_cast_fp16 = conv(dilations = var_2042_dilations_0, groups = var_2042_groups_0, pad = var_2042_pad_0, pad_type = var_2042_pad_type_0, strides = var_2042_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2042_cast_fp16")]; - tensor key_21_cast_fp16 = add(x = var_2036_cast_fp16, y = var_2042_cast_fp16)[name = string("key_21_cast_fp16")]; - string var_2052_pad_type_0 = const()[name = string("op_2052_pad_type_0"), val = string("valid")]; - tensor var_2052_strides_0 = const()[name = string("op_2052_strides_0"), val = tensor([1, 1])]; - tensor var_2052_pad_0 = const()[name = string("op_2052_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2052_dilations_0 = const()[name = string("op_2052_dilations_0"), val = tensor([1, 1])]; - int32 var_2052_groups_0 = const()[name = string("op_2052_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53731008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54025984))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54026112)))]; - tensor var_2052_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2052_dilations_0, groups = var_2052_groups_0, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2052_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2052_cast_fp16")]; - string var_2058_pad_type_0 = const()[name = string("op_2058_pad_type_0"), val = string("valid")]; - tensor var_2058_strides_0 = const()[name = string("op_2058_strides_0"), val = tensor([1, 1])]; - tensor var_2058_pad_0 = const()[name = string("op_2058_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2058_dilations_0 = const()[name = string("op_2058_dilations_0"), val = tensor([1, 1])]; - int32 var_2058_groups_0 = const()[name = string("op_2058_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54033152))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54027712))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2058_cast_fp16 = conv(dilations = var_2058_dilations_0, groups = var_2058_groups_0, pad = var_2058_pad_0, pad_type = var_2058_pad_type_0, strides = var_2058_strides_0, weight = 
layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2058_cast_fp16")]; - tensor value_21_cast_fp16 = add(x = var_2052_cast_fp16, y = var_2058_cast_fp16)[name = string("value_21_cast_fp16")]; - tensor var_2061 = const()[name = string("op_2061"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_21_cast_fp16 = reshape(shape = var_2061, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")]; - fp16 var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = fp16(0x1p-3)]; - tensor var_2064_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2063_to_fp16)[name = string("op_2064_cast_fp16")]; - tensor var_2065 = const()[name = string("op_2065"), val = tensor([1, 12, 64, -1])]; - tensor var_2066_cast_fp16 = reshape(shape = var_2065, x = key_21_cast_fp16)[name = string("op_2066_cast_fp16")]; - bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)]; - bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)]; - tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2064_cast_fp16, y = var_2066_cast_fp16)[name = string("mh_w_21_cast_fp16")]; - tensor var_2069_cast_fp16 = softmax(axis = var_1980, x = mh_w_21_cast_fp16)[name = string("op_2069_cast_fp16")]; - tensor var_2070 = const()[name = string("op_2070"), val = tensor([1, 12, 64, -1])]; - tensor var_2071_cast_fp16 = reshape(shape = var_2070, x = value_21_cast_fp16)[name = string("op_2071_cast_fp16")]; - bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)]; - bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)]; - tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2071_cast_fp16, y = var_2069_cast_fp16)[name = string("attn_21_cast_fp16")]; - tensor var_2074 = const()[name = string("op_2074"), val = tensor([1, 768, 1, -1])]; - tensor input_81_cast_fp16 = reshape(shape = var_2074, x = attn_21_cast_fp16)[name = string("input_81_cast_fp16")]; - string var_2084_pad_type_0 = const()[name = string("op_2084_pad_type_0"), val = string("valid")]; - tensor var_2084_strides_0 = const()[name = string("op_2084_strides_0"), val = tensor([1, 1])]; - tensor var_2084_pad_0 = const()[name = string("op_2084_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2084_dilations_0 = const()[name = string("op_2084_dilations_0"), val = tensor([1, 1])]; - int32 var_2084_groups_0 = const()[name = string("op_2084_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54106944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54401920))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54402048)))]; - tensor var_2084_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2084_dilations_0, groups = var_2084_groups_0, pad = var_2084_pad_0, pad_type = var_2084_pad_type_0, strides = var_2084_strides_0, weight = 
layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2084_cast_fp16")]; - string var_2090_pad_type_0 = const()[name = string("op_2090_pad_type_0"), val = string("valid")]; - tensor var_2090_strides_0 = const()[name = string("op_2090_strides_0"), val = tensor([1, 1])]; - tensor var_2090_pad_0 = const()[name = string("op_2090_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2090_dilations_0 = const()[name = string("op_2090_dilations_0"), val = tensor([1, 1])]; - int32 var_2090_groups_0 = const()[name = string("op_2090_groups_0"), val = int32(1)]; - tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54409472))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54403648))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2090_cast_fp16 = conv(dilations = var_2090_dilations_0, groups = var_2090_groups_0, pad = var_2090_pad_0, pad_type = var_2090_pad_type_0, strides = var_2090_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2090_cast_fp16")]; - tensor obj_43_cast_fp16 = add(x = var_2084_cast_fp16, y = var_2090_cast_fp16)[name = string("obj_43_cast_fp16")]; - tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = string("inputs_43_cast_fp16")]; - tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; - fp16 var_2101_to_fp16 = const()[name = string("op_2101_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2101_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")]; - tensor input_83_gamma_0_to_fp16 = const()[name = string("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54483264)))]; - tensor input_83_beta_0_to_fp16 = const()[name = string("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54484864)))]; - fp16 input_83_epsilon_0_to_fp16 = const()[name = string("input_83_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("input_83_cast_fp16")]; - string var_2119_pad_type_0 = const()[name = string("op_2119_pad_type_0"), val = string("valid")]; - tensor var_2119_strides_0 = const()[name = string("op_2119_strides_0"), val = tensor([1, 1])]; - tensor var_2119_pad_0 = const()[name = string("op_2119_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2119_dilations_0 = const()[name = string("op_2119_dilations_0"), val = tensor([1, 1])]; - int32 var_2119_groups_0 = const()[name = string("op_2119_groups_0"), val = int32(1)]; - tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54486464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55666176))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_10_fc1_inlier_module_bias_to_fp16 = 
const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55666304)))]; - tensor var_2119_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2119_dilations_0, groups = var_2119_groups_0, pad = var_2119_pad_0, pad_type = var_2119_pad_type_0, strides = var_2119_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2119_cast_fp16")]; - string var_2125_pad_type_0 = const()[name = string("op_2125_pad_type_0"), val = string("valid")]; - tensor var_2125_strides_0 = const()[name = string("op_2125_strides_0"), val = tensor([1, 1])]; - tensor var_2125_pad_0 = const()[name = string("op_2125_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2125_dilations_0 = const()[name = string("op_2125_dilations_0"), val = tensor([1, 1])]; - int32 var_2125_groups_0 = const()[name = string("op_2125_groups_0"), val = int32(1)]; - tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55704320))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55672512))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2125_cast_fp16 = conv(dilations = var_2125_dilations_0, groups = var_2125_groups_0, pad = var_2125_pad_0, pad_type = var_2125_pad_type_0, strides = var_2125_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2125_cast_fp16")]; - tensor input_85_cast_fp16 = add(x = var_2119_cast_fp16, y = var_2125_cast_fp16)[name = string("input_85_cast_fp16")]; - string input_87_mode_0 = const()[name = string("input_87_mode_0"), val = string("EXACT")]; - tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = string("input_87_cast_fp16")]; - string var_2136_pad_type_0 = const()[name = string("op_2136_pad_type_0"), val = string("valid")]; - tensor var_2136_strides_0 = const()[name = string("op_2136_strides_0"), val = tensor([1, 1])]; - tensor var_2136_pad_0 = const()[name = string("op_2136_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2136_dilations_0 = const()[name = string("op_2136_dilations_0"), val = tensor([1, 1])]; - int32 var_2136_groups_0 = const()[name = string("op_2136_groups_0"), val = int32(1)]; - tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55999296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57179008))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57179136)))]; - tensor var_2136_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2136_dilations_0, groups = var_2136_groups_0, pad = var_2136_pad_0, pad_type = var_2136_pad_type_0, strides = var_2136_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = string("op_2136_cast_fp16")]; - string var_2142_pad_type_0 = const()[name = string("op_2142_pad_type_0"), val = string("valid")]; - tensor 
var_2142_strides_0 = const()[name = string("op_2142_strides_0"), val = tensor([1, 1])]; - tensor var_2142_pad_0 = const()[name = string("op_2142_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2142_dilations_0 = const()[name = string("op_2142_dilations_0"), val = tensor([1, 1])]; - int32 var_2142_groups_0 = const()[name = string("op_2142_groups_0"), val = int32(1)]; - tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57215232))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57180736))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2142_cast_fp16 = conv(dilations = var_2142_dilations_0, groups = var_2142_groups_0, pad = var_2142_pad_0, pad_type = var_2142_pad_type_0, strides = var_2142_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = string("op_2142_cast_fp16")]; - tensor hidden_states_25_cast_fp16 = add(x = var_2136_cast_fp16, y = var_2142_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; - tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_45_cast_fp16")]; - int32 var_2152 = const()[name = string("op_2152"), val = int32(3)]; - tensor out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor([1])]; - fp16 var_2171_to_fp16 = const()[name = string("op_2171_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2171_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")]; - tensor obj_45_gamma_0_to_fp16 = const()[name = string("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57510208)))]; - tensor obj_45_beta_0_to_fp16 = const()[name = string("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57511808)))]; - fp16 obj_45_epsilon_0_to_fp16 = const()[name = string("obj_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_45_cast_fp16")]; - string var_2193_pad_type_0 = const()[name = string("op_2193_pad_type_0"), val = string("valid")]; - tensor var_2193_strides_0 = const()[name = string("op_2193_strides_0"), val = tensor([1, 1])]; - tensor var_2193_pad_0 = const()[name = string("op_2193_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2193_dilations_0 = const()[name = string("op_2193_dilations_0"), val = tensor([1, 1])]; - int32 var_2193_groups_0 = const()[name = string("op_2193_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57513408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57808384))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(57808512)))]; - tensor var_2193_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2193_dilations_0, groups = var_2193_groups_0, pad = var_2193_pad_0, pad_type = var_2193_pad_type_0, strides = var_2193_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2193_cast_fp16")]; - string var_2199_pad_type_0 = const()[name = string("op_2199_pad_type_0"), val = string("valid")]; - tensor var_2199_strides_0 = const()[name = string("op_2199_strides_0"), val = tensor([1, 1])]; - tensor var_2199_pad_0 = const()[name = string("op_2199_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2199_dilations_0 = const()[name = string("op_2199_dilations_0"), val = tensor([1, 1])]; - int32 var_2199_groups_0 = const()[name = string("op_2199_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57816768))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57810112))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2199_cast_fp16 = conv(dilations = var_2199_dilations_0, groups = var_2199_groups_0, pad = var_2199_pad_0, pad_type = var_2199_pad_type_0, strides = var_2199_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2199_cast_fp16")]; - tensor query_cast_fp16 = add(x = var_2193_cast_fp16, y = var_2199_cast_fp16)[name = string("query_cast_fp16")]; - string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")]; - tensor var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor([1, 1])]; - tensor var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor([1, 1])]; - int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57890560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58185536))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2208_cast_fp16 = conv(dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2208_cast_fp16")]; - string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")]; - tensor var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor([1, 1])]; - tensor var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor([1, 1])]; - int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(58192896))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58185664))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2214_cast_fp16")]; - tensor key_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("key_cast_fp16")]; - string var_2224_pad_type_0 = const()[name = string("op_2224_pad_type_0"), val = string("valid")]; - tensor var_2224_strides_0 = const()[name = string("op_2224_strides_0"), val = tensor([1, 1])]; - tensor var_2224_pad_0 = const()[name = string("op_2224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2224_dilations_0 = const()[name = string("op_2224_dilations_0"), val = tensor([1, 1])]; - int32 var_2224_groups_0 = const()[name = string("op_2224_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58266688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58561664))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58561792)))]; - tensor var_2224_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2224_dilations_0, groups = var_2224_groups_0, pad = var_2224_pad_0, pad_type = var_2224_pad_type_0, strides = var_2224_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2224_cast_fp16")]; - string var_2230_pad_type_0 = const()[name = string("op_2230_pad_type_0"), val = string("valid")]; - tensor var_2230_strides_0 = const()[name = string("op_2230_strides_0"), val = tensor([1, 1])]; - tensor var_2230_pad_0 = const()[name = string("op_2230_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2230_dilations_0 = const()[name = string("op_2230_dilations_0"), val = tensor([1, 1])]; - int32 var_2230_groups_0 = const()[name = string("op_2230_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58569728))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58563392))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2230_cast_fp16 = conv(dilations = var_2230_dilations_0, groups = var_2230_groups_0, pad = var_2230_pad_0, pad_type = var_2230_pad_type_0, strides = var_2230_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2230_cast_fp16")]; - tensor value_cast_fp16 = add(x = var_2224_cast_fp16, y = var_2230_cast_fp16)[name = string("value_cast_fp16")]; - tensor var_2233 = const()[name = 
string("op_2233"), val = tensor([1, 12, 64, -1])]; - tensor mh_q_cast_fp16 = reshape(shape = var_2233, x = query_cast_fp16)[name = string("mh_q_cast_fp16")]; - fp16 var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = fp16(0x1p-3)]; - tensor var_2236_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_2235_to_fp16)[name = string("op_2236_cast_fp16")]; - tensor var_2237 = const()[name = string("op_2237"), val = tensor([1, 12, 64, -1])]; - tensor var_2238_cast_fp16 = reshape(shape = var_2237, x = key_cast_fp16)[name = string("op_2238_cast_fp16")]; - bool mh_w_transpose_x_0 = const()[name = string("mh_w_transpose_x_0"), val = bool(true)]; - bool mh_w_transpose_y_0 = const()[name = string("mh_w_transpose_y_0"), val = bool(false)]; - tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2236_cast_fp16, y = var_2238_cast_fp16)[name = string("mh_w_cast_fp16")]; - tensor var_2241_cast_fp16 = softmax(axis = var_2152, x = mh_w_cast_fp16)[name = string("op_2241_cast_fp16")]; - tensor var_2242 = const()[name = string("op_2242"), val = tensor([1, 12, 64, -1])]; - tensor var_2243_cast_fp16 = reshape(shape = var_2242, x = value_cast_fp16)[name = string("op_2243_cast_fp16")]; - bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; - bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; - tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2243_cast_fp16, y = var_2241_cast_fp16)[name = string("attn_cast_fp16")]; - tensor var_2246 = const()[name = string("op_2246"), val = tensor([1, 768, 1, -1])]; - tensor input_89_cast_fp16 = reshape(shape = var_2246, x = attn_cast_fp16)[name = string("input_89_cast_fp16")]; - string var_2256_pad_type_0 = const()[name = string("op_2256_pad_type_0"), val = string("valid")]; - tensor var_2256_strides_0 = const()[name = string("op_2256_strides_0"), val = tensor([1, 1])]; - tensor var_2256_pad_0 = const()[name = string("op_2256_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2256_dilations_0 = const()[name = string("op_2256_dilations_0"), val = tensor([1, 1])]; - int32 var_2256_groups_0 = const()[name = string("op_2256_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58643520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58938496))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; - tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58938624)))]; - tensor var_2256_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2256_dilations_0, groups = var_2256_groups_0, pad = var_2256_pad_0, pad_type = var_2256_pad_type_0, strides = var_2256_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2256_cast_fp16")]; - string var_2262_pad_type_0 = const()[name = string("op_2262_pad_type_0"), val = string("valid")]; - tensor var_2262_strides_0 = const()[name = string("op_2262_strides_0"), val = tensor([1, 1])]; - tensor var_2262_pad_0 = const()[name = 
string("op_2262_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2262_dilations_0 = const()[name = string("op_2262_dilations_0"), val = tensor([1, 1])]; - int32 var_2262_groups_0 = const()[name = string("op_2262_groups_0"), val = int32(1)]; - tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58947520))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58940224))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2262_cast_fp16 = conv(dilations = var_2262_dilations_0, groups = var_2262_groups_0, pad = var_2262_pad_0, pad_type = var_2262_pad_type_0, strides = var_2262_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2262_cast_fp16")]; - tensor obj_cast_fp16 = add(x = var_2256_cast_fp16, y = var_2262_cast_fp16)[name = string("obj_cast_fp16")]; - tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = string("inputs_47_cast_fp16")]; - tensor out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor([1])]; - fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2273_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")]; - tensor input_91_gamma_0_to_fp16 = const()[name = string("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59021312)))]; - tensor input_91_beta_0_to_fp16 = const()[name = string("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59022912)))]; - fp16 input_91_epsilon_0_to_fp16 = const()[name = string("input_91_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_91_cast_fp16")]; - string var_2291_pad_type_0 = const()[name = string("op_2291_pad_type_0"), val = string("valid")]; - tensor var_2291_strides_0 = const()[name = string("op_2291_strides_0"), val = tensor([1, 1])]; - tensor var_2291_pad_0 = const()[name = string("op_2291_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2291_dilations_0 = const()[name = string("op_2291_dilations_0"), val = tensor([1, 1])]; - int32 var_2291_groups_0 = const()[name = string("op_2291_groups_0"), val = int32(1)]; - tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59024512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60204224))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")]; - tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60204352)))]; - tensor var_2291_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2291_dilations_0, groups = var_2291_groups_0, pad = var_2291_pad_0, pad_type = var_2291_pad_type_0, strides = 
var_2291_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2291_cast_fp16")]; - string var_2297_pad_type_0 = const()[name = string("op_2297_pad_type_0"), val = string("valid")]; - tensor var_2297_strides_0 = const()[name = string("op_2297_strides_0"), val = tensor([1, 1])]; - tensor var_2297_pad_0 = const()[name = string("op_2297_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2297_dilations_0 = const()[name = string("op_2297_dilations_0"), val = tensor([1, 1])]; - int32 var_2297_groups_0 = const()[name = string("op_2297_groups_0"), val = int32(1)]; - tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60241728))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60210560))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2297_cast_fp16 = conv(dilations = var_2297_dilations_0, groups = var_2297_groups_0, pad = var_2297_pad_0, pad_type = var_2297_pad_type_0, strides = var_2297_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2297_cast_fp16")]; - tensor input_93_cast_fp16 = add(x = var_2291_cast_fp16, y = var_2297_cast_fp16)[name = string("input_93_cast_fp16")]; - string input_95_mode_0 = const()[name = string("input_95_mode_0"), val = string("EXACT")]; - tensor input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = string("input_95_cast_fp16")]; - string var_2308_pad_type_0 = const()[name = string("op_2308_pad_type_0"), val = string("valid")]; - tensor var_2308_strides_0 = const()[name = string("op_2308_strides_0"), val = tensor([1, 1])]; - tensor var_2308_pad_0 = const()[name = string("op_2308_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2308_dilations_0 = const()[name = string("op_2308_dilations_0"), val = tensor([1, 1])]; - int32 var_2308_groups_0 = const()[name = string("op_2308_groups_0"), val = int32(1)]; - tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60536704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61716416))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")]; - tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61716544)))]; - tensor var_2308_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2308_dilations_0, groups = var_2308_groups_0, pad = var_2308_pad_0, pad_type = var_2308_pad_type_0, strides = var_2308_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2308_cast_fp16")]; - string var_2314_pad_type_0 = const()[name = string("op_2314_pad_type_0"), val = string("valid")]; - tensor var_2314_strides_0 = const()[name = string("op_2314_strides_0"), val = tensor([1, 1])]; - tensor var_2314_pad_0 = const()[name = string("op_2314_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2314_dilations_0 = const()[name = string("op_2314_dilations_0"), val = tensor([1, 1])]; - int32 var_2314_groups_0 = const()[name = string("op_2314_groups_0"), val = int32(1)]; - tensor 
layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61753984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61718144))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2314_cast_fp16 = conv(dilations = var_2314_dilations_0, groups = var_2314_groups_0, pad = var_2314_pad_0, pad_type = var_2314_pad_type_0, strides = var_2314_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2314_cast_fp16")]; - tensor hidden_states_cast_fp16 = add(x = var_2308_cast_fp16, y = var_2314_cast_fp16)[name = string("hidden_states_cast_fp16")]; - tensor inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = string("inputs_cast_fp16")]; - tensor out_axes_0 = const()[name = string("out_axes_0"), val = tensor([1])]; - fp16 var_2329_to_fp16 = const()[name = string("op_2329_to_fp16"), val = fp16(0x1.5p-17)]; - tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2329_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")]; - tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62048960)))]; - tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62050560)))]; - fp16 encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; - tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = string("encoder_output_embeds_type_fp32_cast_fp16")]; - string var_2355_pad_type_0 = const()[name = string("op_2355_pad_type_0"), val = string("valid")]; - tensor var_2355_strides_0 = const()[name = string("op_2355_strides_0"), val = tensor([1, 1])]; - tensor var_2355_pad_0 = const()[name = string("op_2355_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2355_dilations_0 = const()[name = string("op_2355_dilations_0"), val = tensor([1, 1])]; - int32 var_2355_groups_0 = const()[name = string("op_2355_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62052160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62347136))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2355_cast_fp16 = conv(dilations = var_2355_dilations_0, groups = var_2355_groups_0, pad = var_2355_pad_0, pad_type = var_2355_pad_type_0, strides = var_2355_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2355_cast_fp16")]; - string var_2361_pad_type_0 = const()[name = string("op_2361_pad_type_0"), val = 
string("valid")]; - tensor var_2361_strides_0 = const()[name = string("op_2361_strides_0"), val = tensor([1, 1])]; - tensor var_2361_pad_0 = const()[name = string("op_2361_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2361_dilations_0 = const()[name = string("op_2361_dilations_0"), val = tensor([1, 1])]; - int32 var_2361_groups_0 = const()[name = string("op_2361_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62362944))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62347264))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2361_cast_fp16 = conv(dilations = var_2361_dilations_0, groups = var_2361_groups_0, pad = var_2361_pad_0, pad_type = var_2361_pad_type_0, strides = var_2361_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2361_cast_fp16")]; - tensor var_2362_cast_fp16 = add(x = var_2355_cast_fp16, y = var_2361_cast_fp16)[name = string("op_2362_cast_fp16")]; - string var_2371_pad_type_0 = const()[name = string("op_2371_pad_type_0"), val = string("valid")]; - tensor var_2371_strides_0 = const()[name = string("op_2371_strides_0"), val = tensor([1, 1])]; - tensor var_2371_pad_0 = const()[name = string("op_2371_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2371_dilations_0 = const()[name = string("op_2371_dilations_0"), val = tensor([1, 1])]; - int32 var_2371_groups_0 = const()[name = string("op_2371_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62436736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62731712))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62731840)))]; - tensor var_2371_cast_fp16 = conv(bias = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2371_dilations_0, groups = var_2371_groups_0, pad = var_2371_pad_0, pad_type = var_2371_pad_type_0, strides = var_2371_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2371_cast_fp16")]; - string var_2377_pad_type_0 = const()[name = string("op_2377_pad_type_0"), val = string("valid")]; - tensor var_2377_strides_0 = const()[name = string("op_2377_strides_0"), val = tensor([1, 1])]; - tensor var_2377_pad_0 = const()[name = string("op_2377_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2377_dilations_0 = const()[name = string("op_2377_dilations_0"), val = tensor([1, 1])]; - int32 var_2377_groups_0 = const()[name = string("op_2377_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(62740288))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62733440))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2377_cast_fp16 = conv(dilations = var_2377_dilations_0, groups = var_2377_groups_0, pad = var_2377_pad_0, pad_type = var_2377_pad_type_0, strides = var_2377_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2377_cast_fp16")]; - tensor var_2378_cast_fp16 = add(x = var_2371_cast_fp16, y = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")]; - string var_2398_pad_type_0 = const()[name = string("op_2398_pad_type_0"), val = string("valid")]; - tensor var_2398_strides_0 = const()[name = string("op_2398_strides_0"), val = tensor([1, 1])]; - tensor var_2398_pad_0 = const()[name = string("op_2398_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2398_dilations_0 = const()[name = string("op_2398_dilations_0"), val = tensor([1, 1])]; - int32 var_2398_groups_0 = const()[name = string("op_2398_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62814080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63109056))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2398_cast_fp16 = conv(dilations = var_2398_dilations_0, groups = var_2398_groups_0, pad = var_2398_pad_0, pad_type = var_2398_pad_type_0, strides = var_2398_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2398_cast_fp16")]; - string var_2404_pad_type_0 = const()[name = string("op_2404_pad_type_0"), val = string("valid")]; - tensor var_2404_strides_0 = const()[name = string("op_2404_strides_0"), val = tensor([1, 1])]; - tensor var_2404_pad_0 = const()[name = string("op_2404_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2404_dilations_0 = const()[name = string("op_2404_dilations_0"), val = tensor([1, 1])]; - int32 var_2404_groups_0 = const()[name = string("op_2404_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63118656))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63109184))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2404_cast_fp16 = conv(dilations = var_2404_dilations_0, groups = var_2404_groups_0, pad = var_2404_pad_0, pad_type = var_2404_pad_type_0, strides = var_2404_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2404_cast_fp16")]; - tensor var_2405_cast_fp16 = add(x = var_2398_cast_fp16, y = var_2404_cast_fp16)[name = string("op_2405_cast_fp16")]; - string var_2414_pad_type_0 = const()[name = string("op_2414_pad_type_0"), val = string("valid")]; - tensor var_2414_strides_0 = const()[name = string("op_2414_strides_0"), val = tensor([1, 
1])]; - tensor var_2414_pad_0 = const()[name = string("op_2414_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2414_dilations_0 = const()[name = string("op_2414_dilations_0"), val = tensor([1, 1])]; - int32 var_2414_groups_0 = const()[name = string("op_2414_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63192448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63487424))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63487552)))]; - tensor var_2414_cast_fp16 = conv(bias = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2414_dilations_0, groups = var_2414_groups_0, pad = var_2414_pad_0, pad_type = var_2414_pad_type_0, strides = var_2414_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2414_cast_fp16")]; - string var_2420_pad_type_0 = const()[name = string("op_2420_pad_type_0"), val = string("valid")]; - tensor var_2420_strides_0 = const()[name = string("op_2420_strides_0"), val = tensor([1, 1])]; - tensor var_2420_pad_0 = const()[name = string("op_2420_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2420_dilations_0 = const()[name = string("op_2420_dilations_0"), val = tensor([1, 1])]; - int32 var_2420_groups_0 = const()[name = string("op_2420_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63494976))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63489152))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2420_cast_fp16 = conv(dilations = var_2420_dilations_0, groups = var_2420_groups_0, pad = var_2420_pad_0, pad_type = var_2420_pad_type_0, strides = var_2420_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2420_cast_fp16")]; - tensor var_2421_cast_fp16 = add(x = var_2414_cast_fp16, y = var_2420_cast_fp16)[name = string("op_2421_cast_fp16")]; - string var_2441_pad_type_0 = const()[name = string("op_2441_pad_type_0"), val = string("valid")]; - tensor var_2441_strides_0 = const()[name = string("op_2441_strides_0"), val = tensor([1, 1])]; - tensor var_2441_pad_0 = const()[name = string("op_2441_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2441_dilations_0 = const()[name = string("op_2441_dilations_0"), val = tensor([1, 1])]; - int32 var_2441_groups_0 = const()[name = string("op_2441_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63568768))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(63863744))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2441_cast_fp16 = conv(dilations = var_2441_dilations_0, groups = var_2441_groups_0, pad = var_2441_pad_0, pad_type = var_2441_pad_type_0, strides = var_2441_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2441_cast_fp16")]; - string var_2447_pad_type_0 = const()[name = string("op_2447_pad_type_0"), val = string("valid")]; - tensor var_2447_strides_0 = const()[name = string("op_2447_strides_0"), val = tensor([1, 1])]; - tensor var_2447_pad_0 = const()[name = string("op_2447_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2447_dilations_0 = const()[name = string("op_2447_dilations_0"), val = tensor([1, 1])]; - int32 var_2447_groups_0 = const()[name = string("op_2447_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63873088))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63863872))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2447_cast_fp16 = conv(dilations = var_2447_dilations_0, groups = var_2447_groups_0, pad = var_2447_pad_0, pad_type = var_2447_pad_type_0, strides = var_2447_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2447_cast_fp16")]; - tensor var_2448_cast_fp16 = add(x = var_2441_cast_fp16, y = var_2447_cast_fp16)[name = string("op_2448_cast_fp16")]; - string var_2457_pad_type_0 = const()[name = string("op_2457_pad_type_0"), val = string("valid")]; - tensor var_2457_strides_0 = const()[name = string("op_2457_strides_0"), val = tensor([1, 1])]; - tensor var_2457_pad_0 = const()[name = string("op_2457_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2457_dilations_0 = const()[name = string("op_2457_dilations_0"), val = tensor([1, 1])]; - int32 var_2457_groups_0 = const()[name = string("op_2457_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63946880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64241856))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64241984)))]; - tensor var_2457_cast_fp16 = conv(bias = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2457_dilations_0, groups = var_2457_groups_0, pad = var_2457_pad_0, pad_type = var_2457_pad_type_0, strides = var_2457_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2457_cast_fp16")]; - string var_2463_pad_type_0 = const()[name = 
string("op_2463_pad_type_0"), val = string("valid")]; - tensor var_2463_strides_0 = const()[name = string("op_2463_strides_0"), val = tensor([1, 1])]; - tensor var_2463_pad_0 = const()[name = string("op_2463_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2463_dilations_0 = const()[name = string("op_2463_dilations_0"), val = tensor([1, 1])]; - int32 var_2463_groups_0 = const()[name = string("op_2463_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64252480))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64243584))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2463_cast_fp16 = conv(dilations = var_2463_dilations_0, groups = var_2463_groups_0, pad = var_2463_pad_0, pad_type = var_2463_pad_type_0, strides = var_2463_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2463_cast_fp16")]; - tensor var_2464_cast_fp16 = add(x = var_2457_cast_fp16, y = var_2463_cast_fp16)[name = string("op_2464_cast_fp16")]; - string var_2484_pad_type_0 = const()[name = string("op_2484_pad_type_0"), val = string("valid")]; - tensor var_2484_strides_0 = const()[name = string("op_2484_strides_0"), val = tensor([1, 1])]; - tensor var_2484_pad_0 = const()[name = string("op_2484_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2484_dilations_0 = const()[name = string("op_2484_dilations_0"), val = tensor([1, 1])]; - int32 var_2484_groups_0 = const()[name = string("op_2484_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64326272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64621248))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2484_cast_fp16 = conv(dilations = var_2484_dilations_0, groups = var_2484_groups_0, pad = var_2484_pad_0, pad_type = var_2484_pad_type_0, strides = var_2484_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2484_cast_fp16")]; - string var_2490_pad_type_0 = const()[name = string("op_2490_pad_type_0"), val = string("valid")]; - tensor var_2490_strides_0 = const()[name = string("op_2490_strides_0"), val = tensor([1, 1])]; - tensor var_2490_pad_0 = const()[name = string("op_2490_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2490_dilations_0 = const()[name = string("op_2490_dilations_0"), val = tensor([1, 1])]; - int32 var_2490_groups_0 = const()[name = string("op_2490_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64636864))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64621376))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2490_cast_fp16 = conv(dilations = 
var_2490_dilations_0, groups = var_2490_groups_0, pad = var_2490_pad_0, pad_type = var_2490_pad_type_0, strides = var_2490_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2490_cast_fp16")]; - tensor var_2491_cast_fp16 = add(x = var_2484_cast_fp16, y = var_2490_cast_fp16)[name = string("op_2491_cast_fp16")]; - string var_2500_pad_type_0 = const()[name = string("op_2500_pad_type_0"), val = string("valid")]; - tensor var_2500_strides_0 = const()[name = string("op_2500_strides_0"), val = tensor([1, 1])]; - tensor var_2500_pad_0 = const()[name = string("op_2500_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2500_dilations_0 = const()[name = string("op_2500_dilations_0"), val = tensor([1, 1])]; - int32 var_2500_groups_0 = const()[name = string("op_2500_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64710656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65005632))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65005760)))]; - tensor var_2500_cast_fp16 = conv(bias = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2500_dilations_0, groups = var_2500_groups_0, pad = var_2500_pad_0, pad_type = var_2500_pad_type_0, strides = var_2500_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2500_cast_fp16")]; - string var_2506_pad_type_0 = const()[name = string("op_2506_pad_type_0"), val = string("valid")]; - tensor var_2506_strides_0 = const()[name = string("op_2506_strides_0"), val = tensor([1, 1])]; - tensor var_2506_pad_0 = const()[name = string("op_2506_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2506_dilations_0 = const()[name = string("op_2506_dilations_0"), val = tensor([1, 1])]; - int32 var_2506_groups_0 = const()[name = string("op_2506_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65020160))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65007360))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2506_cast_fp16 = conv(dilations = var_2506_dilations_0, groups = var_2506_groups_0, pad = var_2506_pad_0, pad_type = var_2506_pad_type_0, strides = var_2506_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2506_cast_fp16")]; - tensor var_2507_cast_fp16 = add(x = var_2500_cast_fp16, y = var_2506_cast_fp16)[name = string("op_2507_cast_fp16")]; - string var_2527_pad_type_0 = const()[name = string("op_2527_pad_type_0"), val = string("valid")]; - tensor var_2527_strides_0 = const()[name = 
string("op_2527_strides_0"), val = tensor([1, 1])]; - tensor var_2527_pad_0 = const()[name = string("op_2527_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2527_dilations_0 = const()[name = string("op_2527_dilations_0"), val = tensor([1, 1])]; - int32 var_2527_groups_0 = const()[name = string("op_2527_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65093952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65388928))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2527_cast_fp16 = conv(dilations = var_2527_dilations_0, groups = var_2527_groups_0, pad = var_2527_pad_0, pad_type = var_2527_pad_type_0, strides = var_2527_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2527_cast_fp16")]; - string var_2533_pad_type_0 = const()[name = string("op_2533_pad_type_0"), val = string("valid")]; - tensor var_2533_strides_0 = const()[name = string("op_2533_strides_0"), val = tensor([1, 1])]; - tensor var_2533_pad_0 = const()[name = string("op_2533_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2533_dilations_0 = const()[name = string("op_2533_dilations_0"), val = tensor([1, 1])]; - int32 var_2533_groups_0 = const()[name = string("op_2533_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65403264))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65389056))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2533_cast_fp16 = conv(dilations = var_2533_dilations_0, groups = var_2533_groups_0, pad = var_2533_pad_0, pad_type = var_2533_pad_type_0, strides = var_2533_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2533_cast_fp16")]; - tensor var_2534_cast_fp16 = add(x = var_2527_cast_fp16, y = var_2533_cast_fp16)[name = string("op_2534_cast_fp16")]; - string var_2543_pad_type_0 = const()[name = string("op_2543_pad_type_0"), val = string("valid")]; - tensor var_2543_strides_0 = const()[name = string("op_2543_strides_0"), val = tensor([1, 1])]; - tensor var_2543_pad_0 = const()[name = string("op_2543_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2543_dilations_0 = const()[name = string("op_2543_dilations_0"), val = tensor([1, 1])]; - int32 var_2543_groups_0 = const()[name = string("op_2543_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65477056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65772032))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = 
string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65772160)))]; - tensor var_2543_cast_fp16 = conv(bias = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2543_dilations_0, groups = var_2543_groups_0, pad = var_2543_pad_0, pad_type = var_2543_pad_type_0, strides = var_2543_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2543_cast_fp16")]; - string var_2549_pad_type_0 = const()[name = string("op_2549_pad_type_0"), val = string("valid")]; - tensor var_2549_strides_0 = const()[name = string("op_2549_strides_0"), val = tensor([1, 1])]; - tensor var_2549_pad_0 = const()[name = string("op_2549_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2549_dilations_0 = const()[name = string("op_2549_dilations_0"), val = tensor([1, 1])]; - int32 var_2549_groups_0 = const()[name = string("op_2549_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65783744))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65773760))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2549_cast_fp16 = conv(dilations = var_2549_dilations_0, groups = var_2549_groups_0, pad = var_2549_pad_0, pad_type = var_2549_pad_type_0, strides = var_2549_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2549_cast_fp16")]; - tensor var_2550_cast_fp16 = add(x = var_2543_cast_fp16, y = var_2549_cast_fp16)[name = string("op_2550_cast_fp16")]; - string var_2570_pad_type_0 = const()[name = string("op_2570_pad_type_0"), val = string("valid")]; - tensor var_2570_strides_0 = const()[name = string("op_2570_strides_0"), val = tensor([1, 1])]; - tensor var_2570_pad_0 = const()[name = string("op_2570_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2570_dilations_0 = const()[name = string("op_2570_dilations_0"), val = tensor([1, 1])]; - int32 var_2570_groups_0 = const()[name = string("op_2570_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65857536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66152512))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2570_cast_fp16 = conv(dilations = var_2570_dilations_0, groups = var_2570_groups_0, pad = var_2570_pad_0, pad_type = var_2570_pad_type_0, strides = var_2570_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2570_cast_fp16")]; - string var_2576_pad_type_0 = const()[name = string("op_2576_pad_type_0"), val = string("valid")]; - tensor var_2576_strides_0 = const()[name = string("op_2576_strides_0"), val = tensor([1, 1])]; - tensor var_2576_pad_0 = const()[name = string("op_2576_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2576_dilations_0 = 
const()[name = string("op_2576_dilations_0"), val = tensor([1, 1])]; - int32 var_2576_groups_0 = const()[name = string("op_2576_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66163008))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66152640))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2576_cast_fp16 = conv(dilations = var_2576_dilations_0, groups = var_2576_groups_0, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2576_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2576_cast_fp16")]; - tensor var_2577_cast_fp16 = add(x = var_2570_cast_fp16, y = var_2576_cast_fp16)[name = string("op_2577_cast_fp16")]; - string var_2586_pad_type_0 = const()[name = string("op_2586_pad_type_0"), val = string("valid")]; - tensor var_2586_strides_0 = const()[name = string("op_2586_strides_0"), val = tensor([1, 1])]; - tensor var_2586_pad_0 = const()[name = string("op_2586_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2586_dilations_0 = const()[name = string("op_2586_dilations_0"), val = tensor([1, 1])]; - int32 var_2586_groups_0 = const()[name = string("op_2586_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66236800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66531776))))[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66531904)))]; - tensor var_2586_cast_fp16 = conv(bias = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2586_dilations_0, groups = var_2586_groups_0, pad = var_2586_pad_0, pad_type = var_2586_pad_type_0, strides = var_2586_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2586_cast_fp16")]; - string var_2592_pad_type_0 = const()[name = string("op_2592_pad_type_0"), val = string("valid")]; - tensor var_2592_strides_0 = const()[name = string("op_2592_strides_0"), val = tensor([1, 1])]; - tensor var_2592_pad_0 = const()[name = string("op_2592_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2592_dilations_0 = const()[name = string("op_2592_dilations_0"), val = tensor([1, 1])]; - int32 var_2592_groups_0 = const()[name = string("op_2592_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66540800))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66533504))))[name = 
string("decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2592_cast_fp16 = conv(dilations = var_2592_dilations_0, groups = var_2592_groups_0, pad = var_2592_pad_0, pad_type = var_2592_pad_type_0, strides = var_2592_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2592_cast_fp16")]; - tensor var_2593_cast_fp16 = add(x = var_2586_cast_fp16, y = var_2592_cast_fp16)[name = string("op_2593_cast_fp16")]; - string var_2613_pad_type_0 = const()[name = string("op_2613_pad_type_0"), val = string("valid")]; - tensor var_2613_strides_0 = const()[name = string("op_2613_strides_0"), val = tensor([1, 1])]; - tensor var_2613_pad_0 = const()[name = string("op_2613_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2613_dilations_0 = const()[name = string("op_2613_dilations_0"), val = tensor([1, 1])]; - int32 var_2613_groups_0 = const()[name = string("op_2613_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66614592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66909568))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2613_cast_fp16 = conv(dilations = var_2613_dilations_0, groups = var_2613_groups_0, pad = var_2613_pad_0, pad_type = var_2613_pad_type_0, strides = var_2613_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2613_cast_fp16")]; - string var_2619_pad_type_0 = const()[name = string("op_2619_pad_type_0"), val = string("valid")]; - tensor var_2619_strides_0 = const()[name = string("op_2619_strides_0"), val = tensor([1, 1])]; - tensor var_2619_pad_0 = const()[name = string("op_2619_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2619_dilations_0 = const()[name = string("op_2619_dilations_0"), val = tensor([1, 1])]; - int32 var_2619_groups_0 = const()[name = string("op_2619_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66917184))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66909696))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2619_cast_fp16 = conv(dilations = var_2619_dilations_0, groups = var_2619_groups_0, pad = var_2619_pad_0, pad_type = var_2619_pad_type_0, strides = var_2619_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2619_cast_fp16")]; - tensor var_2620_cast_fp16 = add(x = var_2613_cast_fp16, y = var_2619_cast_fp16)[name = string("op_2620_cast_fp16")]; - string var_2629_pad_type_0 = const()[name = string("op_2629_pad_type_0"), val = string("valid")]; - tensor var_2629_strides_0 = const()[name = string("op_2629_strides_0"), val = tensor([1, 1])]; - tensor var_2629_pad_0 = const()[name = string("op_2629_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2629_dilations_0 = const()[name = string("op_2629_dilations_0"), val = 
tensor([1, 1])]; - int32 var_2629_groups_0 = const()[name = string("op_2629_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66990976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67285952))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67286080)))]; - tensor var_2629_cast_fp16 = conv(bias = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2629_dilations_0, groups = var_2629_groups_0, pad = var_2629_pad_0, pad_type = var_2629_pad_type_0, strides = var_2629_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2629_cast_fp16")]; - string var_2635_pad_type_0 = const()[name = string("op_2635_pad_type_0"), val = string("valid")]; - tensor var_2635_strides_0 = const()[name = string("op_2635_strides_0"), val = tensor([1, 1])]; - tensor var_2635_pad_0 = const()[name = string("op_2635_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2635_dilations_0 = const()[name = string("op_2635_dilations_0"), val = tensor([1, 1])]; - int32 var_2635_groups_0 = const()[name = string("op_2635_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67293952))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67287680))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2635_cast_fp16 = conv(dilations = var_2635_dilations_0, groups = var_2635_groups_0, pad = var_2635_pad_0, pad_type = var_2635_pad_type_0, strides = var_2635_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2635_cast_fp16")]; - tensor var_2636_cast_fp16 = add(x = var_2629_cast_fp16, y = var_2635_cast_fp16)[name = string("op_2636_cast_fp16")]; - string var_2656_pad_type_0 = const()[name = string("op_2656_pad_type_0"), val = string("valid")]; - tensor var_2656_strides_0 = const()[name = string("op_2656_strides_0"), val = tensor([1, 1])]; - tensor var_2656_pad_0 = const()[name = string("op_2656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2656_dilations_0 = const()[name = string("op_2656_dilations_0"), val = tensor([1, 1])]; - int32 var_2656_groups_0 = const()[name = string("op_2656_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67367744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67662720))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2656_cast_fp16 = conv(dilations 
= var_2656_dilations_0, groups = var_2656_groups_0, pad = var_2656_pad_0, pad_type = var_2656_pad_type_0, strides = var_2656_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2656_cast_fp16")]; - string var_2662_pad_type_0 = const()[name = string("op_2662_pad_type_0"), val = string("valid")]; - tensor var_2662_strides_0 = const()[name = string("op_2662_strides_0"), val = tensor([1, 1])]; - tensor var_2662_pad_0 = const()[name = string("op_2662_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2662_dilations_0 = const()[name = string("op_2662_dilations_0"), val = tensor([1, 1])]; - int32 var_2662_groups_0 = const()[name = string("op_2662_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67670976))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67662848))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2662_cast_fp16 = conv(dilations = var_2662_dilations_0, groups = var_2662_groups_0, pad = var_2662_pad_0, pad_type = var_2662_pad_type_0, strides = var_2662_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2662_cast_fp16")]; - tensor var_2663_cast_fp16 = add(x = var_2656_cast_fp16, y = var_2662_cast_fp16)[name = string("op_2663_cast_fp16")]; - string var_2672_pad_type_0 = const()[name = string("op_2672_pad_type_0"), val = string("valid")]; - tensor var_2672_strides_0 = const()[name = string("op_2672_strides_0"), val = tensor([1, 1])]; - tensor var_2672_pad_0 = const()[name = string("op_2672_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2672_dilations_0 = const()[name = string("op_2672_dilations_0"), val = tensor([1, 1])]; - int32 var_2672_groups_0 = const()[name = string("op_2672_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67744768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68039744))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68039872)))]; - tensor var_2672_cast_fp16 = conv(bias = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2672_dilations_0, groups = var_2672_groups_0, pad = var_2672_pad_0, pad_type = var_2672_pad_type_0, strides = var_2672_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2672_cast_fp16")]; - string var_2678_pad_type_0 = const()[name = string("op_2678_pad_type_0"), val = string("valid")]; - tensor var_2678_strides_0 = const()[name = string("op_2678_strides_0"), val = tensor([1, 1])]; - tensor var_2678_pad_0 = const()[name = string("op_2678_pad_0"), val = tensor([0, 
0, 0, 0])]; - tensor var_2678_dilations_0 = const()[name = string("op_2678_dilations_0"), val = tensor([1, 1])]; - int32 var_2678_groups_0 = const()[name = string("op_2678_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68047232))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68041472))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2678_cast_fp16 = conv(dilations = var_2678_dilations_0, groups = var_2678_groups_0, pad = var_2678_pad_0, pad_type = var_2678_pad_type_0, strides = var_2678_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2678_cast_fp16")]; - tensor var_2679_cast_fp16 = add(x = var_2672_cast_fp16, y = var_2678_cast_fp16)[name = string("op_2679_cast_fp16")]; - string var_2699_pad_type_0 = const()[name = string("op_2699_pad_type_0"), val = string("valid")]; - tensor var_2699_strides_0 = const()[name = string("op_2699_strides_0"), val = tensor([1, 1])]; - tensor var_2699_pad_0 = const()[name = string("op_2699_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2699_dilations_0 = const()[name = string("op_2699_dilations_0"), val = tensor([1, 1])]; - int32 var_2699_groups_0 = const()[name = string("op_2699_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68121024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68416000))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2699_cast_fp16 = conv(dilations = var_2699_dilations_0, groups = var_2699_groups_0, pad = var_2699_pad_0, pad_type = var_2699_pad_type_0, strides = var_2699_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2699_cast_fp16")]; - string var_2705_pad_type_0 = const()[name = string("op_2705_pad_type_0"), val = string("valid")]; - tensor var_2705_strides_0 = const()[name = string("op_2705_strides_0"), val = tensor([1, 1])]; - tensor var_2705_pad_0 = const()[name = string("op_2705_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2705_dilations_0 = const()[name = string("op_2705_dilations_0"), val = tensor([1, 1])]; - int32 var_2705_groups_0 = const()[name = string("op_2705_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68423936))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68416128))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2705_cast_fp16 = conv(dilations = var_2705_dilations_0, groups = var_2705_groups_0, pad = var_2705_pad_0, pad_type = var_2705_pad_type_0, strides = var_2705_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = 
encoder_output_embeds)[name = string("op_2705_cast_fp16")]; - tensor var_2706_cast_fp16 = add(x = var_2699_cast_fp16, y = var_2705_cast_fp16)[name = string("op_2706_cast_fp16")]; - string var_2715_pad_type_0 = const()[name = string("op_2715_pad_type_0"), val = string("valid")]; - tensor var_2715_strides_0 = const()[name = string("op_2715_strides_0"), val = tensor([1, 1])]; - tensor var_2715_pad_0 = const()[name = string("op_2715_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2715_dilations_0 = const()[name = string("op_2715_dilations_0"), val = tensor([1, 1])]; - int32 var_2715_groups_0 = const()[name = string("op_2715_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68497728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68792704))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68792832)))]; - tensor var_2715_cast_fp16 = conv(bias = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2715_dilations_0, groups = var_2715_groups_0, pad = var_2715_pad_0, pad_type = var_2715_pad_type_0, strides = var_2715_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2715_cast_fp16")]; - string var_2721_pad_type_0 = const()[name = string("op_2721_pad_type_0"), val = string("valid")]; - tensor var_2721_strides_0 = const()[name = string("op_2721_strides_0"), val = tensor([1, 1])]; - tensor var_2721_pad_0 = const()[name = string("op_2721_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2721_dilations_0 = const()[name = string("op_2721_dilations_0"), val = tensor([1, 1])]; - int32 var_2721_groups_0 = const()[name = string("op_2721_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68800128))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68794432))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2721_cast_fp16 = conv(dilations = var_2721_dilations_0, groups = var_2721_groups_0, pad = var_2721_pad_0, pad_type = var_2721_pad_type_0, strides = var_2721_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2721_cast_fp16")]; - tensor var_2722_cast_fp16 = add(x = var_2715_cast_fp16, y = var_2721_cast_fp16)[name = string("op_2722_cast_fp16")]; - string var_2742_pad_type_0 = const()[name = string("op_2742_pad_type_0"), val = string("valid")]; - tensor var_2742_strides_0 = const()[name = string("op_2742_strides_0"), val = tensor([1, 1])]; - tensor var_2742_pad_0 = const()[name = string("op_2742_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2742_dilations_0 = const()[name = string("op_2742_dilations_0"), val = tensor([1, 1])]; - 
int32 var_2742_groups_0 = const()[name = string("op_2742_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68873920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69168896))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2742_cast_fp16 = conv(dilations = var_2742_dilations_0, groups = var_2742_groups_0, pad = var_2742_pad_0, pad_type = var_2742_pad_type_0, strides = var_2742_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2742_cast_fp16")]; - string var_2748_pad_type_0 = const()[name = string("op_2748_pad_type_0"), val = string("valid")]; - tensor var_2748_strides_0 = const()[name = string("op_2748_strides_0"), val = tensor([1, 1])]; - tensor var_2748_pad_0 = const()[name = string("op_2748_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2748_dilations_0 = const()[name = string("op_2748_dilations_0"), val = tensor([1, 1])]; - int32 var_2748_groups_0 = const()[name = string("op_2748_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69175488))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69169024))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2748_cast_fp16 = conv(dilations = var_2748_dilations_0, groups = var_2748_groups_0, pad = var_2748_pad_0, pad_type = var_2748_pad_type_0, strides = var_2748_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2748_cast_fp16")]; - tensor var_2749_cast_fp16 = add(x = var_2742_cast_fp16, y = var_2748_cast_fp16)[name = string("op_2749_cast_fp16")]; - string var_2758_pad_type_0 = const()[name = string("op_2758_pad_type_0"), val = string("valid")]; - tensor var_2758_strides_0 = const()[name = string("op_2758_strides_0"), val = tensor([1, 1])]; - tensor var_2758_pad_0 = const()[name = string("op_2758_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2758_dilations_0 = const()[name = string("op_2758_dilations_0"), val = tensor([1, 1])]; - int32 var_2758_groups_0 = const()[name = string("op_2758_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69249280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69544256))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69544384)))]; - tensor var_2758_cast_fp16 = conv(bias = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = 
var_2758_dilations_0, groups = var_2758_groups_0, pad = var_2758_pad_0, pad_type = var_2758_pad_type_0, strides = var_2758_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2758_cast_fp16")]; - string var_2764_pad_type_0 = const()[name = string("op_2764_pad_type_0"), val = string("valid")]; - tensor var_2764_strides_0 = const()[name = string("op_2764_strides_0"), val = tensor([1, 1])]; - tensor var_2764_pad_0 = const()[name = string("op_2764_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2764_dilations_0 = const()[name = string("op_2764_dilations_0"), val = tensor([1, 1])]; - int32 var_2764_groups_0 = const()[name = string("op_2764_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69551232))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69545984))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2764_cast_fp16 = conv(dilations = var_2764_dilations_0, groups = var_2764_groups_0, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2764_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2764_cast_fp16")]; - tensor var_2765_cast_fp16 = add(x = var_2758_cast_fp16, y = var_2764_cast_fp16)[name = string("op_2765_cast_fp16")]; - string var_2785_pad_type_0 = const()[name = string("op_2785_pad_type_0"), val = string("valid")]; - tensor var_2785_strides_0 = const()[name = string("op_2785_strides_0"), val = tensor([1, 1])]; - tensor var_2785_pad_0 = const()[name = string("op_2785_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2785_dilations_0 = const()[name = string("op_2785_dilations_0"), val = tensor([1, 1])]; - int32 var_2785_groups_0 = const()[name = string("op_2785_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69625024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69920000))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2785_cast_fp16 = conv(dilations = var_2785_dilations_0, groups = var_2785_groups_0, pad = var_2785_pad_0, pad_type = var_2785_pad_type_0, strides = var_2785_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2785_cast_fp16")]; - string var_2791_pad_type_0 = const()[name = string("op_2791_pad_type_0"), val = string("valid")]; - tensor var_2791_strides_0 = const()[name = string("op_2791_strides_0"), val = tensor([1, 1])]; - tensor var_2791_pad_0 = const()[name = string("op_2791_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2791_dilations_0 = const()[name = string("op_2791_dilations_0"), val = tensor([1, 1])]; - int32 var_2791_groups_0 = const()[name = string("op_2791_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(69927488))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69920128))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2791_cast_fp16 = conv(dilations = var_2791_dilations_0, groups = var_2791_groups_0, pad = var_2791_pad_0, pad_type = var_2791_pad_type_0, strides = var_2791_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2791_cast_fp16")]; - tensor var_2792_cast_fp16 = add(x = var_2785_cast_fp16, y = var_2791_cast_fp16)[name = string("op_2792_cast_fp16")]; - string var_2801_pad_type_0 = const()[name = string("op_2801_pad_type_0"), val = string("valid")]; - tensor var_2801_strides_0 = const()[name = string("op_2801_strides_0"), val = tensor([1, 1])]; - tensor var_2801_pad_0 = const()[name = string("op_2801_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2801_dilations_0 = const()[name = string("op_2801_dilations_0"), val = tensor([1, 1])]; - int32 var_2801_groups_0 = const()[name = string("op_2801_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70001280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70296256))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70296384)))]; - tensor var_2801_cast_fp16 = conv(bias = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2801_dilations_0, groups = var_2801_groups_0, pad = var_2801_pad_0, pad_type = var_2801_pad_type_0, strides = var_2801_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2801_cast_fp16")]; - string var_2807_pad_type_0 = const()[name = string("op_2807_pad_type_0"), val = string("valid")]; - tensor var_2807_strides_0 = const()[name = string("op_2807_strides_0"), val = tensor([1, 1])]; - tensor var_2807_pad_0 = const()[name = string("op_2807_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2807_dilations_0 = const()[name = string("op_2807_dilations_0"), val = tensor([1, 1])]; - int32 var_2807_groups_0 = const()[name = string("op_2807_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70307328))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70297984))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2807_cast_fp16 = conv(dilations = var_2807_dilations_0, groups = var_2807_groups_0, pad = var_2807_pad_0, pad_type = var_2807_pad_type_0, strides = var_2807_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = 
encoder_output_embeds)[name = string("op_2807_cast_fp16")]; - tensor var_2808_cast_fp16 = add(x = var_2801_cast_fp16, y = var_2807_cast_fp16)[name = string("op_2808_cast_fp16")]; - string var_2828_pad_type_0 = const()[name = string("op_2828_pad_type_0"), val = string("valid")]; - tensor var_2828_strides_0 = const()[name = string("op_2828_strides_0"), val = tensor([1, 1])]; - tensor var_2828_pad_0 = const()[name = string("op_2828_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2828_dilations_0 = const()[name = string("op_2828_dilations_0"), val = tensor([1, 1])]; - int32 var_2828_groups_0 = const()[name = string("op_2828_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70381120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70676096))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; - tensor var_2828_cast_fp16 = conv(dilations = var_2828_dilations_0, groups = var_2828_groups_0, pad = var_2828_pad_0, pad_type = var_2828_pad_type_0, strides = var_2828_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2828_cast_fp16")]; - string var_2834_pad_type_0 = const()[name = string("op_2834_pad_type_0"), val = string("valid")]; - tensor var_2834_strides_0 = const()[name = string("op_2834_strides_0"), val = tensor([1, 1])]; - tensor var_2834_pad_0 = const()[name = string("op_2834_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2834_dilations_0 = const()[name = string("op_2834_dilations_0"), val = tensor([1, 1])]; - int32 var_2834_groups_0 = const()[name = string("op_2834_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70684480))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70676224))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2834_cast_fp16 = conv(dilations = var_2834_dilations_0, groups = var_2834_groups_0, pad = var_2834_pad_0, pad_type = var_2834_pad_type_0, strides = var_2834_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2834_cast_fp16")]; - tensor k_cast_fp16 = add(x = var_2828_cast_fp16, y = var_2834_cast_fp16)[name = string("k_cast_fp16")]; - string var_2844_pad_type_0 = const()[name = string("op_2844_pad_type_0"), val = string("valid")]; - tensor var_2844_strides_0 = const()[name = string("op_2844_strides_0"), val = tensor([1, 1])]; - tensor var_2844_pad_0 = const()[name = string("op_2844_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2844_dilations_0 = const()[name = string("op_2844_dilations_0"), val = tensor([1, 1])]; - int32 var_2844_groups_0 = const()[name = string("op_2844_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70758272))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(71053248))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; - tensor decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71053376)))]; - tensor var_2844_cast_fp16 = conv(bias = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2844_dilations_0, groups = var_2844_groups_0, pad = var_2844_pad_0, pad_type = var_2844_pad_type_0, strides = var_2844_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2844_cast_fp16")]; - string var_2850_pad_type_0 = const()[name = string("op_2850_pad_type_0"), val = string("valid")]; - tensor var_2850_strides_0 = const()[name = string("op_2850_strides_0"), val = tensor([1, 1])]; - tensor var_2850_pad_0 = const()[name = string("op_2850_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2850_dilations_0 = const()[name = string("op_2850_dilations_0"), val = tensor([1, 1])]; - int32 var_2850_groups_0 = const()[name = string("op_2850_groups_0"), val = int32(1)]; - tensor decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71063808))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71054976))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; - tensor var_2850_cast_fp16 = conv(dilations = var_2850_dilations_0, groups = var_2850_groups_0, pad = var_2850_pad_0, pad_type = var_2850_pad_type_0, strides = var_2850_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2850_cast_fp16")]; - tensor v_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2850_cast_fp16)[name = string("v_cast_fp16")]; - int32 var_2856 = const()[name = string("op_2856"), val = int32(0)]; - bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; - tensor input_99_cast_fp16 = concat(axis = var_2856, interleave = input_99_interleave_0, values = (var_2362_cast_fp16, var_2405_cast_fp16, var_2448_cast_fp16, var_2491_cast_fp16, var_2534_cast_fp16, var_2577_cast_fp16, var_2620_cast_fp16, var_2663_cast_fp16, var_2706_cast_fp16, var_2749_cast_fp16, var_2792_cast_fp16, k_cast_fp16))[name = string("input_99_cast_fp16")]; - int32 var_2859 = const()[name = string("op_2859"), val = int32(0)]; - bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; - tensor input_cast_fp16 = concat(axis = var_2859, interleave = input_interleave_0, values = (var_2378_cast_fp16, var_2421_cast_fp16, var_2464_cast_fp16, var_2507_cast_fp16, var_2550_cast_fp16, var_2593_cast_fp16, var_2636_cast_fp16, var_2679_cast_fp16, var_2722_cast_fp16, var_2765_cast_fp16, var_2808_cast_fp16, v_cast_fp16))[name = string("input_cast_fp16")]; - tensor var_2866_pad_0 = const()[name = string("op_2866_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 36])]; - string var_2866_mode_0 = const()[name = string("op_2866_mode_0"), val = string("constant")]; - fp16 const_13_to_fp16 = const()[name = 
string("const_13_to_fp16"), val = fp16(0x0p+0)]; - tensor encoder_attn_key_cache = pad(constant_val = const_13_to_fp16, mode = var_2866_mode_0, pad = var_2866_pad_0, x = input_99_cast_fp16)[name = string("op_2866_cast_fp16")]; - tensor var_2872_pad_0 = const()[name = string("op_2872_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 36])]; - string var_2872_mode_0 = const()[name = string("op_2872_mode_0"), val = string("constant")]; - fp16 const_14_to_fp16 = const()[name = string("const_14_to_fp16"), val = fp16(0x0p+0)]; - tensor encoder_attn_value_cache = pad(constant_val = const_14_to_fp16, mode = var_2872_mode_0, pad = var_2872_pad_0, x = input_cast_fp16)[name = string("op_2872_cast_fp16")]; - } -> (encoder_output_embeds, encoder_attn_key_cache, encoder_attn_value_cache); -} \ No newline at end of file