arda-argmax commited on
Commit
acdfedd
·
verified ·
1 Parent(s): 9b92fca

pyannote-v3-pro W16A16 SpeakerEmbedderPreprocessor

Browse files
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1383750df1af99af002f0c3403bcf19a18c3d749706eb3498d34b0fe01abf2fc
3
+ size 243
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc230803421bf4fe14f843ea5b5fa0035487fd19cdd69ed670d72560b6a44586
3
+ size 330
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 2998 × 80)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 2998, 80]",
13
+ "name" : "preprocessor_output_1",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 7,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Ios16.cast" : 2,
23
+ "Ios16.mul" : 4,
24
+ "SliceByIndex" : 2,
25
+ "Transpose" : 2,
26
+ "SlidingWindows" : 1,
27
+ "Ios16.sub" : 3,
28
+ "Ios16.log" : 1,
29
+ "Ios16.reduceMean" : 2,
30
+ "Ios16.square" : 2,
31
+ "Squeeze" : 2,
32
+ "Ios16.matmul" : 2,
33
+ "Ios16.add" : 1,
34
+ "Ios16.linear" : 1,
35
+ "ExpandDims" : 4,
36
+ "Ios16.gather" : 2,
37
+ "Ios16.maximum" : 1,
38
+ "Identity" : 1,
39
+ "Pad" : 2
40
+ },
41
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
42
+ "isUpdatable" : "0",
43
+ "stateSchema" : [
44
+
45
+ ],
46
+ "availability" : {
47
+ "macOS" : "13.0",
48
+ "tvOS" : "16.0",
49
+ "visionOS" : "1.0",
50
+ "watchOS" : "9.0",
51
+ "iOS" : "16.0",
52
+ "macCatalyst" : "16.0"
53
+ },
54
+ "modelType" : {
55
+ "name" : "MLModelType_mlProgram"
56
+ },
57
+ "userDefinedMetadata" : {
58
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
59
+ "com.github.apple.coremltools.source" : "torch==2.6.0",
60
+ "com.github.apple.coremltools.version" : "8.2"
61
+ },
62
+ "inputSchema" : [
63
+ {
64
+ "hasShapeFlexibility" : "0",
65
+ "isOptional" : "0",
66
+ "dataType" : "Float16",
67
+ "formattedType" : "MultiArray (Float16 1 × 480000)",
68
+ "shortDescription" : "",
69
+ "shape" : "[1, 480000]",
70
+ "name" : "waveforms",
71
+ "type" : "MultiArray"
72
+ }
73
+ ],
74
+ "generatedClassName" : "SpeakerEmbeddingPreprocessor",
75
+ "method" : "predict"
76
+ }
77
+ ]
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})]
3
+ {
4
+ func main<ios16>(tensor<fp16, [1, 480000]> waveforms) {
5
+ tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("fp32")];
6
+ tensor<fp32, []> var_2_promoted = const()[name = tensor<string, []>("op_2_promoted"), val = tensor<fp32, []>(0x1p+15)];
7
+ tensor<fp32, [1, 480000]> cast_0 = cast(dtype = cast_0_dtype_0, x = waveforms)[name = tensor<string, []>("cast_11")];
8
+ tensor<fp32, [1, 480000]> waveform_1 = mul(x = cast_0, y = var_2_promoted)[name = tensor<string, []>("waveform_1")];
9
+ tensor<int32, [2]> var_6_begin_0 = const()[name = tensor<string, []>("op_6_begin_0"), val = tensor<int32, [2]>([0, 0])];
10
+ tensor<int32, [2]> var_6_end_0 = const()[name = tensor<string, []>("op_6_end_0"), val = tensor<int32, [2]>([1, 480000])];
11
+ tensor<bool, [2]> var_6_end_mask_0 = const()[name = tensor<string, []>("op_6_end_mask_0"), val = tensor<bool, [2]>([false, true])];
12
+ tensor<bool, [2]> var_6_squeeze_mask_0 = const()[name = tensor<string, []>("op_6_squeeze_mask_0"), val = tensor<bool, [2]>([true, false])];
13
+ tensor<fp32, [480000]> var_6 = slice_by_index(begin = var_6_begin_0, end = var_6_end_0, end_mask = var_6_end_mask_0, squeeze_mask = var_6_squeeze_mask_0, x = waveform_1)[name = tensor<string, []>("op_6")];
14
+ tensor<int32, []> sliding_windows_0_axis_0 = const()[name = tensor<string, []>("sliding_windows_0_axis_0"), val = tensor<int32, []>(0)];
15
+ tensor<int32, []> sliding_windows_0_size_0 = const()[name = tensor<string, []>("sliding_windows_0_size_0"), val = tensor<int32, []>(400)];
16
+ tensor<int32, []> sliding_windows_0_stride_0 = const()[name = tensor<string, []>("sliding_windows_0_stride_0"), val = tensor<int32, []>(160)];
17
+ tensor<fp32, [2998, 400]> sliding_windows_0 = sliding_windows(axis = sliding_windows_0_axis_0, size = sliding_windows_0_size_0, stride = sliding_windows_0_stride_0, x = var_6)[name = tensor<string, []>("sliding_windows_0")];
18
+ tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([1])];
19
+ tensor<bool, []> var_42_keep_dims_0 = const()[name = tensor<string, []>("op_42_keep_dims_0"), val = tensor<bool, []>(false)];
20
+ tensor<fp32, [2998]> var_42 = reduce_mean(axes = var_42_axes_0, keep_dims = var_42_keep_dims_0, x = sliding_windows_0)[name = tensor<string, []>("op_42")];
21
+ tensor<int32, [1]> row_means_axes_0 = const()[name = tensor<string, []>("row_means_axes_0"), val = tensor<int32, [1]>([1])];
22
+ tensor<fp32, [2998, 1]> row_means = expand_dims(axes = row_means_axes_0, x = var_42)[name = tensor<string, []>("row_means")];
23
+ tensor<fp32, [2998, 400]> strided_input_3 = sub(x = sliding_windows_0, y = row_means)[name = tensor<string, []>("strided_input_3")];
24
+ tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([0])];
25
+ tensor<fp32, [1, 2998, 400]> input_1 = expand_dims(axes = input_1_axes_0, x = strided_input_3)[name = tensor<string, []>("input_1")];
26
+ tensor<fp32, []> const_2 = const()[name = tensor<string, []>("const_2"), val = tensor<fp32, []>(0x0p+0)];
27
+ tensor<int32, [6]> var_54_pad_0 = const()[name = tensor<string, []>("op_54_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 1, 0])];
28
+ tensor<string, []> var_54_mode_0 = const()[name = tensor<string, []>("op_54_mode_0"), val = tensor<string, []>("replicate")];
29
+ tensor<fp32, [1, 2998, 401]> var_54 = pad(constant_val = const_2, mode = var_54_mode_0, pad = var_54_pad_0, x = input_1)[name = tensor<string, []>("op_54")];
30
+ tensor<int32, [1]> offset_strided_input_axes_0 = const()[name = tensor<string, []>("offset_strided_input_axes_0"), val = tensor<int32, [1]>([0])];
31
+ tensor<fp32, [2998, 401]> offset_strided_input = squeeze(axes = offset_strided_input_axes_0, x = var_54)[name = tensor<string, []>("offset_strided_input")];
32
+ tensor<int32, [2]> var_66_begin_0 = const()[name = tensor<string, []>("op_66_begin_0"), val = tensor<int32, [2]>([0, 0])];
33
+ tensor<int32, [2]> var_66_end_0 = const()[name = tensor<string, []>("op_66_end_0"), val = tensor<int32, [2]>([2998, 400])];
34
+ tensor<bool, [2]> var_66_end_mask_0 = const()[name = tensor<string, []>("op_66_end_mask_0"), val = tensor<bool, [2]>([true, false])];
35
+ tensor<fp32, [2998, 400]> var_66 = slice_by_index(begin = var_66_begin_0, end = var_66_end_0, end_mask = var_66_end_mask_0, x = offset_strided_input)[name = tensor<string, []>("op_66")];
36
+ tensor<fp32, []> var_67 = const()[name = tensor<string, []>("op_67"), val = tensor<fp32, []>(0x1.f0a3d8p-1)];
37
+ tensor<fp32, [2998, 400]> var_68 = mul(x = var_66, y = var_67)[name = tensor<string, []>("op_68")];
38
+ tensor<fp32, [2998, 400]> strided_input_5 = sub(x = strided_input_3, y = var_68)[name = tensor<string, []>("strided_input_5")];
39
+ tensor<fp32, [1, 400]> window_function = const()[name = tensor<string, []>("window_function"), val = tensor<fp32, [1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
40
+ tensor<fp32, [2998, 400]> strided_input_7 = mul(x = strided_input_5, y = window_function)[name = tensor<string, []>("strided_input_7")];
41
+ tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([0])];
42
+ tensor<fp32, [1, 2998, 400]> input_3 = expand_dims(axes = input_3_axes_0, x = strided_input_7)[name = tensor<string, []>("input_3")];
43
+ tensor<fp32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<fp32, []>(0x0p+0)];
44
+ tensor<int32, [6]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 0, 112])];
45
+ tensor<string, []> var_90_mode_0 = const()[name = tensor<string, []>("op_90_mode_0"), val = tensor<string, []>("constant")];
46
+ tensor<fp32, [1, 2998, 512]> var_90 = pad(constant_val = const_3, mode = var_90_mode_0, pad = var_90_pad_0, x = input_3)[name = tensor<string, []>("op_90")];
47
+ tensor<int32, [1]> strided_input_axes_0 = const()[name = tensor<string, []>("strided_input_axes_0"), val = tensor<int32, [1]>([0])];
48
+ tensor<fp32, [2998, 512]> strided_input = squeeze(axes = strided_input_axes_0, x = var_90)[name = tensor<string, []>("strided_input")];
49
+ tensor<fp32, [512, 512]> cos_0 = const()[name = tensor<string, []>("cos_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1728)))];
50
+ tensor<fp32, [512, 512]> sin_0 = const()[name = tensor<string, []>("sin_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050368)))];
51
+ tensor<bool, []> matmul_1_transpose_x_1 = const()[name = tensor<string, []>("matmul_1_transpose_x_1"), val = tensor<bool, []>(false)];
52
+ tensor<bool, []> matmul_1_transpose_y_1 = const()[name = tensor<string, []>("matmul_1_transpose_y_1"), val = tensor<bool, []>(true)];
53
+ tensor<fp32, [512, 2998]> matmul_1 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = cos_0, y = strided_input)[name = tensor<string, []>("matmul_1")];
54
+ tensor<bool, []> matmul_3_transpose_x_1 = const()[name = tensor<string, []>("matmul_3_transpose_x_1"), val = tensor<bool, []>(false)];
55
+ tensor<bool, []> matmul_3_transpose_y_1 = const()[name = tensor<string, []>("matmul_3_transpose_y_1"), val = tensor<bool, []>(true)];
56
+ tensor<fp32, [512, 2998]> matmul_3 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = sin_0, y = strided_input)[name = tensor<string, []>("matmul_3")];
57
+ tensor<fp32, []> mul_1_y_0 = const()[name = tensor<string, []>("mul_1_y_0"), val = tensor<fp32, []>(-0x1p+0)];
58
+ tensor<fp32, [512, 2998]> mul_1 = mul(x = matmul_3, y = mul_1_y_0)[name = tensor<string, []>("mul_1")];
59
+ tensor<int32, [2]> transpose_3_perm_0 = const()[name = tensor<string, []>("transpose_3_perm_0"), val = tensor<int32, [2]>([-1, 0])];
60
+ tensor<int32, [2]> transpose_4_perm_0 = const()[name = tensor<string, []>("transpose_4_perm_0"), val = tensor<int32, [2]>([-1, 0])];
61
+ tensor<int32, [257]> range_1d_2 = const()[name = tensor<string, []>("range_1d_2"), val = tensor<int32, [257]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256])];
62
+ tensor<int32, []> gather_0_axis_0 = const()[name = tensor<string, []>("gather_0_axis_0"), val = tensor<int32, []>(-1)];
63
+ tensor<int32, []> gather_0_batch_dims_0 = const()[name = tensor<string, []>("gather_0_batch_dims_0"), val = tensor<int32, []>(0)];
64
+ tensor<fp32, [2998, 512]> transpose_3 = transpose(perm = transpose_3_perm_0, x = matmul_1)[name = tensor<string, []>("transpose_6")];
65
+ tensor<fp32, [2998, 257]> gather_0 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = range_1d_2, x = transpose_3)[name = tensor<string, []>("gather_0")];
66
+ tensor<int32, []> gather_1_axis_0 = const()[name = tensor<string, []>("gather_1_axis_0"), val = tensor<int32, []>(-1)];
67
+ tensor<int32, []> gather_1_batch_dims_0 = const()[name = tensor<string, []>("gather_1_batch_dims_0"), val = tensor<int32, []>(0)];
68
+ tensor<fp32, [2998, 512]> transpose_4 = transpose(perm = transpose_4_perm_0, x = mul_1)[name = tensor<string, []>("transpose_5")];
69
+ tensor<fp32, [2998, 257]> gather_1 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = range_1d_2, x = transpose_4)[name = tensor<string, []>("gather_1")];
70
+ tensor<fp32, [2998, 257]> square_0 = square(x = gather_0)[name = tensor<string, []>("square_0")];
71
+ tensor<fp32, [2998, 257]> square_1 = square(x = gather_1)[name = tensor<string, []>("square_1")];
72
+ tensor<fp32, [2998, 257]> add_1 = add(x = square_0, y = square_1)[name = tensor<string, []>("add_1")];
73
+ tensor<fp32, [2998, 257]> spectrum = identity(x = add_1)[name = tensor<string, []>("spectrum")];
74
+ tensor<fp32, [80, 257]> mel_energies_3 = const()[name = tensor<string, []>("mel_energies_3"), val = tensor<fp32, [80, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2099008)))];
75
+ tensor<fp32, [80]> mel_energies_bias_0 = const()[name = tensor<string, []>("mel_energies_bias_0"), val = tensor<fp32, [80]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2181312)))];
76
+ tensor<fp32, [2998, 80]> mel_energies = linear(bias = mel_energies_bias_0, weight = mel_energies_3, x = spectrum)[name = tensor<string, []>("mel_energies")];
77
+ tensor<fp32, []> const_10 = const()[name = tensor<string, []>("const_10"), val = tensor<fp32, []>(0x1p-23)];
78
+ tensor<fp32, [2998, 80]> var_186 = maximum(x = mel_energies, y = const_10)[name = tensor<string, []>("op_186")];
79
+ tensor<fp32, []> filter_banks_epsilon_0 = const()[name = tensor<string, []>("filter_banks_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
80
+ tensor<fp32, [2998, 80]> filter_banks = log(epsilon = filter_banks_epsilon_0, x = var_186)[name = tensor<string, []>("filter_banks")];
81
+ tensor<int32, [1]> var_192_axes_0 = const()[name = tensor<string, []>("op_192_axes_0"), val = tensor<int32, [1]>([0])];
82
+ tensor<bool, []> var_192_keep_dims_0 = const()[name = tensor<string, []>("op_192_keep_dims_0"), val = tensor<bool, []>(true)];
83
+ tensor<fp32, [1, 80]> var_192 = reduce_mean(axes = var_192_axes_0, keep_dims = var_192_keep_dims_0, x = filter_banks)[name = tensor<string, []>("op_192")];
84
+ tensor<fp32, [2998, 80]> var_194 = sub(x = filter_banks, y = var_192)[name = tensor<string, []>("op_194")];
85
+ tensor<int32, [1]> obj_axes_0 = const()[name = tensor<string, []>("obj_axes_0"), val = tensor<int32, [1]>([0])];
86
+ tensor<fp32, [1, 2998, 80]> preprocessor_output_1_type_fp32 = expand_dims(axes = obj_axes_0, x = var_194)[name = tensor<string, []>("obj")];
87
+ tensor<string, []> cast_9_dtype_0 = const()[name = tensor<string, []>("cast_9_dtype_0"), val = tensor<string, []>("fp16")];
88
+ tensor<fp16, [1, 2998, 80]> preprocessor_output_1 = cast(dtype = cast_9_dtype_0, x = preprocessor_output_1_type_fp32)[name = tensor<string, []>("cast_10")];
89
+ } -> (preprocessor_output_1);
90
+ }
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2c284bd22f1f7ab76901c1c6e57f82d4ebbf057fa0b924aad057f124f77a89
3
+ size 2181696