pyannote-v3-pro W16A16 SpeakerEmbedderPreprocessor

Browse files

Files changed (5) hide show

speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin +3 -0
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin +3 -0
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json +77 -0
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil +90 -0
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin +3 -0

speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1383750df1af99af002f0c3403bcf19a18c3d749706eb3498d34b0fe01abf2fc
+size 243

speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc230803421bf4fe14f843ea5b5fa0035487fd19cdd69ed670d72560b6a44586
+size 330

speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,77 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float32",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2998 × 80)",
+        "shortDescription" : "",
+        "shape" : "[1, 2998, 80]",
+        "name" : "preprocessor_output_1",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios16.cast" : 2,
+      "Ios16.mul" : 4,
+      "SliceByIndex" : 2,
+      "Transpose" : 2,
+      "SlidingWindows" : 1,
+      "Ios16.sub" : 3,
+      "Ios16.log" : 1,
+      "Ios16.reduceMean" : 2,
+      "Ios16.square" : 2,
+      "Squeeze" : 2,
+      "Ios16.matmul" : 2,
+      "Ios16.add" : 1,
+      "Ios16.linear" : 1,
+      "ExpandDims" : 4,
+      "Ios16.gather" : 2,
+      "Ios16.maximum" : 1,
+      "Identity" : 1,
+      "Pad" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.6.0",
+      "com.github.apple.coremltools.version" : "8.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 480000)",
+        "shortDescription" : "",
+        "shape" : "[1, 480000]",
+        "name" : "waveforms",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerEmbeddingPreprocessor",
+    "method" : "predict"
+  }
+]

speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,90 @@

+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})]
+{
+    func main<ios16>(tensor<fp16, [1, 480000]> waveforms) {
+            tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp32, []> var_2_promoted = const()[name = tensor<string, []>("op_2_promoted"), val = tensor<fp32, []>(0x1p+15)];
+            tensor<fp32, [1, 480000]> cast_0 = cast(dtype = cast_0_dtype_0, x = waveforms)[name = tensor<string, []>("cast_11")];
+            tensor<fp32, [1, 480000]> waveform_1 = mul(x = cast_0, y = var_2_promoted)[name = tensor<string, []>("waveform_1")];
+            tensor<int32, [2]> var_6_begin_0 = const()[name = tensor<string, []>("op_6_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> var_6_end_0 = const()[name = tensor<string, []>("op_6_end_0"), val = tensor<int32, [2]>([1, 480000])];
+            tensor<bool, [2]> var_6_end_mask_0 = const()[name = tensor<string, []>("op_6_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<bool, [2]> var_6_squeeze_mask_0 = const()[name = tensor<string, []>("op_6_squeeze_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp32, [480000]> var_6 = slice_by_index(begin = var_6_begin_0, end = var_6_end_0, end_mask = var_6_end_mask_0, squeeze_mask = var_6_squeeze_mask_0, x = waveform_1)[name = tensor<string, []>("op_6")];
+            tensor<int32, []> sliding_windows_0_axis_0 = const()[name = tensor<string, []>("sliding_windows_0_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> sliding_windows_0_size_0 = const()[name = tensor<string, []>("sliding_windows_0_size_0"), val = tensor<int32, []>(400)];
+            tensor<int32, []> sliding_windows_0_stride_0 = const()[name = tensor<string, []>("sliding_windows_0_stride_0"), val = tensor<int32, []>(160)];
+            tensor<fp32, [2998, 400]> sliding_windows_0 = sliding_windows(axis = sliding_windows_0_axis_0, size = sliding_windows_0_size_0, stride = sliding_windows_0_stride_0, x = var_6)[name = tensor<string, []>("sliding_windows_0")];
+            tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<bool, []> var_42_keep_dims_0 = const()[name = tensor<string, []>("op_42_keep_dims_0"), val = tensor<bool, []>(false)];
+            tensor<fp32, [2998]> var_42 = reduce_mean(axes = var_42_axes_0, keep_dims = var_42_keep_dims_0, x = sliding_windows_0)[name = tensor<string, []>("op_42")];
+            tensor<int32, [1]> row_means_axes_0 = const()[name = tensor<string, []>("row_means_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp32, [2998, 1]> row_means = expand_dims(axes = row_means_axes_0, x = var_42)[name = tensor<string, []>("row_means")];
+            tensor<fp32, [2998, 400]> strided_input_3 = sub(x = sliding_windows_0, y = row_means)[name = tensor<string, []>("strided_input_3")];
+            tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp32, [1, 2998, 400]> input_1 = expand_dims(axes = input_1_axes_0, x = strided_input_3)[name = tensor<string, []>("input_1")];
+            tensor<fp32, []> const_2 = const()[name = tensor<string, []>("const_2"), val = tensor<fp32, []>(0x0p+0)];
+            tensor<int32, [6]> var_54_pad_0 = const()[name = tensor<string, []>("op_54_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 1, 0])];
+            tensor<string, []> var_54_mode_0 = const()[name = tensor<string, []>("op_54_mode_0"), val = tensor<string, []>("replicate")];
+            tensor<fp32, [1, 2998, 401]> var_54 = pad(constant_val = const_2, mode = var_54_mode_0, pad = var_54_pad_0, x = input_1)[name = tensor<string, []>("op_54")];
+            tensor<int32, [1]> offset_strided_input_axes_0 = const()[name = tensor<string, []>("offset_strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp32, [2998, 401]> offset_strided_input = squeeze(axes = offset_strided_input_axes_0, x = var_54)[name = tensor<string, []>("offset_strided_input")];
+            tensor<int32, [2]> var_66_begin_0 = const()[name = tensor<string, []>("op_66_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> var_66_end_0 = const()[name = tensor<string, []>("op_66_end_0"), val = tensor<int32, [2]>([2998, 400])];
+            tensor<bool, [2]> var_66_end_mask_0 = const()[name = tensor<string, []>("op_66_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp32, [2998, 400]> var_66 = slice_by_index(begin = var_66_begin_0, end = var_66_end_0, end_mask = var_66_end_mask_0, x = offset_strided_input)[name = tensor<string, []>("op_66")];
+            tensor<fp32, []> var_67 = const()[name = tensor<string, []>("op_67"), val = tensor<fp32, []>(0x1.f0a3d8p-1)];
+            tensor<fp32, [2998, 400]> var_68 = mul(x = var_66, y = var_67)[name = tensor<string, []>("op_68")];
+            tensor<fp32, [2998, 400]> strided_input_5 = sub(x = strided_input_3, y = var_68)[name = tensor<string, []>("strided_input_5")];
+            tensor<fp32, [1, 400]> window_function = const()[name = tensor<string, []>("window_function"), val = tensor<fp32, [1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp32, [2998, 400]> strided_input_7 = mul(x = strided_input_5, y = window_function)[name = tensor<string, []>("strided_input_7")];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp32, [1, 2998, 400]> input_3 = expand_dims(axes = input_3_axes_0, x = strided_input_7)[name = tensor<string, []>("input_3")];
+            tensor<fp32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<fp32, []>(0x0p+0)];
+            tensor<int32, [6]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 0, 112])];
+            tensor<string, []> var_90_mode_0 = const()[name = tensor<string, []>("op_90_mode_0"), val = tensor<string, []>("constant")];
+            tensor<fp32, [1, 2998, 512]> var_90 = pad(constant_val = const_3, mode = var_90_mode_0, pad = var_90_pad_0, x = input_3)[name = tensor<string, []>("op_90")];
+            tensor<int32, [1]> strided_input_axes_0 = const()[name = tensor<string, []>("strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp32, [2998, 512]> strided_input = squeeze(axes = strided_input_axes_0, x = var_90)[name = tensor<string, []>("strided_input")];
+            tensor<fp32, [512, 512]> cos_0 = const()[name = tensor<string, []>("cos_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1728)))];
+            tensor<fp32, [512, 512]> sin_0 = const()[name = tensor<string, []>("sin_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050368)))];
+            tensor<bool, []> matmul_1_transpose_x_1 = const()[name = tensor<string, []>("matmul_1_transpose_x_1"), val = tensor<bool, []>(false)];
+            tensor<bool, []> matmul_1_transpose_y_1 = const()[name = tensor<string, []>("matmul_1_transpose_y_1"), val = tensor<bool, []>(true)];
+            tensor<fp32, [512, 2998]> matmul_1 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = cos_0, y = strided_input)[name = tensor<string, []>("matmul_1")];
+            tensor<bool, []> matmul_3_transpose_x_1 = const()[name = tensor<string, []>("matmul_3_transpose_x_1"), val = tensor<bool, []>(false)];
+            tensor<bool, []> matmul_3_transpose_y_1 = const()[name = tensor<string, []>("matmul_3_transpose_y_1"), val = tensor<bool, []>(true)];
+            tensor<fp32, [512, 2998]> matmul_3 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = sin_0, y = strided_input)[name = tensor<string, []>("matmul_3")];
+            tensor<fp32, []> mul_1_y_0 = const()[name = tensor<string, []>("mul_1_y_0"), val = tensor<fp32, []>(-0x1p+0)];
+            tensor<fp32, [512, 2998]> mul_1 = mul(x = matmul_3, y = mul_1_y_0)[name = tensor<string, []>("mul_1")];
+            tensor<int32, [2]> transpose_3_perm_0 = const()[name = tensor<string, []>("transpose_3_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+            tensor<int32, [2]> transpose_4_perm_0 = const()[name = tensor<string, []>("transpose_4_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+            tensor<int32, [257]> range_1d_2 = const()[name = tensor<string, []>("range_1d_2"), val = tensor<int32, [257]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256])];
+            tensor<int32, []> gather_0_axis_0 = const()[name = tensor<string, []>("gather_0_axis_0"), val = tensor<int32, []>(-1)];
+            tensor<int32, []> gather_0_batch_dims_0 = const()[name = tensor<string, []>("gather_0_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp32, [2998, 512]> transpose_3 = transpose(perm = transpose_3_perm_0, x = matmul_1)[name = tensor<string, []>("transpose_6")];
+            tensor<fp32, [2998, 257]> gather_0 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = range_1d_2, x = transpose_3)[name = tensor<string, []>("gather_0")];
+            tensor<int32, []> gather_1_axis_0 = const()[name = tensor<string, []>("gather_1_axis_0"), val = tensor<int32, []>(-1)];
+            tensor<int32, []> gather_1_batch_dims_0 = const()[name = tensor<string, []>("gather_1_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp32, [2998, 512]> transpose_4 = transpose(perm = transpose_4_perm_0, x = mul_1)[name = tensor<string, []>("transpose_5")];
+            tensor<fp32, [2998, 257]> gather_1 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = range_1d_2, x = transpose_4)[name = tensor<string, []>("gather_1")];
+            tensor<fp32, [2998, 257]> square_0 = square(x = gather_0)[name = tensor<string, []>("square_0")];
+            tensor<fp32, [2998, 257]> square_1 = square(x = gather_1)[name = tensor<string, []>("square_1")];
+            tensor<fp32, [2998, 257]> add_1 = add(x = square_0, y = square_1)[name = tensor<string, []>("add_1")];
+            tensor<fp32, [2998, 257]> spectrum = identity(x = add_1)[name = tensor<string, []>("spectrum")];
+            tensor<fp32, [80, 257]> mel_energies_3 = const()[name = tensor<string, []>("mel_energies_3"), val = tensor<fp32, [80, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2099008)))];
+            tensor<fp32, [80]> mel_energies_bias_0 = const()[name = tensor<string, []>("mel_energies_bias_0"), val = tensor<fp32, [80]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2181312)))];
+            tensor<fp32, [2998, 80]> mel_energies = linear(bias = mel_energies_bias_0, weight = mel_energies_3, x = spectrum)[name = tensor<string, []>("mel_energies")];
+            tensor<fp32, []> const_10 = const()[name = tensor<string, []>("const_10"), val = tensor<fp32, []>(0x1p-23)];
+            tensor<fp32, [2998, 80]> var_186 = maximum(x = mel_energies, y = const_10)[name = tensor<string, []>("op_186")];
+            tensor<fp32, []> filter_banks_epsilon_0 = const()[name = tensor<string, []>("filter_banks_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
+            tensor<fp32, [2998, 80]> filter_banks = log(epsilon = filter_banks_epsilon_0, x = var_186)[name = tensor<string, []>("filter_banks")];
+            tensor<int32, [1]> var_192_axes_0 = const()[name = tensor<string, []>("op_192_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<bool, []> var_192_keep_dims_0 = const()[name = tensor<string, []>("op_192_keep_dims_0"), val = tensor<bool, []>(true)];
+            tensor<fp32, [1, 80]> var_192 = reduce_mean(axes = var_192_axes_0, keep_dims = var_192_keep_dims_0, x = filter_banks)[name = tensor<string, []>("op_192")];
+            tensor<fp32, [2998, 80]> var_194 = sub(x = filter_banks, y = var_192)[name = tensor<string, []>("op_194")];
+            tensor<int32, [1]> obj_axes_0 = const()[name = tensor<string, []>("obj_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp32, [1, 2998, 80]> preprocessor_output_1_type_fp32 = expand_dims(axes = obj_axes_0, x = var_194)[name = tensor<string, []>("obj")];
+            tensor<string, []> cast_9_dtype_0 = const()[name = tensor<string, []>("cast_9_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [1, 2998, 80]> preprocessor_output_1 = cast(dtype = cast_9_dtype_0, x = preprocessor_output_1_type_fp32)[name = tensor<string, []>("cast_10")];
+        } -> (preprocessor_output_1);
+}

speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f2c284bd22f1f7ab76901c1c6e57f82d4ebbf057fa0b924aad057f124f77a89
+size 2181696