add download YouTube audio base.

#2
Files changed (1)
  1. app.py +563 -545
app.py CHANGED
@@ -1,546 +1,564 @@
1
- import os
2
- import re
3
- import random
4
- from scipy.io.wavfile import write
5
- import gradio as gr
6
-
7
- roformer_models = {
8
- 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
9
- 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
10
- 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
11
- 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
12
- }
13
-
14
- mdx23c_models = [
15
- 'MDX23C_D1581.ckpt',
16
- 'MDX23C-8KFFT-InstVoc_HQ.ckpt',
17
- 'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
18
- ]
19
-
20
- mdxnet_models = [
21
- 'UVR-MDX-NET-Inst_full_292.onnx',
22
- 'UVR-MDX-NET_Inst_187_beta.onnx',
23
- 'UVR-MDX-NET_Inst_82_beta.onnx',
24
- 'UVR-MDX-NET_Inst_90_beta.onnx',
25
- 'UVR-MDX-NET_Main_340.onnx',
26
- 'UVR-MDX-NET_Main_390.onnx',
27
- 'UVR-MDX-NET_Main_406.onnx',
28
- 'UVR-MDX-NET_Main_427.onnx',
29
- 'UVR-MDX-NET_Main_438.onnx',
30
- 'UVR-MDX-NET-Inst_HQ_1.onnx',
31
- 'UVR-MDX-NET-Inst_HQ_2.onnx',
32
- 'UVR-MDX-NET-Inst_HQ_3.onnx',
33
- 'UVR-MDX-NET-Inst_HQ_4.onnx',
34
- 'UVR_MDXNET_Main.onnx',
35
- 'UVR-MDX-NET-Inst_Main.onnx',
36
- 'UVR_MDXNET_1_9703.onnx',
37
- 'UVR_MDXNET_2_9682.onnx',
38
- 'UVR_MDXNET_3_9662.onnx',
39
- 'UVR-MDX-NET-Inst_1.onnx',
40
- 'UVR-MDX-NET-Inst_2.onnx',
41
- 'UVR-MDX-NET-Inst_3.onnx',
42
- 'UVR_MDXNET_KARA.onnx',
43
- 'UVR_MDXNET_KARA_2.onnx',
44
- 'UVR_MDXNET_9482.onnx',
45
- 'UVR-MDX-NET-Voc_FT.onnx',
46
- 'Kim_Vocal_1.onnx',
47
- 'Kim_Vocal_2.onnx',
48
- 'Kim_Inst.onnx',
49
- 'Reverb_HQ_By_FoxJoy.onnx',
50
- 'UVR-MDX-NET_Crowd_HQ_1.onnx',
51
- 'kuielab_a_vocals.onnx',
52
- 'kuielab_a_other.onnx',
53
- 'kuielab_a_bass.onnx',
54
- 'kuielab_a_drums.onnx',
55
- 'kuielab_b_vocals.onnx',
56
- 'kuielab_b_other.onnx',
57
- 'kuielab_b_bass.onnx',
58
- 'kuielab_b_drums.onnx',
59
- ]
60
-
61
- vrarch_models = [
62
- '1_HP-UVR.pth',
63
- '2_HP-UVR.pth',
64
- '3_HP-Vocal-UVR.pth',
65
- '4_HP-Vocal-UVR.pth',
66
- '5_HP-Karaoke-UVR.pth',
67
- '6_HP-Karaoke-UVR.pth',
68
- '7_HP2-UVR.pth',
69
- '8_HP2-UVR.pth',
70
- '9_HP2-UVR.pth',
71
- '10_SP-UVR-2B-32000-1.pth',
72
- '11_SP-UVR-2B-32000-2.pth',
73
- '12_SP-UVR-3B-44100.pth',
74
- '13_SP-UVR-4B-44100-1.pth',
75
- '14_SP-UVR-4B-44100-2.pth',
76
- '15_SP-UVR-MID-44100-1.pth',
77
- '16_SP-UVR-MID-44100-2.pth',
78
- '17_HP-Wind_Inst-UVR.pth',
79
- 'UVR-De-Echo-Aggressive.pth',
80
- 'UVR-De-Echo-Normal.pth',
81
- 'UVR-DeEcho-DeReverb.pth',
82
- 'UVR-DeNoise-Lite.pth',
83
- 'UVR-DeNoise.pth',
84
- 'UVR-BVE-4B_SN-44100-1.pth',
85
- 'MGM_HIGHEND_v4.pth',
86
- 'MGM_LOWEND_A_v4.pth',
87
- 'MGM_LOWEND_B_v4.pth',
88
- 'MGM_MAIN_v4.pth',
89
- ]
90
-
91
- demucs_models = [
92
- 'htdemucs_ft.yaml',
93
- 'htdemucs.yaml',
94
- 'hdemucs_mmi.yaml',
95
- ]
96
-
97
- output_format = [
98
- 'wav',
99
- 'flac',
100
- 'mp3',
101
- ]
102
-
103
- mdxnet_overlap_values = [
104
- '0.25',
105
- '0.5',
106
- '0.75',
107
- '0.99',
108
- ]
109
-
110
- vrarch_window_size_values = [
111
- '320',
112
- '512',
113
- '1024',
114
- ]
115
-
116
- demucs_overlap_values = [
117
- '0.25',
118
- '0.50',
119
- '0.75',
120
- '0.99',
121
- ]
122
-
123
- def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
124
- files_list = []
125
- files_list.clear()
126
- directory = "./outputs"
127
- random_id = str(random.randint(10000, 99999))
128
- pattern = f"{random_id}"
129
- os.makedirs("outputs", exist_ok=True)
130
- write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
131
- full_roformer_model = roformer_models[roformer_model]
132
- prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap}"
133
- os.system(prompt)
134
-
135
- for file in os.listdir(directory):
136
- if re.search(pattern, file):
137
- files_list.append(os.path.join(directory, file))
138
-
139
- stem1_file = files_list[0]
140
- stem2_file = files_list[1]
141
-
142
- return stem1_file, stem2_file
143
-
144
- def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
145
- files_list = []
146
- files_list.clear()
147
- directory = "./outputs"
148
- random_id = str(random.randint(10000, 99999))
149
- pattern = f"{random_id}"
150
- os.makedirs("outputs", exist_ok=True)
151
- write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
152
- prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
153
- os.system(prompt)
154
-
155
- for file in os.listdir(directory):
156
- if re.search(pattern, file):
157
- files_list.append(os.path.join(directory, file))
158
-
159
- stem1_file = files_list[0]
160
- stem2_file = files_list[1]
161
-
162
- return stem1_file, stem2_file
163
-
164
- def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
165
- files_list = []
166
- files_list.clear()
167
- directory = "./outputs"
168
- random_id = str(random.randint(10000, 99999))
169
- pattern = f"{random_id}"
170
- os.makedirs("outputs", exist_ok=True)
171
- write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
172
- prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdx_segment_size={mdxnet_segment_size} --mdx_overlap={mdxnet_overlap}"
173
-
174
- if mdxnet_denoise:
175
- prompt += " --mdx_enable_denoise"
176
-
177
- os.system(prompt)
178
-
179
- for file in os.listdir(directory):
180
- if re.search(pattern, file):
181
- files_list.append(os.path.join(directory, file))
182
-
183
- stem1_file = files_list[0]
184
- stem2_file = files_list[1]
185
-
186
- return stem1_file, stem2_file
187
-
188
- def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
189
- files_list = []
190
- files_list.clear()
191
- directory = "./outputs"
192
- random_id = str(random.randint(10000, 99999))
193
- pattern = f"{random_id}"
194
- os.makedirs("outputs", exist_ok=True)
195
- write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
196
- prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
197
-
198
- if vrarch_tta:
199
- prompt += " --vr_enable_tta"
200
- if vrarch_high_end_process:
201
- prompt += " --vr_high_end_process"
202
-
203
- os.system(prompt)
204
-
205
- for file in os.listdir(directory):
206
- if re.search(pattern, file):
207
- files_list.append(os.path.join(directory, file))
208
-
209
- stem1_file = files_list[0]
210
- stem2_file = files_list[1]
211
-
212
- return stem1_file, stem2_file
213
-
214
- def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
215
- files_list = []
216
- files_list.clear()
217
- directory = "./outputs"
218
- random_id = str(random.randint(10000, 99999))
219
- pattern = f"{random_id}"
220
- os.makedirs("outputs", exist_ok=True)
221
- write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
222
- prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
223
-
224
- os.system(prompt)
225
-
226
- for file in os.listdir(directory):
227
- if re.search(pattern, file):
228
- files_list.append(os.path.join(directory, file))
229
-
230
- stem1_file = files_list[0]
231
- stem2_file = files_list[1]
232
- stem3_file = files_list[2]
233
- stem4_file = files_list[3]
234
-
235
- return stem1_file, stem2_file, stem3_file, stem4_file
236
-
237
- with gr.Blocks(theme="NoCrypt/miku@1.2.2", title="🎵 UVR5 UI 🎵") as app:
238
- gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
239
- gr.Markdown("If you liked this HF Space you can give me a ❤️")
240
- with gr.Tabs():
241
- with gr.TabItem("BS/Mel Roformer"):
242
- with gr.Row():
243
- roformer_model = gr.Dropdown(
244
- label = "Select the Model",
245
- choices=list(roformer_models.keys()),
246
- interactive = True
247
- )
248
- roformer_output_format = gr.Dropdown(
249
- label = "Select the Output Format",
250
- choices = output_format,
251
- interactive = True
252
- )
253
- with gr.Row():
254
- roformer_overlap = gr.Slider(
255
- minimum = 2,
256
- maximum = 4,
257
- step = 1,
258
- label = "Overlap",
259
- info = "Amount of overlap between prediction windows.",
260
- value = 4,
261
- interactive = True
262
- )
263
- with gr.Row():
264
- roformer_audio = gr.Audio(
265
- label = "Input Audio",
266
- type = "numpy",
267
- interactive = True
268
- )
269
- with gr.Row():
270
- roformer_button = gr.Button("Separate!", variant = "primary")
271
- with gr.Row():
272
- roformer_stem1 = gr.Audio(
273
- show_download_button = True,
274
- interactive = False,
275
- label = "Stem 1",
276
- type = "filepath"
277
- )
278
- roformer_stem2 = gr.Audio(
279
- show_download_button = True,
280
- interactive = False,
281
- label = "Stem 2",
282
- type = "filepath"
283
- )
284
-
285
- roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap], [roformer_stem1, roformer_stem2])
286
-
287
- with gr.TabItem("MDX23C"):
288
- with gr.Row():
289
- mdx23c_model = gr.Dropdown(
290
- label = "Select the Model",
291
- choices = mdx23c_models,
292
- interactive = True
293
- )
294
- mdx23c_output_format = gr.Dropdown(
295
- label = "Select the Output Format",
296
- choices = output_format,
297
- interactive = True
298
- )
299
- with gr.Row():
300
- mdx23c_segment_size = gr.Slider(
301
- minimum = 32,
302
- maximum = 4000,
303
- step = 32,
304
- label = "Segment Size",
305
- info = "Larger consumes more resources, but may give better results.",
306
- value = 256,
307
- interactive = True
308
- )
309
- mdx23c_overlap = gr.Slider(
310
- minimum = 2,
311
- maximum = 50,
312
- step = 1,
313
- label = "Overlap",
314
- info = "Amount of overlap between prediction windows.",
315
- value = 8,
316
- interactive = True
317
- )
318
- with gr.Row():
319
- mdx23c_audio = gr.Audio(
320
- label = "Input Audio",
321
- type = "numpy",
322
- interactive = True
323
- )
324
- with gr.Row():
325
- mdx23c_button = gr.Button("Separate!", variant = "primary")
326
- with gr.Row():
327
- mdx23c_stem1 = gr.Audio(
328
- show_download_button = True,
329
- interactive = False,
330
- label = "Stem 1",
331
- type = "filepath"
332
- )
333
- mdx23c_stem2 = gr.Audio(
334
- show_download_button = True,
335
- interactive = False,
336
- label = "Stem 2",
337
- type = "filepath"
338
- )
339
-
340
- mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap], [mdx23c_stem1, mdx23c_stem2])
341
-
342
- with gr.TabItem("MDX-NET"):
343
- with gr.Row():
344
- mdxnet_model = gr.Dropdown(
345
- label = "Select the Model",
346
- choices = mdxnet_models,
347
- interactive = True
348
- )
349
- mdxnet_output_format = gr.Dropdown(
350
- label = "Select the Output Format",
351
- choices = output_format,
352
- interactive = True
353
- )
354
- with gr.Row():
355
- mdxnet_segment_size = gr.Slider(
356
- minimum = 32,
357
- maximum = 4000,
358
- step = 32,
359
- label = "Segment Size",
360
- info = "Larger consumes more resources, but may give better results.",
361
- value = 256,
362
- interactive = True
363
- )
364
- mdxnet_overlap = gr.Dropdown(
365
- label = "Overlap",
366
- choices = mdxnet_overlap_values,
367
- value = mdxnet_overlap_values[0],
368
- interactive = True
369
- )
370
- mdxnet_denoise = gr.Checkbox(
371
- label = "Denoise",
372
- info = "Enable denoising during separation.",
373
- value = True,
374
- interactive = True
375
- )
376
- with gr.Row():
377
- mdxnet_audio = gr.Audio(
378
- label = "Input Audio",
379
- type = "numpy",
380
- interactive = True
381
- )
382
- with gr.Row():
383
- mdxnet_button = gr.Button("Separate!", variant = "primary")
384
- with gr.Row():
385
- mdxnet_stem1 = gr.Audio(
386
- show_download_button = True,
387
- interactive = False,
388
- label = "Stem 1",
389
- type = "filepath"
390
- )
391
- mdxnet_stem2 = gr.Audio(
392
- show_download_button = True,
393
- interactive = False,
394
- label = "Stem 2",
395
- type = "filepath"
396
- )
397
-
398
- mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
399
-
400
- with gr.TabItem("VR ARCH"):
401
- with gr.Row():
402
- vrarch_model = gr.Dropdown(
403
- label = "Select the Model",
404
- choices = vrarch_models,
405
- interactive = True
406
- )
407
- vrarch_output_format = gr.Dropdown(
408
- label = "Select the Output Format",
409
- choices = output_format,
410
- interactive = True
411
- )
412
- with gr.Row():
413
- vrarch_window_size = gr.Dropdown(
414
- label = "Window Size",
415
- choices = vrarch_window_size_values,
416
- value = vrarch_window_size_values[0],
417
- interactive = True
418
- )
419
- vrarch_agression = gr.Slider(
420
- minimum = 1,
421
- maximum = 50,
422
- step = 1,
423
- label = "Agression",
424
- info = "Intensity of primary stem extraction.",
425
- value = 5,
426
- interactive = True
427
- )
428
- vrarch_tta = gr.Checkbox(
429
- label = "TTA",
430
- info = "Enable Test-Time-Augmentation; slow but improves quality.",
431
- value = True,
432
- visible = True,
433
- interactive = True,
434
- )
435
- vrarch_high_end_process = gr.Checkbox(
436
- label = "High End Process",
437
- info = "Mirror the missing frequency range of the output.",
438
- value = False,
439
- visible = True,
440
- interactive = True,
441
- )
442
- with gr.Row():
443
- vrarch_audio = gr.Audio(
444
- label = "Input Audio",
445
- type = "numpy",
446
- interactive = True
447
- )
448
- with gr.Row():
449
- vrarch_button = gr.Button("Separate!", variant = "primary")
450
- with gr.Row():
451
- vrarch_stem1 = gr.Audio(
452
- show_download_button = True,
453
- interactive = False,
454
- type = "filepath",
455
- label = "Stem 1"
456
- )
457
- vrarch_stem2 = gr.Audio(
458
- show_download_button = True,
459
- interactive = False,
460
- type = "filepath",
461
- label = "Stem 2"
462
- )
463
-
464
- vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_stem1, vrarch_stem2])
465
-
466
- with gr.TabItem("Demucs"):
467
- with gr.Row():
468
- demucs_model = gr.Dropdown(
469
- label = "Select the Model",
470
- choices = demucs_models,
471
- interactive = True
472
- )
473
- demucs_output_format = gr.Dropdown(
474
- label = "Select the Output Format",
475
- choices = output_format,
476
- interactive = True
477
- )
478
- with gr.Row():
479
- demucs_shifts = gr.Slider(
480
- minimum = 1,
481
- maximum = 20,
482
- step = 1,
483
- label = "Shifts",
484
- info = "Number of predictions with random shifts, higher = slower but better quality.",
485
- value = 2,
486
- interactive = True
487
- )
488
- demucs_overlap = gr.Dropdown(
489
- label = "Overlap",
490
- choices = demucs_overlap_values,
491
- value = demucs_overlap_values[0],
492
- interactive = True
493
- )
494
- with gr.Row():
495
- demucs_audio = gr.Audio(
496
- label = "Input Audio",
497
- type = "numpy",
498
- interactive = True
499
- )
500
- with gr.Row():
501
- demucs_button = gr.Button("Separate!", variant = "primary")
502
- with gr.Row():
503
- demucs_stem1 = gr.Audio(
504
- show_download_button = True,
505
- interactive = False,
506
- type = "filepath",
507
- label = "Stem 1"
508
- )
509
- demucs_stem2 = gr.Audio(
510
- show_download_button = True,
511
- interactive = False,
512
- type = "filepath",
513
- label = "Stem 2"
514
- )
515
- with gr.Row():
516
- demucs_stem3 = gr.Audio(
517
- show_download_button = True,
518
- interactive = False,
519
- type = "filepath",
520
- label = "Stem 3"
521
- )
522
- demucs_stem4 = gr.Audio(
523
- show_download_button = True,
524
- interactive = False,
525
- type = "filepath",
526
- label = "Stem 4"
527
- )
528
-
529
- demucs_button.click(demucs_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
530
-
531
- with gr.TabItem("Credits"):
532
- gr.Markdown(
533
- """
534
- UVR5 UI created by **[Not Eddy (Spanish Mod)](http://discord.com/users/274566299349155851)** in **[AI HUB](https://discord.gg/aihub)** community.
535
-
536
- * python-audio-separator by [beveradb](https://github.com/beveradb).
537
- * Thanks to [Ilaria](https://github.com/TheStingerX) and [Mikus](https://github.com/cappuch) for the help with the code.
538
- * Improvements by [Blane187](https://github.com/Blane187).
539
-
540
- You can donate to the original UVR5 project here:
541
- [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/uvr5)
542
- """
543
- )
544
-
545
- app.queue()
546
  app.launch()
 
1
+ import os
2
+ import re
3
+ import random
4
+ from scipy.io.wavfile import write
5
+ import gradio as gr
+ import yt_dlp
6
+
7
+ roformer_models = {
8
+ 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
9
+ 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
10
+ 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
11
+ 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
12
+ }
13
+
14
+ mdx23c_models = [
15
+ 'MDX23C_D1581.ckpt',
16
+ 'MDX23C-8KFFT-InstVoc_HQ.ckpt',
17
+ 'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
18
+ ]
19
+
20
+ mdxnet_models = [
21
+ 'UVR-MDX-NET-Inst_full_292.onnx',
22
+ 'UVR-MDX-NET_Inst_187_beta.onnx',
23
+ 'UVR-MDX-NET_Inst_82_beta.onnx',
24
+ 'UVR-MDX-NET_Inst_90_beta.onnx',
25
+ 'UVR-MDX-NET_Main_340.onnx',
26
+ 'UVR-MDX-NET_Main_390.onnx',
27
+ 'UVR-MDX-NET_Main_406.onnx',
28
+ 'UVR-MDX-NET_Main_427.onnx',
29
+ 'UVR-MDX-NET_Main_438.onnx',
30
+ 'UVR-MDX-NET-Inst_HQ_1.onnx',
31
+ 'UVR-MDX-NET-Inst_HQ_2.onnx',
32
+ 'UVR-MDX-NET-Inst_HQ_3.onnx',
33
+ 'UVR-MDX-NET-Inst_HQ_4.onnx',
34
+ 'UVR_MDXNET_Main.onnx',
35
+ 'UVR-MDX-NET-Inst_Main.onnx',
36
+ 'UVR_MDXNET_1_9703.onnx',
37
+ 'UVR_MDXNET_2_9682.onnx',
38
+ 'UVR_MDXNET_3_9662.onnx',
39
+ 'UVR-MDX-NET-Inst_1.onnx',
40
+ 'UVR-MDX-NET-Inst_2.onnx',
41
+ 'UVR-MDX-NET-Inst_3.onnx',
42
+ 'UVR_MDXNET_KARA.onnx',
43
+ 'UVR_MDXNET_KARA_2.onnx',
44
+ 'UVR_MDXNET_9482.onnx',
45
+ 'UVR-MDX-NET-Voc_FT.onnx',
46
+ 'Kim_Vocal_1.onnx',
47
+ 'Kim_Vocal_2.onnx',
48
+ 'Kim_Inst.onnx',
49
+ 'Reverb_HQ_By_FoxJoy.onnx',
50
+ 'UVR-MDX-NET_Crowd_HQ_1.onnx',
51
+ 'kuielab_a_vocals.onnx',
52
+ 'kuielab_a_other.onnx',
53
+ 'kuielab_a_bass.onnx',
54
+ 'kuielab_a_drums.onnx',
55
+ 'kuielab_b_vocals.onnx',
56
+ 'kuielab_b_other.onnx',
57
+ 'kuielab_b_bass.onnx',
58
+ 'kuielab_b_drums.onnx',
59
+ ]
60
+
61
+ vrarch_models = [
62
+ '1_HP-UVR.pth',
63
+ '2_HP-UVR.pth',
64
+ '3_HP-Vocal-UVR.pth',
65
+ '4_HP-Vocal-UVR.pth',
66
+ '5_HP-Karaoke-UVR.pth',
67
+ '6_HP-Karaoke-UVR.pth',
68
+ '7_HP2-UVR.pth',
69
+ '8_HP2-UVR.pth',
70
+ '9_HP2-UVR.pth',
71
+ '10_SP-UVR-2B-32000-1.pth',
72
+ '11_SP-UVR-2B-32000-2.pth',
73
+ '12_SP-UVR-3B-44100.pth',
74
+ '13_SP-UVR-4B-44100-1.pth',
75
+ '14_SP-UVR-4B-44100-2.pth',
76
+ '15_SP-UVR-MID-44100-1.pth',
77
+ '16_SP-UVR-MID-44100-2.pth',
78
+ '17_HP-Wind_Inst-UVR.pth',
79
+ 'UVR-De-Echo-Aggressive.pth',
80
+ 'UVR-De-Echo-Normal.pth',
81
+ 'UVR-DeEcho-DeReverb.pth',
82
+ 'UVR-DeNoise-Lite.pth',
83
+ 'UVR-DeNoise.pth',
84
+ 'UVR-BVE-4B_SN-44100-1.pth',
85
+ 'MGM_HIGHEND_v4.pth',
86
+ 'MGM_LOWEND_A_v4.pth',
87
+ 'MGM_LOWEND_B_v4.pth',
88
+ 'MGM_MAIN_v4.pth',
89
+ ]
90
+
91
+ demucs_models = [
92
+ 'htdemucs_ft.yaml',
93
+ 'htdemucs.yaml',
94
+ 'hdemucs_mmi.yaml',
95
+ ]
96
+
97
+ output_format = [
98
+ 'wav',
99
+ 'flac',
100
+ 'mp3',
101
+ ]
102
+
103
+ mdxnet_overlap_values = [
104
+ '0.25',
105
+ '0.5',
106
+ '0.75',
107
+ '0.99',
108
+ ]
109
+
110
+ vrarch_window_size_values = [
111
+ '320',
112
+ '512',
113
+ '1024',
114
+ ]
115
+
116
+ demucs_overlap_values = [
117
+ '0.25',
118
+ '0.50',
119
+ '0.75',
120
+ '0.99',
121
+ ]
122
+
123
+ def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
124
+ files_list = []
125
+ files_list.clear()
126
+ directory = "./outputs"
127
+ random_id = str(random.randint(10000, 99999))
128
+ pattern = f"{random_id}"
129
+ os.makedirs("outputs", exist_ok=True)
130
+ write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
131
+ full_roformer_model = roformer_models[roformer_model]
132
+ prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap}"
133
+ os.system(prompt)
134
+
135
+ for file in os.listdir(directory):
136
+ if re.search(pattern, file):
137
+ files_list.append(os.path.join(directory, file))
138
+
139
+ stem1_file = files_list[0]
140
+ stem2_file = files_list[1]
141
+
142
+ return stem1_file, stem2_file
143
+
144
+ def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
145
+ files_list = []
146
+ files_list.clear()
147
+ directory = "./outputs"
148
+ random_id = str(random.randint(10000, 99999))
149
+ pattern = f"{random_id}"
150
+ os.makedirs("outputs", exist_ok=True)
151
+ write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
152
+ prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
153
+ os.system(prompt)
154
+
155
+ for file in os.listdir(directory):
156
+ if re.search(pattern, file):
157
+ files_list.append(os.path.join(directory, file))
158
+
159
+ stem1_file = files_list[0]
160
+ stem2_file = files_list[1]
161
+
162
+ return stem1_file, stem2_file
163
+
164
+ def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
165
+ files_list = []
166
+ files_list.clear()
167
+ directory = "./outputs"
168
+ random_id = str(random.randint(10000, 99999))
169
+ pattern = f"{random_id}"
170
+ os.makedirs("outputs", exist_ok=True)
171
+ write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
172
+ prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdx_segment_size={mdxnet_segment_size} --mdx_overlap={mdxnet_overlap}"
173
+
174
+ if mdxnet_denoise:
175
+ prompt += " --mdx_enable_denoise"
176
+
177
+ os.system(prompt)
178
+
179
+ for file in os.listdir(directory):
180
+ if re.search(pattern, file):
181
+ files_list.append(os.path.join(directory, file))
182
+
183
+ stem1_file = files_list[0]
184
+ stem2_file = files_list[1]
185
+
186
+ return stem1_file, stem2_file
187
+
188
+ def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
189
+ files_list = []
190
+ files_list.clear()
191
+ directory = "./outputs"
192
+ random_id = str(random.randint(10000, 99999))
193
+ pattern = f"{random_id}"
194
+ os.makedirs("outputs", exist_ok=True)
195
+ write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
196
+ prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
197
+
198
+ if vrarch_tta:
199
+ prompt += " --vr_enable_tta"
200
+ if vrarch_high_end_process:
201
+ prompt += " --vr_high_end_process"
202
+
203
+ os.system(prompt)
204
+
205
+ for file in os.listdir(directory):
206
+ if re.search(pattern, file):
207
+ files_list.append(os.path.join(directory, file))
208
+
209
+ stem1_file = files_list[0]
210
+ stem2_file = files_list[1]
211
+
212
+ return stem1_file, stem2_file
213
+
214
+ def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
215
+ files_list = []
216
+ files_list.clear()
217
+ directory = "./outputs"
218
+ random_id = str(random.randint(10000, 99999))
219
+ pattern = f"{random_id}"
220
+ os.makedirs("outputs", exist_ok=True)
221
+ write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
222
+ prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
223
+
224
+ os.system(prompt)
225
+
226
+ for file in os.listdir(directory):
227
+ if re.search(pattern, file):
228
+ files_list.append(os.path.join(directory, file))
229
+
230
+ stem1_file = files_list[0]
231
+ stem2_file = files_list[1]
232
+ stem3_file = files_list[2]
233
+ stem4_file = files_list[3]
234
+
235
+ return stem1_file, stem2_file, stem3_file, stem4_file
236
+
237
+
238
+
239
+ def download_audio(url):
240
+ ydl_opts = {
241
+ 'format': 'bestaudio/best',
242
+ 'outtmpl': 'ytdl/%(title)s.%(ext)s',
243
+ 'postprocessors': [{
244
+ 'key': 'FFmpegExtractAudio',
245
+ 'preferredcodec': 'mp3',
246
+ 'preferredquality': '192',
247
+ }],
248
+ }
249
+
250
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
251
+ info_dict = ydl.extract_info(url, download=True)
252
+ file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.mp3'
253
+ return file_path
254
+
255
+ with gr.Blocks(theme="NoCrypt/miku@1.2.2", title="🎵 UVR5 UI 🎵") as app:
256
+ gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
257
+ gr.Markdown("If you liked this HF Space you can give me a ❤️")
258
+ with gr.Tabs():
259
+ with gr.TabItem("BS/Mel Roformer"):
260
+ with gr.Row():
261
+ roformer_model = gr.Dropdown(
262
+ label = "Select the Model",
263
+ choices=list(roformer_models.keys()),
264
+ interactive = True
265
+ )
266
+ roformer_output_format = gr.Dropdown(
267
+ label = "Select the Output Format",
268
+ choices = output_format,
269
+ interactive = True
270
+ )
271
+ with gr.Row():
272
+ roformer_overlap = gr.Slider(
273
+ minimum = 2,
274
+ maximum = 4,
275
+ step = 1,
276
+ label = "Overlap",
277
+ info = "Amount of overlap between prediction windows.",
278
+ value = 4,
279
+ interactive = True
280
+ )
281
+ with gr.Row():
282
+ roformer_audio = gr.Audio(
283
+ label = "Input Audio",
284
+ type = "numpy",
285
+ interactive = True
286
+ )
287
+ with gr.Row():
288
+ roformer_button = gr.Button("Separate!", variant = "primary")
289
+ with gr.Row():
290
+ roformer_stem1 = gr.Audio(
291
+ show_download_button = True,
292
+ interactive = False,
293
+ label = "Stem 1",
294
+ type = "filepath"
295
+ )
296
+ roformer_stem2 = gr.Audio(
297
+ show_download_button = True,
298
+ interactive = False,
299
+ label = "Stem 2",
300
+ type = "filepath"
301
+ )
302
+
303
+ roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap], [roformer_stem1, roformer_stem2])
304
+
305
+ with gr.TabItem("MDX23C"):
306
+ with gr.Row():
307
+ mdx23c_model = gr.Dropdown(
308
+ label = "Select the Model",
309
+ choices = mdx23c_models,
310
+ interactive = True
311
+ )
312
+ mdx23c_output_format = gr.Dropdown(
313
+ label = "Select the Output Format",
314
+ choices = output_format,
315
+ interactive = True
316
+ )
317
+ with gr.Row():
318
+ mdx23c_segment_size = gr.Slider(
319
+ minimum = 32,
320
+ maximum = 4000,
321
+ step = 32,
322
+ label = "Segment Size",
323
+ info = "Larger consumes more resources, but may give better results.",
324
+ value = 256,
325
+ interactive = True
326
+ )
327
+ mdx23c_overlap = gr.Slider(
328
+ minimum = 2,
329
+ maximum = 50,
330
+ step = 1,
331
+ label = "Overlap",
332
+ info = "Amount of overlap between prediction windows.",
333
+ value = 8,
334
+ interactive = True
335
+ )
336
+ with gr.Row():
337
+ mdx23c_audio = gr.Audio(
338
+ label = "Input Audio",
339
+ type = "numpy",
340
+ interactive = True
341
+ )
342
+ with gr.Row():
343
+ mdx23c_button = gr.Button("Separate!", variant = "primary")
344
+ with gr.Row():
345
+ mdx23c_stem1 = gr.Audio(
346
+ show_download_button = True,
347
+ interactive = False,
348
+ label = "Stem 1",
349
+ type = "filepath"
350
+ )
351
+ mdx23c_stem2 = gr.Audio(
352
+ show_download_button = True,
353
+ interactive = False,
354
+ label = "Stem 2",
355
+ type = "filepath"
356
+ )
357
+
358
+ mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap], [mdx23c_stem1, mdx23c_stem2])
359
+
360
+ with gr.TabItem("MDX-NET"):
361
+ with gr.Row():
362
+ mdxnet_model = gr.Dropdown(
363
+ label = "Select the Model",
364
+ choices = mdxnet_models,
365
+ interactive = True
366
+ )
367
+ mdxnet_output_format = gr.Dropdown(
368
+ label = "Select the Output Format",
369
+ choices = output_format,
370
+ interactive = True
371
+ )
372
+ with gr.Row():
373
+ mdxnet_segment_size = gr.Slider(
374
+ minimum = 32,
375
+ maximum = 4000,
376
+ step = 32,
377
+ label = "Segment Size",
378
+ info = "Larger consumes more resources, but may give better results.",
379
+ value = 256,
380
+ interactive = True
381
+ )
382
+ mdxnet_overlap = gr.Dropdown(
383
+ label = "Overlap",
384
+ choices = mdxnet_overlap_values,
385
+ value = mdxnet_overlap_values[0],
386
+ interactive = True
387
+ )
388
+ mdxnet_denoise = gr.Checkbox(
389
+ label = "Denoise",
390
+ info = "Enable denoising during separation.",
391
+ value = True,
392
+ interactive = True
393
+ )
394
+ with gr.Row():
395
+ mdxnet_audio = gr.Audio(
396
+ label = "Input Audio",
397
+ type = "numpy",
398
+ interactive = True
399
+ )
400
+ with gr.Row():
401
+ mdxnet_button = gr.Button("Separate!", variant = "primary")
402
+ with gr.Row():
403
+ mdxnet_stem1 = gr.Audio(
404
+ show_download_button = True,
405
+ interactive = False,
406
+ label = "Stem 1",
407
+ type = "filepath"
408
+ )
409
+ mdxnet_stem2 = gr.Audio(
410
+ show_download_button = True,
411
+ interactive = False,
412
+ label = "Stem 2",
413
+ type = "filepath"
414
+ )
415
+
416
+ mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
417
+
418
+ with gr.TabItem("VR ARCH"):
419
+ with gr.Row():
420
+ vrarch_model = gr.Dropdown(
421
+ label = "Select the Model",
422
+ choices = vrarch_models,
423
+ interactive = True
424
+ )
425
+ vrarch_output_format = gr.Dropdown(
426
+ label = "Select the Output Format",
427
+ choices = output_format,
428
+ interactive = True
429
+ )
430
+ with gr.Row():
431
+ vrarch_window_size = gr.Dropdown(
432
+ label = "Window Size",
433
+ choices = vrarch_window_size_values,
434
+ value = vrarch_window_size_values[0],
435
+ interactive = True
436
+ )
437
+ vrarch_agression = gr.Slider(
438
+ minimum = 1,
439
+ maximum = 50,
440
+ step = 1,
441
+ label = "Aggression",
442
+ info = "Intensity of primary stem extraction.",
443
+ value = 5,
444
+ interactive = True
445
+ )
446
+ vrarch_tta = gr.Checkbox(
447
+ label = "TTA",
448
+ info = "Enable Test-Time-Augmentation; slow but improves quality.",
449
+ value = True,
450
+ visible = True,
451
+ interactive = True,
452
+ )
453
+ vrarch_high_end_process = gr.Checkbox(
454
+ label = "High End Process",
455
+ info = "Mirror the missing frequency range of the output.",
456
+ value = False,
457
+ visible = True,
458
+ interactive = True,
459
+ )
460
+ with gr.Row():
461
+ vrarch_audio = gr.Audio(
462
+ label = "Input Audio",
463
+ type = "numpy",
464
+ interactive = True
465
+ )
466
+ with gr.Row():
467
+ vrarch_button = gr.Button("Separate!", variant = "primary")
468
+ with gr.Row():
469
+ vrarch_stem1 = gr.Audio(
470
+ show_download_button = True,
471
+ interactive = False,
472
+ type = "filepath",
473
+ label = "Stem 1"
474
+ )
475
+ vrarch_stem2 = gr.Audio(
476
+ show_download_button = True,
477
+ interactive = False,
478
+ type = "filepath",
479
+ label = "Stem 2"
480
+ )
481
+
482
+ vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_stem1, vrarch_stem2])
483
+
484
+ with gr.TabItem("Demucs"):
485
+ with gr.Row():
486
+ demucs_model = gr.Dropdown(
487
+ label = "Select the Model",
488
+ choices = demucs_models,
489
+ interactive = True
490
+ )
491
+ demucs_output_format = gr.Dropdown(
492
+ label = "Select the Output Format",
493
+ choices = output_format,
494
+ interactive = True
495
+ )
496
+ with gr.Row():
497
+ demucs_shifts = gr.Slider(
498
+ minimum = 1,
499
+ maximum = 20,
500
+ step = 1,
501
+ label = "Shifts",
502
+ info = "Number of predictions with random shifts, higher = slower but better quality.",
503
+ value = 2,
504
+ interactive = True
505
+ )
506
+ demucs_overlap = gr.Dropdown(
507
+ label = "Overlap",
508
+ choices = demucs_overlap_values,
509
+ value = demucs_overlap_values[0],
510
+ interactive = True
511
+ )
512
+ with gr.Row():
513
+ demucs_audio = gr.Audio(
514
+ label = "Input Audio",
515
+ type = "numpy",
516
+ interactive = True
517
+ )
518
+ with gr.Row():
519
+ demucs_button = gr.Button("Separate!", variant = "primary")
520
+ with gr.Row():
521
+ demucs_stem1 = gr.Audio(
522
+ show_download_button = True,
523
+ interactive = False,
524
+ type = "filepath",
525
+ label = "Stem 1"
526
+ )
527
+ demucs_stem2 = gr.Audio(
528
+ show_download_button = True,
529
+ interactive = False,
530
+ type = "filepath",
531
+ label = "Stem 2"
532
+ )
533
+ with gr.Row():
534
+ demucs_stem3 = gr.Audio(
535
+ show_download_button = True,
536
+ interactive = False,
537
+ type = "filepath",
538
+ label = "Stem 3"
539
+ )
540
+ demucs_stem4 = gr.Audio(
541
+ show_download_button = True,
542
+ interactive = False,
543
+ type = "filepath",
544
+ label = "Stem 4"
545
+ )
546
+
547
+ demucs_button.click(demucs_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
548
+
549
+ with gr.TabItem("Credits"):
550
+ gr.Markdown(
551
+ """
552
+ UVR5 UI created by **[Not Eddy (Spanish Mod)](http://discord.com/users/274566299349155851)** in the **[AI HUB](https://discord.gg/aihub)** community.
553
+
554
+ * python-audio-separator by [beveradb](https://github.com/beveradb).
555
+ * Thanks to [Ilaria](https://github.com/TheStingerX) and [Mikus](https://github.com/cappuch) for the help with the code.
556
+ * Improvements by [Blane187](https://github.com/Blane187).
557
+
558
+ You can donate to the original UVR5 project here:
559
+ [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/uvr5)
560
+ """
561
+ )
562
+
563
+ app.queue()
564
  app.launch()
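
The new download_audio helper is defined but never wired into the interface in this diff; exposing it is presumably left for a follow-up. As a rough sketch (not part of this PR), a dedicated tab placed inside the existing with gr.Tabs(): block could call it directly — the tab title and the component names (ytdl_url, ytdl_button, ytdl_audio) are hypothetical:

        with gr.TabItem("YouTube Download"):
            with gr.Row():
                ytdl_url = gr.Textbox(
                    label = "YouTube URL",
                    placeholder = "https://www.youtube.com/watch?v=..."
                )
            with gr.Row():
                ytdl_button = gr.Button("Download!", variant = "primary")
            with gr.Row():
                ytdl_audio = gr.Audio(
                    show_download_button = True,
                    interactive = False,
                    label = "Downloaded Audio",
                    type = "filepath"
                )

            # download_audio returns the path of the extracted mp3, which
            # gr.Audio serves directly when type = "filepath"
            ytdl_button.click(download_audio, [ytdl_url], [ytdl_audio])

The returned file path could then be re-uploaded to any of the separator tabs by hand, or passed to them programmatically in a later revision.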