Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +1234 -535
  3. requirements.txt +3 -3
README.md CHANGED
@@ -6,7 +6,7 @@ colorTo: pink
6
  sdk: gradio
7
  app_file: app.py
8
  pinned: true
9
- sdk_version: 4.37.2
10
  ---
11
 
12
  ![Ilaria AI Suite](./ilariaaisuite.png)
 
6
  sdk: gradio
7
  app_file: app.py
8
  pinned: true
9
+ sdk_version: 5.8.0
10
  ---
11
 
12
  ![Ilaria AI Suite](./ilariaaisuite.png)
app.py CHANGED
@@ -1,26 +1,48 @@
1
  import os
2
- import re
3
- import random
4
- from scipy.io.wavfile import write
5
- from scipy.io.wavfile import read
6
- import numpy as np
7
- import gradio as gr
8
  import yt_dlp
9
  import spaces
 
 
 
 
 
 
 
 
10
 
 
 
 
11
  roformer_models = {
12
- 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
13
- 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
14
- 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
15
- 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
 
 
 
 
 
 
 
 
 
16
  }
17
 
 
 
 
18
  mdx23c_models = [
19
  'MDX23C_D1581.ckpt',
20
  'MDX23C-8KFFT-InstVoc_HQ.ckpt',
21
  'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
22
  ]
23
 
 
 
 
24
  mdxnet_models = [
25
  'UVR-MDX-NET-Inst_full_292.onnx',
26
  'UVR-MDX-NET_Inst_187_beta.onnx',
@@ -35,6 +57,7 @@ mdxnet_models = [
35
  'UVR-MDX-NET-Inst_HQ_2.onnx',
36
  'UVR-MDX-NET-Inst_HQ_3.onnx',
37
  'UVR-MDX-NET-Inst_HQ_4.onnx',
 
38
  'UVR_MDXNET_Main.onnx',
39
  'UVR-MDX-NET-Inst_Main.onnx',
40
  'UVR_MDXNET_1_9703.onnx',
@@ -62,6 +85,9 @@ mdxnet_models = [
62
  'kuielab_b_drums.onnx',
63
  ]
64
 
 
 
 
65
  vrarch_models = [
66
  '1_HP-UVR.pth',
67
  '2_HP-UVR.pth',
@@ -92,8 +118,12 @@ vrarch_models = [
92
  'MGM_MAIN_v4.pth',
93
  ]
94
 
 
 
 
95
  demucs_models = [
96
- 'htdemucs_ft.yaml',
 
97
  'htdemucs.yaml',
98
  'hdemucs_mmi.yaml',
99
  ]
@@ -102,582 +132,1251 @@ output_format = [
102
  'wav',
103
  'flac',
104
  'mp3',
 
 
 
 
 
105
  ]
106
 
107
- mdxnet_overlap_values = [
108
- '0.25',
109
- '0.5',
110
- '0.75',
111
- '0.99',
112
- ]
113
 
114
- vrarch_window_size_values = [
115
- '320',
116
- '512',
117
- '1024',
118
- ]
119
 
120
- demucs_overlap_values = [
121
- '0.25',
122
- '0.50',
123
- '0.75',
124
- '0.99',
125
- ]
126
 
127
- @spaces.GPU(duration=300)
128
- def download_audio(url):
129
  ydl_opts = {
130
  'format': 'bestaudio/best',
131
- 'outtmpl': 'ytdl/%(title)s.%(ext)s',
132
  'postprocessors': [{
133
  'key': 'FFmpegExtractAudio',
134
  'preferredcodec': 'wav',
135
- 'preferredquality': '192',
136
  }],
 
 
 
 
137
  }
138
 
139
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
140
- info_dict = ydl.extract_info(url, download=True)
141
- file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
142
- sample_rate, audio_data = read(file_path)
143
- audio_array = np.asarray(audio_data, dtype=np.int16)
144
-
145
- return sample_rate, audio_array
146
-
147
- @spaces.GPU(duration=300)
148
- def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap, roformer_segment_size):
149
- files_list = []
150
- files_list.clear()
151
- directory = "./outputs"
152
- random_id = str(random.randint(10000, 99999))
153
- pattern = f"{random_id}"
154
- os.makedirs("outputs", exist_ok=True)
155
- write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
156
- full_roformer_model = roformer_models[roformer_model]
157
- prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap} --mdxc_segment_size={roformer_segment_size}"
158
- os.system(prompt)
159
-
160
- for file in os.listdir(directory):
161
- if re.search(pattern, file):
162
- files_list.append(os.path.join(directory, file))
163
-
164
- stem1_file = files_list[0]
165
- stem2_file = files_list[1]
166
-
167
- return stem1_file, stem2_file
168
-
169
- @spaces.GPU(duration=300)
170
- def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap, mdx23c_denoise):
171
- files_list = []
172
- files_list.clear()
173
- directory = "./outputs"
174
- random_id = str(random.randint(10000, 99999))
175
- pattern = f"{random_id}"
176
- os.makedirs("outputs", exist_ok=True)
177
- write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
178
- prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
179
-
180
- if mdx23c_denoise:
181
- prompt += " --mdx_enable_denoise"
182
-
183
- os.system(prompt)
184
-
185
- for file in os.listdir(directory):
186
- if re.search(pattern, file):
187
- files_list.append(os.path.join(directory, file))
188
-
189
- stem1_file = files_list[0]
190
- stem2_file = files_list[1]
191
-
192
- return stem1_file, stem2_file
193
-
194
- @spaces.GPU(duration=300)
195
- def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
196
- files_list = []
197
- files_list.clear()
198
- directory = "./outputs"
199
- random_id = str(random.randint(10000, 99999))
200
- pattern = f"{random_id}"
201
- os.makedirs("outputs", exist_ok=True)
202
- write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
203
- prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdx_segment_size={mdxnet_segment_size} --mdx_overlap={mdxnet_overlap}"
204
-
205
- if mdxnet_denoise:
206
- prompt += " --mdx_enable_denoise"
207
-
208
- os.system(prompt)
209
-
210
- for file in os.listdir(directory):
211
- if re.search(pattern, file):
212
- files_list.append(os.path.join(directory, file))
213
-
214
- stem1_file = files_list[0]
215
- stem2_file = files_list[1]
216
-
217
- return stem1_file, stem2_file
218
-
219
- @spaces.GPU(duration=300)
220
- def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
221
- files_list = []
222
- files_list.clear()
223
- directory = "./outputs"
224
- random_id = str(random.randint(10000, 99999))
225
- pattern = f"{random_id}"
226
- os.makedirs("outputs", exist_ok=True)
227
- write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
228
- prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
229
-
230
- if vrarch_tta:
231
- prompt += " --vr_enable_tta"
232
- if vrarch_high_end_process:
233
- prompt += " --vr_high_end_process"
234
-
235
- os.system(prompt)
236
-
237
- for file in os.listdir(directory):
238
- if re.search(pattern, file):
239
- files_list.append(os.path.join(directory, file))
240
-
241
- stem1_file = files_list[0]
242
- stem2_file = files_list[1]
243
-
244
- return stem1_file, stem2_file
245
-
246
- @spaces.GPU(duration=300)
247
- def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
248
- files_list = []
249
- files_list.clear()
250
- directory = "./outputs"
251
- random_id = str(random.randint(10000, 99999))
252
- pattern = f"{random_id}"
253
- os.makedirs("outputs", exist_ok=True)
254
- write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
255
- prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
256
-
257
- os.system(prompt)
258
-
259
- for file in os.listdir(directory):
260
- if re.search(pattern, file):
261
- files_list.append(os.path.join(directory, file))
262
-
263
- stem1_file = files_list[0]
264
- stem2_file = files_list[1]
265
- stem3_file = files_list[2]
266
- stem4_file = files_list[3]
267
-
268
- return stem1_file, stem2_file, stem3_file, stem4_file
269
-
270
- with gr.Blocks(theme="NoCrypt/[email protected]", title="🎵 UVR5 UI 🎵") as app:
271
- gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
272
- gr.Markdown("If you liked this HF Space you can give me a ❤️")
273
- gr.Markdown("Try UVR5 UI using Colab [here](https://colab.research.google.com/github/Eddycrack864/UVR5-UI/blob/main/UVR_UI.ipynb)")
274
- with gr.Tabs():
275
- with gr.TabItem("BS/Mel Roformer"):
276
- with gr.Row():
277
- roformer_model = gr.Dropdown(
278
- label = "Select the Model",
279
- choices=list(roformer_models.keys()),
280
- interactive = True
281
- )
282
- roformer_output_format = gr.Dropdown(
283
- label = "Select the Output Format",
284
- choices = output_format,
285
- interactive = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  )
287
- with gr.Row():
288
- roformer_overlap = gr.Slider(
289
- minimum = 2,
290
- maximum = 4,
291
- step = 1,
292
- label = "Overlap",
293
- info = "Amount of overlap between prediction windows.",
294
- value = 4,
295
- interactive = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  )
297
- roformer_segment_size = gr.Slider(
298
- minimum = 32,
299
- maximum = 4000,
300
- step = 32,
301
- label = "Segment Size",
302
- info = "Larger consumes more resources, but may give better results.",
303
- value = 256,
304
- interactive = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  )
306
- with gr.Row():
307
- roformer_audio = gr.Audio(
308
- label = "Input Audio",
309
- type = "numpy",
310
- interactive = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  )
312
- with gr.Accordion("Separation by Link", open = False):
313
- with gr.Row():
314
- roformer_link = gr.Textbox(
315
- label = "Link",
316
- placeholder = "Paste the link here",
317
- interactive = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  with gr.Row():
320
- gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  with gr.Row():
322
- roformer_download_button = gr.Button(
323
- "Download!",
324
- variant = "primary"
325
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
- roformer_download_button.click(download_audio, [roformer_link], [roformer_audio])
328
 
329
- with gr.Row():
330
- roformer_button = gr.Button("Separate!", variant = "primary")
331
- with gr.Row():
332
- roformer_stem1 = gr.Audio(
333
- show_download_button = True,
334
- interactive = False,
335
- label = "Stem 1",
336
- type = "filepath"
337
- )
338
- roformer_stem2 = gr.Audio(
339
- show_download_button = True,
340
- interactive = False,
341
- label = "Stem 2",
342
- type = "filepath"
343
- )
 
 
 
 
 
 
344
 
345
- roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap, roformer_segment_size], [roformer_stem1, roformer_stem2])
346
-
347
- with gr.TabItem("MDX23C"):
348
- with gr.Row():
349
- mdx23c_model = gr.Dropdown(
350
- label = "Select the Model",
351
- choices = mdx23c_models,
352
- interactive = True
353
- )
354
- mdx23c_output_format = gr.Dropdown(
355
- label = "Select the Output Format",
356
- choices = output_format,
357
- interactive = True
358
- )
359
- with gr.Row():
360
- mdx23c_segment_size = gr.Slider(
361
- minimum = 32,
362
- maximum = 4000,
363
- step = 32,
364
- label = "Segment Size",
365
- info = "Larger consumes more resources, but may give better results.",
366
- value = 256,
367
- interactive = True
368
- )
369
- mdx23c_overlap = gr.Slider(
370
- minimum = 2,
371
- maximum = 50,
372
- step = 1,
373
- label = "Overlap",
374
- info = "Amount of overlap between prediction windows.",
375
- value = 8,
376
- interactive = True
377
- )
378
- mdx23c_denoise = gr.Checkbox(
379
- label = "Denoise",
380
- info = "Enable denoising during separation.",
381
- value = False,
382
- interactive = True
383
- )
384
- with gr.Row():
385
- mdx23c_audio = gr.Audio(
386
- label = "Input Audio",
387
- type = "numpy",
388
- interactive = True
389
- )
390
- with gr.Accordion("Separation by Link", open = False):
391
  with gr.Row():
392
- mdx23c_link = gr.Textbox(
393
- label = "Link",
394
- placeholder = "Paste the link here",
395
- interactive = True
396
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  with gr.Row():
398
- gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  with gr.Row():
400
- mdx23c_download_button = gr.Button(
401
- "Download!",
402
- variant = "primary"
403
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
- mdx23c_download_button.click(download_audio, [mdx23c_link], [mdx23c_audio])
406
 
407
- with gr.Row():
408
- mdx23c_button = gr.Button("Separate!", variant = "primary")
409
- with gr.Row():
410
- mdx23c_stem1 = gr.Audio(
411
- show_download_button = True,
412
- interactive = False,
413
- label = "Stem 1",
414
- type = "filepath"
415
- )
416
- mdx23c_stem2 = gr.Audio(
417
- show_download_button = True,
418
- interactive = False,
419
- label = "Stem 2",
420
- type = "filepath"
421
- )
 
 
 
 
422
 
423
- mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap, mdx23c_denoise], [mdx23c_stem1, mdx23c_stem2])
424
-
425
- with gr.TabItem("MDX-NET"):
426
- with gr.Row():
427
- mdxnet_model = gr.Dropdown(
428
- label = "Select the Model",
429
- choices = mdxnet_models,
430
- interactive = True
431
- )
432
- mdxnet_output_format = gr.Dropdown(
433
- label = "Select the Output Format",
434
- choices = output_format,
435
- interactive = True
436
- )
437
- with gr.Row():
438
- mdxnet_segment_size = gr.Slider(
439
- minimum = 32,
440
- maximum = 4000,
441
- step = 32,
442
- label = "Segment Size",
443
- info = "Larger consumes more resources, but may give better results.",
444
- value = 256,
445
- interactive = True
446
- )
447
- mdxnet_overlap = gr.Dropdown(
448
- label = "Overlap",
449
- choices = mdxnet_overlap_values,
450
- value = mdxnet_overlap_values[0],
451
  interactive = True
452
- )
453
- mdxnet_denoise = gr.Checkbox(
454
- label = "Denoise",
455
- info = "Enable denoising during separation.",
456
- value = True,
457
- interactive = True
458
- )
459
- with gr.Row():
460
- mdxnet_audio = gr.Audio(
461
- label = "Input Audio",
462
- type = "numpy",
463
- interactive = True
464
- )
465
- with gr.Accordion("Separation by Link", open = False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  with gr.Row():
467
- mdxnet_link = gr.Textbox(
468
- label = "Link",
469
- placeholder = "Paste the link here",
470
- interactive = True
471
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  with gr.Row():
473
- gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
474
  with gr.Row():
475
- mdxnet_download_button = gr.Button(
476
- "Download!",
477
- variant = "primary"
478
- )
 
 
 
 
 
 
 
 
479
 
480
- mdxnet_download_button.click(download_audio, [mdxnet_link], [mdxnet_audio])
481
 
482
- with gr.Row():
483
- mdxnet_button = gr.Button("Separate!", variant = "primary")
484
- with gr.Row():
485
- mdxnet_stem1 = gr.Audio(
486
- show_download_button = True,
487
- interactive = False,
488
- label = "Stem 1",
489
- type = "filepath"
490
- )
491
- mdxnet_stem2 = gr.Audio(
492
- show_download_button = True,
493
- interactive = False,
494
- label = "Stem 2",
495
- type = "filepath"
496
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
 
498
- mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
 
500
- with gr.TabItem("VR ARCH"):
501
- with gr.Row():
502
- vrarch_model = gr.Dropdown(
503
- label = "Select the Model",
504
- choices = vrarch_models,
505
- interactive = True
506
- )
507
- vrarch_output_format = gr.Dropdown(
508
- label = "Select the Output Format",
509
- choices = output_format,
510
- interactive = True
511
- )
512
- with gr.Row():
513
- vrarch_window_size = gr.Dropdown(
514
- label = "Window Size",
515
- choices = vrarch_window_size_values,
516
- value = vrarch_window_size_values[0],
517
- interactive = True
518
- )
519
- vrarch_agression = gr.Slider(
520
- minimum = 1,
521
- maximum = 50,
522
- step = 1,
523
- label = "Agression",
524
- info = "Intensity of primary stem extraction.",
525
- value = 5,
526
- interactive = True
527
- )
528
- vrarch_tta = gr.Checkbox(
529
- label = "TTA",
530
- info = "Enable Test-Time-Augmentation; slow but improves quality.",
531
- value = True,
532
- visible = True,
533
- interactive = True,
534
- )
535
- vrarch_high_end_process = gr.Checkbox(
536
- label = "High End Process",
537
- info = "Mirror the missing frequency range of the output.",
538
- value = False,
539
- visible = True,
540
- interactive = True,
541
- )
542
- with gr.Row():
543
- vrarch_audio = gr.Audio(
544
- label = "Input Audio",
545
- type = "numpy",
546
- interactive = True
547
- )
548
- with gr.Accordion("Separation by Link", open = False):
549
  with gr.Row():
550
- vrarch_link = gr.Textbox(
551
- label = "Link",
552
- placeholder = "Paste the link here",
553
- interactive = True
554
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  with gr.Row():
556
- gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  with gr.Row():
558
- vrarch_download_button = gr.Button(
559
- "Download!",
560
- variant = "primary"
561
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
- vrarch_download_button.click(download_audio, [vrarch_link], [vrarch_audio])
564
 
565
- with gr.Row():
566
- vrarch_button = gr.Button("Separate!", variant = "primary")
567
- with gr.Row():
568
- vrarch_stem1 = gr.Audio(
569
- show_download_button = True,
570
- interactive = False,
571
- type = "filepath",
572
- label = "Stem 1"
573
- )
574
- vrarch_stem2 = gr.Audio(
575
- show_download_button = True,
576
- interactive = False,
577
- type = "filepath",
578
- label = "Stem 2"
579
- )
 
 
 
 
580
 
581
- vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_stem1, vrarch_stem2])
582
 
583
- with gr.TabItem("Demucs"):
584
- with gr.Row():
585
- demucs_model = gr.Dropdown(
586
- label = "Select the Model",
587
- choices = demucs_models,
588
- interactive = True
589
- )
590
- demucs_output_format = gr.Dropdown(
591
- label = "Select the Output Format",
592
- choices = output_format,
593
- interactive = True
594
- )
595
- with gr.Row():
596
- demucs_shifts = gr.Slider(
597
- minimum = 1,
598
- maximum = 20,
599
- step = 1,
600
- label = "Shifts",
601
- info = "Number of predictions with random shifts, higher = slower but better quality.",
602
- value = 2,
603
- interactive = True
604
- )
605
- demucs_overlap = gr.Dropdown(
606
- label = "Overlap",
607
- choices = demucs_overlap_values,
608
- value = demucs_overlap_values[0],
609
- interactive = True
610
- )
611
- with gr.Row():
612
- demucs_audio = gr.Audio(
613
- label = "Input Audio",
614
- type = "numpy",
615
- interactive = True
616
- )
617
- with gr.Accordion("Separation by Link", open = False):
618
  with gr.Row():
619
- demucs_link = gr.Textbox(
620
- label = "Link",
621
- placeholder = "Paste the link here",
622
- interactive = True
623
- )
624
  with gr.Row():
625
- gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
 
 
 
 
 
 
 
 
 
 
 
626
  with gr.Row():
627
- demucs_download_button = gr.Button(
628
- "Download!",
629
- variant = "primary"
630
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
631
 
632
- demucs_download_button.click(download_audio, [demucs_link], [demucs_audio])
 
 
633
 
634
- with gr.Row():
635
- demucs_button = gr.Button("Separate!", variant = "primary")
636
- with gr.Row():
637
- demucs_stem1 = gr.Audio(
638
- show_download_button = True,
639
- interactive = False,
640
- type = "filepath",
641
- label = "Stem 1"
642
  )
643
- demucs_stem2 = gr.Audio(
644
- show_download_button = True,
645
- interactive = False,
646
- type = "filepath",
647
- label = "Stem 2"
648
- )
649
- with gr.Row():
650
- demucs_stem3 = gr.Audio(
651
- show_download_button = True,
652
- interactive = False,
653
- type = "filepath",
654
- label = "Stem 3"
655
  )
656
- demucs_stem4 = gr.Audio(
657
- show_download_button = True,
658
- interactive = False,
659
- type = "filepath",
660
- label = "Stem 4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  )
662
-
663
- demucs_button.click(demucs_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
664
-
665
- with gr.TabItem("Credits"):
666
- gr.Markdown(
667
- """
668
- UVR5 UI created by **[Eddycrack 864](https://github.com/Eddycrack864).** Join **[AI HUB](https://discord.gg/aihub)** community.
669
-
670
- * python-audio-separator by [beveradb](https://github.com/beveradb).
671
- * Special thanks to [Ilaria](https://github.com/TheStingerX) for hosting this space and help.
672
- * Thanks to [Mikus](https://github.com/cappuch) for the help with the code.
673
- * Thanks to [Nick088](https://huggingface.co/Nick088) for the help to fix roformers.
674
- * Thanks to [yt_dlp](https://github.com/yt-dlp/yt-dlp) devs.
675
- * Separation by link source code and improvements by [Blane187](https://huggingface.co/Blane187).
676
-
677
- You can donate to the original UVR5 project here:
678
- [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/uvr5)
679
- """
680
- )
681
 
682
  app.queue()
683
  app.launch()
 
1
  import os
2
+ import torch
3
+ import logging
 
 
 
 
4
  import yt_dlp
5
  import spaces
6
+ import gradio as gr
7
+ import assets.themes.loadThemes as loadThemes
8
+ from gradio_i18n import Translate
9
+ from gradio_i18n import gettext as _
10
+ from audio_separator.separator import Separator
11
+
12
+ device = "cuda" if torch.cuda.is_available() else "cpu"
13
+ use_autocast = device == "cuda"
14
 
15
+ #=========================#
16
+ # Roformer Models #
17
+ #=========================#
18
  roformer_models = {
19
+ 'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
20
+ 'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
21
+ 'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
22
+ 'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
23
+ 'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt',
24
+ 'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt',
25
+ 'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt',
26
+ 'Mel-Roformer-Denoise-Aufr33-Aggr' : 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt',
27
+ 'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',
28
+ 'MelBand Roformer Kim | Inst V1 by Unwa' : 'melband_roformer_inst_v1.ckpt',
29
+ 'MelBand Roformer Kim | Inst V2 by Unwa' : 'melband_roformer_inst_v2.ckpt',
30
+ 'MelBand Roformer Kim | InstVoc Duality V1 by Unwa' : 'melband_roformer_instvoc_duality_v1.ckpt',
31
+ 'MelBand Roformer Kim | InstVoc Duality V2 by Unwa' : 'melband_roformer_instvox_duality_v2.ckpt',
32
  }
33
 
34
+ #=========================#
35
+ # MDX23C Models #
36
+ #=========================#
37
  mdx23c_models = [
38
  'MDX23C_D1581.ckpt',
39
  'MDX23C-8KFFT-InstVoc_HQ.ckpt',
40
  'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
41
  ]
42
 
43
+ #=========================#
44
+ # MDXN-NET Models #
45
+ #=========================#
46
  mdxnet_models = [
47
  'UVR-MDX-NET-Inst_full_292.onnx',
48
  'UVR-MDX-NET_Inst_187_beta.onnx',
 
57
  'UVR-MDX-NET-Inst_HQ_2.onnx',
58
  'UVR-MDX-NET-Inst_HQ_3.onnx',
59
  'UVR-MDX-NET-Inst_HQ_4.onnx',
60
+ 'UVR-MDX-NET-Inst_HQ_5.onnx',
61
  'UVR_MDXNET_Main.onnx',
62
  'UVR-MDX-NET-Inst_Main.onnx',
63
  'UVR_MDXNET_1_9703.onnx',
 
85
  'kuielab_b_drums.onnx',
86
  ]
87
 
88
+ #========================#
89
+ # VR-ARCH Models #
90
+ #========================#
91
  vrarch_models = [
92
  '1_HP-UVR.pth',
93
  '2_HP-UVR.pth',
 
118
  'MGM_MAIN_v4.pth',
119
  ]
120
 
121
+ #=======================#
122
+ # DEMUCS Models #
123
+ #=======================#
124
  demucs_models = [
125
+ 'htdemucs_ft.yaml',
126
+ 'htdemucs_6s.yaml',
127
  'htdemucs.yaml',
128
  'hdemucs_mmi.yaml',
129
  ]
 
132
  'wav',
133
  'flac',
134
  'mp3',
135
+ 'ogg',
136
+ 'opus',
137
+ 'm4a',
138
+ 'aiff',
139
+ 'ac3'
140
  ]
141
 
142
+ found_files = []
143
+ logs = []
144
+ out_dir = "./outputs"
145
+ models_dir = "./models"
146
+ extensions = (".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aiff", ".ac3")
 
147
 
148
+ def download_audio(url, output_dir="ytdl"):
 
 
 
 
149
 
150
+ os.makedirs(output_dir, exist_ok=True)
 
 
 
 
 
151
 
 
 
152
  ydl_opts = {
153
  'format': 'bestaudio/best',
 
154
  'postprocessors': [{
155
  'key': 'FFmpegExtractAudio',
156
  'preferredcodec': 'wav',
157
+ 'preferredquality': '32',
158
  }],
159
+ 'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
160
+ 'postprocessor_args': [
161
+ '-acodec', 'pcm_f32le'
162
+ ],
163
  }
164
 
165
+ try:
166
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
167
+ info = ydl.extract_info(url, download=False)
168
+ video_title = info['title']
169
+
170
+ ydl.download([url])
171
+
172
+ file_path = os.path.join(output_dir, f"{video_title}.wav")
173
+
174
+ if os.path.exists(file_path):
175
+ return os.path.abspath(file_path)
176
+ else:
177
+ raise Exception("Something went wrong")
178
+
179
+ except Exception as e:
180
+ raise Exception(f"Error extracting audio with yt-dlp: {str(e)}")
181
+
182
+ @spaces.GPU(duration=60)
183
+ def roformer_separator(audio, model_key, out_format, segment_size, override_seg_size, overlap, batch_size, norm_thresh, amp_thresh, progress=gr.Progress(track_tqdm=True)):
184
+ base_name = os.path.splitext(os.path.basename(audio))[0]
185
+ roformer_model = roformer_models[model_key]
186
+ try:
187
+ separator = Separator(
188
+ log_level=logging.WARNING,
189
+ model_file_dir=models_dir,
190
+ output_dir=out_dir,
191
+ output_format=out_format,
192
+ use_autocast=use_autocast,
193
+ normalization_threshold=norm_thresh,
194
+ amplification_threshold=amp_thresh,
195
+ mdxc_params={
196
+ "segment_size": segment_size,
197
+ "override_model_segment_size": override_seg_size,
198
+ "batch_size": batch_size,
199
+ "overlap": overlap,
200
+ }
201
+ )
202
+
203
+ progress(0.2, desc="Loading model...")
204
+ separator.load_model(model_filename=roformer_model)
205
+
206
+ progress(0.7, desc="Separating audio...")
207
+ separation = separator.separate(audio, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")
208
+
209
+ stems = [os.path.join(out_dir, file_name) for file_name in separation]
210
+ return stems[1], stems[0]
211
+ except Exception as e:
212
+ raise RuntimeError(f"Roformer separation failed: {e}") from e
213
+
214
+ @spaces.GPU(duration=60)
215
+ def mdxc_separator(audio, model, out_format, segment_size, override_seg_size, overlap, batch_size, norm_thresh, amp_thresh, progress=gr.Progress(track_tqdm=True)):
216
+ base_name = os.path.splitext(os.path.basename(audio))[0]
217
+ try:
218
+ separator = Separator(
219
+ log_level=logging.WARNING,
220
+ model_file_dir=models_dir,
221
+ output_dir=out_dir,
222
+ output_format=out_format,
223
+ use_autocast=use_autocast,
224
+ normalization_threshold=norm_thresh,
225
+ amplification_threshold=amp_thresh,
226
+ mdxc_params={
227
+ "segment_size": segment_size,
228
+ "override_model_segment_size": override_seg_size,
229
+ "batch_size": batch_size,
230
+ "overlap": overlap,
231
+ }
232
+ )
233
+
234
+ progress(0.2, desc="Loading model...")
235
+ separator.load_model(model_filename=model)
236
+
237
+ progress(0.7, desc="Separating audio...")
238
+ separation = separator.separate(audio, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")
239
+
240
+ stems = [os.path.join(out_dir, file_name) for file_name in separation]
241
+ return stems[1], stems[0]
242
+ except Exception as e:
243
+ raise RuntimeError(f"MDX23C separation failed: {e}") from e
244
+
245
+ @spaces.GPU(duration=60)
246
+ def mdxnet_separator(audio, model, out_format, hop_length, segment_size, denoise, overlap, batch_size, norm_thresh, amp_thresh, progress=gr.Progress(track_tqdm=True)):
247
+ base_name = os.path.splitext(os.path.basename(audio))[0]
248
+ try:
249
+ separator = Separator(
250
+ log_level=logging.WARNING,
251
+ model_file_dir=models_dir,
252
+ output_dir=out_dir,
253
+ output_format=out_format,
254
+ use_autocast=use_autocast,
255
+ normalization_threshold=norm_thresh,
256
+ amplification_threshold=amp_thresh,
257
+ mdx_params={
258
+ "hop_length": hop_length,
259
+ "segment_size": segment_size,
260
+ "overlap": overlap,
261
+ "batch_size": batch_size,
262
+ "enable_denoise": denoise,
263
+ }
264
+ )
265
+
266
+ progress(0.2, desc="Loading model...")
267
+ separator.load_model(model_filename=model)
268
+
269
+ progress(0.7, desc="Separating audio...")
270
+ separation = separator.separate(audio, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")
271
+
272
+ stems = [os.path.join(out_dir, file_name) for file_name in separation]
273
+ return stems[0], stems[1]
274
+ except Exception as e:
275
+ raise RuntimeError(f"MDX-NET separation failed: {e}") from e
276
+
277
@spaces.GPU(duration=60)
def vrarch_separator(audio, model, out_format, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, batch_size, norm_thresh, amp_thresh, progress=gr.Progress(track_tqdm=True)):
    """Separate a single audio file into two stems using a VR-architecture model.

    Returns the paths of the two separated stems. Raises RuntimeError if any
    step of the separation fails.
    """
    base_name = os.path.splitext(os.path.basename(audio))[0]
    try:
        # All VR-specific knobs go into one params dict for the Separator.
        vr_settings = {
            "batch_size": batch_size,
            "window_size": window_size,
            "aggression": aggression,
            "enable_tta": tta,
            "enable_post_process": post_process,
            "post_process_threshold": post_process_threshold,
            "high_end_process": high_end_process,
        }
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=models_dir,
            output_dir=out_dir,
            output_format=out_format,
            use_autocast=use_autocast,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            vr_params=vr_settings,
        )

        progress(0.2, desc="Loading model...")
        separator.load_model(model_filename=model)

        progress(0.7, desc="Separating audio...")
        output_files = separator.separate(audio, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")

        # separate() returns file names relative to the output directory.
        stem_paths = [os.path.join(out_dir, name) for name in output_files]
        return stem_paths[0], stem_paths[1]
    except Exception as e:
        raise RuntimeError(f"VR ARCH separation failed: {e}") from e
310
+
311
@spaces.GPU(duration=60)
def demucs_separator(audio, model, out_format, shifts, segment_size, segments_enabled, overlap, batch_size, norm_thresh, amp_thresh, progress=gr.Progress(track_tqdm=True)):
    """Separate a single audio file into its stems with a Demucs model.

    Always returns six values so the UI outputs line up: the 6-stem model
    fills all six slots, other Demucs models fill four and pad with None.
    Raises RuntimeError if separation fails.
    """
    try:
        demucs_settings = {
            "batch_size": batch_size,
            "segment_size": segment_size,
            "shifts": shifts,
            "overlap": overlap,
            "segments_enabled": segments_enabled,
        }
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=models_dir,
            output_dir=out_dir,
            output_format=out_format,
            use_autocast=use_autocast,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            demucs_params=demucs_settings,
        )

        progress(0.2, desc="Loading model...")
        separator.load_model(model_filename=model)

        progress(0.7, desc="Separating audio...")
        output_files = separator.separate(audio)

        stem_paths = [os.path.join(out_dir, name) for name in output_files]

        if model == "htdemucs_6s.yaml":
            # Six-stem model: vocals/drums/bass/other plus guitar and piano.
            return tuple(stem_paths[:6])
        # Four-stem models: pad the two extra UI slots with None.
        return (*stem_paths[:4], None, None)
    except Exception as e:
        raise RuntimeError(f"Demucs separation failed: {e}") from e
346
+
347
def update_stems(model):
    """Show the extra stem output widgets only for the six-stem Demucs model."""
    is_six_stem = model == "htdemucs_6s.yaml"
    return gr.update(visible=is_six_stem)
352
+
353
@spaces.GPU(duration=60)
def roformer_batch(path_input, path_output, model_key, out_format, segment_size, override_seg_size, overlap, batch_size, norm_thresh, amp_thresh):
    """Batch-separate every audio file in ``path_input`` with a BS/Mel Roformer model.

    Generator: yields the accumulated log text after each step so the UI
    textbox updates live. The Separator is constructed and the model weights
    loaded once, before the file loop — the previous version rebuilt the
    separator and reloaded the model for every single file.

    Raises RuntimeError (chained) if construction, loading, or any file's
    separation fails.
    """
    found_files.clear()
    logs.clear()
    roformer_model = roformer_models[model_key]

    # Collect candidate files by extension (extensions is a module-level tuple).
    for entry in os.listdir(path_input):
        if entry.endswith(extensions):
            found_files.append(entry)
    total_files = len(found_files)

    if total_files == 0:
        logs.append("No valid audio files.")
        yield "\n".join(logs)
        return

    logs.append(f"{total_files} audio files found")
    found_files.sort()

    try:
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=models_dir,
            output_dir=path_output,
            output_format=out_format,
            use_autocast=use_autocast,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            mdxc_params={
                "segment_size": segment_size,
                "override_model_segment_size": override_seg_size,
                "batch_size": batch_size,
                "overlap": overlap,
            },
        )

        # Load the model once; it is reused for every file below.
        logs.append("Loading model...")
        yield "\n".join(logs)
        separator.load_model(model_filename=roformer_model)

        for audio_file in found_files:
            file_path = os.path.join(path_input, audio_file)
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            logs.append(f"Separating file: {audio_file}")
            yield "\n".join(logs)
            separator.separate(file_path, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")
            logs.append(f"File: {audio_file} separated!")
            yield "\n".join(logs)
    except Exception as e:
        raise RuntimeError(f"Roformer batch separation failed: {e}") from e
402
+
403
@spaces.GPU(duration=60)
def mdx23c_batch(path_input, path_output, model, out_format, segment_size, override_seg_size, overlap, batch_size, norm_thresh, amp_thresh):
    """Batch-separate every audio file in ``path_input`` with an MDX23C model.

    Generator: yields the accumulated log text after each step so the UI
    textbox updates live. The Separator is constructed and the model loaded
    once, before the file loop, instead of per file.

    Raises RuntimeError (chained) on failure. Fixes the copy-pasted error
    message that previously said "Roformer batch separation failed".
    """
    found_files.clear()
    logs.clear()

    for entry in os.listdir(path_input):
        if entry.endswith(extensions):
            found_files.append(entry)
    total_files = len(found_files)

    if total_files == 0:
        logs.append("No valid audio files.")
        yield "\n".join(logs)
        return

    logs.append(f"{total_files} audio files found")
    found_files.sort()

    try:
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=models_dir,
            output_dir=path_output,
            output_format=out_format,
            use_autocast=use_autocast,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            mdxc_params={
                "segment_size": segment_size,
                "override_model_segment_size": override_seg_size,
                "batch_size": batch_size,
                "overlap": overlap,
            },
        )

        # Load once; reused for every file in the batch.
        logs.append("Loading model...")
        yield "\n".join(logs)
        separator.load_model(model_filename=model)

        for audio_file in found_files:
            file_path = os.path.join(path_input, audio_file)
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            logs.append(f"Separating file: {audio_file}")
            yield "\n".join(logs)
            separator.separate(file_path, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")
            logs.append(f"File: {audio_file} separated!")
            yield "\n".join(logs)
    except Exception as e:
        raise RuntimeError(f"MDX23C batch separation failed: {e}") from e
451
+
452
@spaces.GPU(duration=60)
def mdxnet_batch(path_input, path_output, model, out_format, hop_length, segment_size, denoise, overlap, batch_size, norm_thresh, amp_thresh):
    """Batch-separate every audio file in ``path_input`` with an MDX-NET model.

    Generator: yields the accumulated log text after each step so the UI
    textbox updates live. The Separator is constructed and the model loaded
    once, before the file loop, instead of per file.

    Raises RuntimeError (chained) on failure. Fixes the copy-pasted error
    message that previously said "Roformer batch separation failed".
    """
    found_files.clear()
    logs.clear()

    for entry in os.listdir(path_input):
        if entry.endswith(extensions):
            found_files.append(entry)
    total_files = len(found_files)

    if total_files == 0:
        logs.append("No valid audio files.")
        yield "\n".join(logs)
        return

    logs.append(f"{total_files} audio files found")
    found_files.sort()

    try:
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=models_dir,
            output_dir=path_output,
            output_format=out_format,
            use_autocast=use_autocast,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            mdx_params={
                "hop_length": hop_length,
                "segment_size": segment_size,
                "overlap": overlap,
                "batch_size": batch_size,
                "enable_denoise": denoise,
            },
        )

        # Load once; reused for every file in the batch.
        logs.append("Loading model...")
        yield "\n".join(logs)
        separator.load_model(model_filename=model)

        for audio_file in found_files:
            file_path = os.path.join(path_input, audio_file)
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            logs.append(f"Separating file: {audio_file}")
            yield "\n".join(logs)
            separator.separate(file_path, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")
            logs.append(f"File: {audio_file} separated!")
            yield "\n".join(logs)
    except Exception as e:
        raise RuntimeError(f"MDX-NET batch separation failed: {e}") from e
501
+
502
@spaces.GPU(duration=60)
def vrarch_batch(path_input, path_output, model, out_format, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, batch_size, norm_thresh, amp_thresh):
    """Batch-separate every audio file in ``path_input`` with a VR-architecture model.

    Generator: yields the accumulated log text after each step so the UI
    textbox updates live. The Separator is constructed and the model loaded
    once, before the file loop, instead of per file.

    Raises RuntimeError (chained) on failure. Fixes the copy-pasted error
    message that previously said "Roformer batch separation failed".
    """
    found_files.clear()
    logs.clear()

    for entry in os.listdir(path_input):
        if entry.endswith(extensions):
            found_files.append(entry)
    total_files = len(found_files)

    if total_files == 0:
        logs.append("No valid audio files.")
        yield "\n".join(logs)
        return

    logs.append(f"{total_files} audio files found")
    found_files.sort()

    try:
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=models_dir,
            output_dir=path_output,
            output_format=out_format,
            use_autocast=use_autocast,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            vr_params={
                "batch_size": batch_size,
                "window_size": window_size,
                "aggression": aggression,
                "enable_tta": tta,
                "enable_post_process": post_process,
                "post_process_threshold": post_process_threshold,
                "high_end_process": high_end_process,
            },
        )

        # Load once; reused for every file in the batch.
        logs.append("Loading model...")
        yield "\n".join(logs)
        separator.load_model(model_filename=model)

        for audio_file in found_files:
            file_path = os.path.join(path_input, audio_file)
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            logs.append(f"Separating file: {audio_file}")
            yield "\n".join(logs)
            separator.separate(file_path, f"{base_name}_(Stem1)", f"{base_name}_(Stem2)")
            logs.append(f"File: {audio_file} separated!")
            yield "\n".join(logs)
    except Exception as e:
        raise RuntimeError(f"VR ARCH batch separation failed: {e}") from e
553
+
554
@spaces.GPU(duration=60)
def demucs_batch(path_input, path_output, model, out_format, shifts, segment_size, segments_enabled, overlap, batch_size, norm_thresh, amp_thresh):
    """Batch-separate every audio file in ``path_input`` with a Demucs model.

    Generator: yields the accumulated log text after each step so the UI
    textbox updates live. The Separator is constructed and the model loaded
    once, before the file loop, instead of per file. Demucs names its own
    output stems, so no custom stem names are passed to separate().

    Raises RuntimeError (chained) on failure. Fixes the copy-pasted error
    message that previously said "Roformer batch separation failed".
    """
    found_files.clear()
    logs.clear()

    for entry in os.listdir(path_input):
        if entry.endswith(extensions):
            found_files.append(entry)
    total_files = len(found_files)

    if total_files == 0:
        logs.append("No valid audio files.")
        yield "\n".join(logs)
        return

    logs.append(f"{total_files} audio files found")
    found_files.sort()

    try:
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=models_dir,
            output_dir=path_output,
            output_format=out_format,
            use_autocast=use_autocast,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            demucs_params={
                "batch_size": batch_size,
                "segment_size": segment_size,
                "shifts": shifts,
                "overlap": overlap,
                "segments_enabled": segments_enabled,
            },
        )

        # Load once; reused for every file in the batch.
        logs.append("Loading model...")
        yield "\n".join(logs)
        separator.load_model(model_filename=model)

        for audio_file in found_files:
            file_path = os.path.join(path_input, audio_file)
            logs.append(f"Separating file: {audio_file}")
            yield "\n".join(logs)
            separator.separate(file_path)
            logs.append(f"File: {audio_file} separated!")
            yield "\n".join(logs)
    except Exception as e:
        raise RuntimeError(f"Demucs batch separation failed: {e}") from e
602
+
603
+ with gr.Blocks(theme = loadThemes.load_json() or "NoCrypt/miku", title = "🎵 UVR5 UI 🎵") as app:
604
+ with Translate("assets/languages/translation.yaml", placeholder_langs = ["en", "es", "it", "pt", "ms", "id", "ru", "uk", "th", "zh", "ja", "ko", "tr", "hi"]) as lang:
605
+ gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
606
+ gr.Markdown("If you liked this HF Space you can give me a ❤️")
607
+ gr.Markdown("Try UVR5 UI using Colab [here](https://colab.research.google.com/github/Eddycrack864/UVR5-UI/blob/main/UVR_UI.ipynb)")
608
+ with gr.Tabs():
609
+ with gr.TabItem("BS/Mel Roformer"):
610
  with gr.Row():
611
+ roformer_model = gr.Dropdown(
612
+ label = _("Select the model"),
613
+ choices = list(roformer_models.keys()),
614
+ value = lambda : None,
615
+ interactive = True
616
+ )
617
+ roformer_output_format = gr.Dropdown(
618
+ label = _("Select the output format"),
619
+ choices = output_format,
620
+ value = lambda : None,
621
+ interactive = True
622
+ )
623
+ with gr.Accordion(_("Advanced settings"), open = False):
624
+ with gr.Group():
625
+ with gr.Row():
626
+ roformer_segment_size = gr.Slider(
627
+ label = _("Segment size"),
628
+ info = _("Larger consumes more resources, but may give better results"),
629
+ minimum = 32,
630
+ maximum = 4000,
631
+ step = 32,
632
+ value = 256,
633
+ interactive = True
634
+ )
635
+ roformer_override_segment_size = gr.Checkbox(
636
+ label = _("Override segment size"),
637
+ info = _("Override model default segment size instead of using the model default value"),
638
+ value = False,
639
+ interactive = True
640
+ )
641
+ with gr.Row():
642
+ roformer_overlap = gr.Slider(
643
+ label = _("Overlap"),
644
+ info = _("Amount of overlap between prediction windows"),
645
+ minimum = 2,
646
+ maximum = 10,
647
+ step = 1,
648
+ value = 8,
649
+ interactive = True
650
+ )
651
+ roformer_batch_size = gr.Slider(
652
+ label = _("Batch size"),
653
+ info = _("Larger consumes more RAM but may process slightly faster"),
654
+ minimum = 1,
655
+ maximum = 16,
656
+ step = 1,
657
+ value = 1,
658
+ interactive = True
659
+ )
660
+ with gr.Row():
661
+ roformer_normalization_threshold = gr.Slider(
662
+ label = _("Normalization threshold"),
663
+ info = _("The threshold for audio normalization"),
664
+ minimum = 0.1,
665
+ maximum = 1,
666
+ step = 0.1,
667
+ value = 0.1,
668
+ interactive = True
669
+ )
670
+ roformer_amplification_threshold = gr.Slider(
671
+ label = _("Amplification threshold"),
672
+ info = _("The threshold for audio amplification"),
673
+ minimum = 0.1,
674
+ maximum = 1,
675
+ step = 0.1,
676
+ value = 0.1,
677
+ interactive = True
678
+ )
679
  with gr.Row():
680
+ roformer_audio = gr.Audio(
681
+ label = _("Input audio"),
682
+ type = "filepath",
683
+ interactive = True
684
+ )
685
+ with gr.Accordion(_("Separation by link"), open = False):
686
+ with gr.Row():
687
+ roformer_link = gr.Textbox(
688
+ label = _("Link"),
689
+ placeholder = _("Paste the link here"),
690
+ interactive = True
691
+ )
692
+ with gr.Row():
693
+ gr.Markdown(_("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"))
694
+ with gr.Row():
695
+ roformer_download_button = gr.Button(
696
+ _("Download!"),
697
+ variant = "primary"
698
+ )
699
 
700
+ roformer_download_button.click(download_audio, [roformer_link], [roformer_audio])
701
 
702
+ with gr.Accordion(_("Batch separation"), open = False):
703
+ with gr.Row():
704
+ roformer_input_path = gr.Textbox(
705
+ label = _("Input path"),
706
+ placeholder = _("Place the input path here"),
707
+ interactive = True
708
+ )
709
+ roformer_output_path = gr.Textbox(
710
+ label = _("Output path"),
711
+ placeholder = _("Place the output path here"),
712
+ interactive = True
713
+ )
714
+ with gr.Row():
715
+ roformer_bath_button = gr.Button(_("Separate!"), variant = "primary")
716
+ with gr.Row():
717
+ roformer_info = gr.Textbox(
718
+ label = _("Output information"),
719
+ interactive = False
720
+ )
721
+
722
+ roformer_bath_button.click(roformer_batch, [roformer_input_path, roformer_output_path, roformer_model, roformer_output_format, roformer_segment_size, roformer_override_segment_size, roformer_overlap, roformer_batch_size, roformer_normalization_threshold, roformer_amplification_threshold], [roformer_info])
723
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
  with gr.Row():
725
+ roformer_button = gr.Button(_("Separate!"), variant = "primary")
726
+ with gr.Row():
727
+ roformer_stem1 = gr.Audio(
728
+ show_download_button = True,
729
+ interactive = False,
730
+ label = _("Stem 1"),
731
+ type = "filepath"
732
+ )
733
+ roformer_stem2 = gr.Audio(
734
+ show_download_button = True,
735
+ interactive = False,
736
+ label = _("Stem 2"),
737
+ type = "filepath"
738
+ )
739
+
740
+ roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_segment_size, roformer_override_segment_size, roformer_overlap, roformer_batch_size, roformer_normalization_threshold, roformer_amplification_threshold], [roformer_stem1, roformer_stem2])
741
+
742
+ with gr.TabItem("MDX23C"):
743
  with gr.Row():
744
+ mdx23c_model = gr.Dropdown(
745
+ label = _("Select the model"),
746
+ choices = mdx23c_models,
747
+ value = lambda : None,
748
+ interactive = True
749
+ )
750
+ mdx23c_output_format = gr.Dropdown(
751
+ label = _("Select the output format"),
752
+ choices = output_format,
753
+ value = lambda : None,
754
+ interactive = True
755
+ )
756
+ with gr.Accordion(_("Advanced settings"), open = False):
757
+ with gr.Group():
758
+ with gr.Row():
759
+ mdx23c_segment_size = gr.Slider(
760
+ minimum = 32,
761
+ maximum = 4000,
762
+ step = 32,
763
+ label = _("Segment size"),
764
+ info = _("Larger consumes more resources, but may give better results"),
765
+ value = 256,
766
+ interactive = True
767
+ )
768
+ mdx23c_override_segment_size = gr.Checkbox(
769
+ label = _("Override segment size"),
770
+ info = _("Override model default segment size instead of using the model default value"),
771
+ value = False,
772
+ interactive = True
773
+ )
774
+ with gr.Row():
775
+ mdx23c_overlap = gr.Slider(
776
+ minimum = 2,
777
+ maximum = 50,
778
+ step = 1,
779
+ label = _("Overlap"),
780
+ info = _("Amount of overlap between prediction windows"),
781
+ value = 8,
782
+ interactive = True
783
+ )
784
+ mdx23c_batch_size = gr.Slider(
785
+ label = _("Batch size"),
786
+ info = _("Larger consumes more RAM but may process slightly faster"),
787
+ minimum = 1,
788
+ maximum = 16,
789
+ step = 1,
790
+ value = 1,
791
+ interactive = True
792
+ )
793
+ with gr.Row():
794
+ mdx23c_normalization_threshold = gr.Slider(
795
+ label = _("Normalization threshold"),
796
+ info = _("The threshold for audio normalization"),
797
+ minimum = 0.1,
798
+ maximum = 1,
799
+ step = 0.1,
800
+ value = 0.1,
801
+ interactive = True
802
+ )
803
+ mdx23c_amplification_threshold = gr.Slider(
804
+ label = _("Amplification threshold"),
805
+ info = _("The threshold for audio amplification"),
806
+ minimum = 0.1,
807
+ maximum = 1,
808
+ step = 0.1,
809
+ value = 0.1,
810
+ interactive = True
811
+ )
812
  with gr.Row():
813
+ mdx23c_audio = gr.Audio(
814
+ label = _("Input audio"),
815
+ type = "filepath",
816
+ interactive = True
817
+ )
818
+ with gr.Accordion(_("Separation by link"), open = False):
819
+ with gr.Row():
820
+ mdx23c_link = gr.Textbox(
821
+ label = _("Link"),
822
+ placeholder = _("Paste the link here"),
823
+ interactive = True
824
+ )
825
+ with gr.Row():
826
+ gr.Markdown(_("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"))
827
+ with gr.Row():
828
+ mdx23c_download_button = gr.Button(
829
+ _("Download!"),
830
+ variant = "primary"
831
+ )
832
 
833
+ mdx23c_download_button.click(download_audio, [mdx23c_link], [mdx23c_audio])
834
 
835
+ with gr.Accordion(_("Batch separation"), open = False):
836
+ with gr.Row():
837
+ mdx23c_input_path = gr.Textbox(
838
+ label = _("Input path"),
839
+ placeholder = _("Place the input path here"),
840
+ interactive = True
841
+ )
842
+ mdx23c_output_path = gr.Textbox(
843
+ label = _("Output path"),
844
+ placeholder = _("Place the output path here"),
845
+ interactive = True
846
+ )
847
+ with gr.Row():
848
+ mdx23c_bath_button = gr.Button(_("Separate!"), variant = "primary")
849
+ with gr.Row():
850
+ mdx23c_info = gr.Textbox(
851
+ label = _("Output information"),
852
+ interactive = False
853
+ )
854
 
855
+ mdx23c_bath_button.click(mdx23c_batch, [mdx23c_input_path, mdx23c_output_path, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_override_segment_size, mdx23c_overlap, mdx23c_batch_size, mdx23c_normalization_threshold, mdx23c_amplification_threshold], [mdx23c_info])
856
+
857
+ with gr.Row():
858
+ mdx23c_button = gr.Button(_("Separate!"), variant = "primary")
859
+ with gr.Row():
860
+ mdx23c_stem1 = gr.Audio(
861
+ show_download_button = True,
862
+ interactive = False,
863
+ label = _("Stem 1"),
864
+ type = "filepath"
865
+ )
866
+ mdx23c_stem2 = gr.Audio(
867
+ show_download_button = True,
868
+ interactive = False,
869
+ label = _("Stem 2"),
870
+ type = "filepath"
871
+ )
872
+
873
+ mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_override_segment_size, mdx23c_overlap, mdx23c_batch_size, mdx23c_normalization_threshold, mdx23c_amplification_threshold], [mdx23c_stem1, mdx23c_stem2])
874
+
875
+ with gr.TabItem("MDX-NET"):
876
+ with gr.Row():
877
+ mdxnet_model = gr.Dropdown(
878
+ label = _("Select the model"),
879
+ choices = mdxnet_models,
880
+ value = lambda : None,
 
 
881
  interactive = True
882
+ )
883
+ mdxnet_output_format = gr.Dropdown(
884
+ label = _("Select the output format"),
885
+ choices = output_format,
886
+ value = lambda : None,
887
+ interactive = True
888
+ )
889
+ with gr.Accordion(_("Advanced settings"), open = False):
890
+ with gr.Group():
891
+ with gr.Row():
892
+ mdxnet_hop_length = gr.Slider(
893
+ label = _("Hop length"),
894
+ info = _("Usually called stride in neural networks; only change if you know what you're doing"),
895
+ minimum = 32,
896
+ maximum = 2048,
897
+ step = 32,
898
+ value = 1024,
899
+ interactive = True
900
+ )
901
+ mdxnet_segment_size = gr.Slider(
902
+ minimum = 32,
903
+ maximum = 4000,
904
+ step = 32,
905
+ label = _("Segment size"),
906
+ info = _("Larger consumes more resources, but may give better results"),
907
+ value = 256,
908
+ interactive = True
909
+ )
910
+ mdxnet_denoise = gr.Checkbox(
911
+ label = _("Denoise"),
912
+ info = _("Enable denoising during separation"),
913
+ value = True,
914
+ interactive = True
915
+ )
916
+ with gr.Row():
917
+ mdxnet_overlap = gr.Slider(
918
+ label = _("Overlap"),
919
+ info = _("Amount of overlap between prediction windows"),
920
+ minimum = 0.001,
921
+ maximum = 0.999,
922
+ step = 0.001,
923
+ value = 0.25,
924
+ interactive = True
925
+ )
926
+ mdxnet_batch_size = gr.Slider(
927
+ label = _("Batch size"),
928
+ info = _("Larger consumes more RAM but may process slightly faster"),
929
+ minimum = 1,
930
+ maximum = 16,
931
+ step = 1,
932
+ value = 1,
933
+ interactive = True
934
+ )
935
+ with gr.Row():
936
+ mdxnet_normalization_threshold = gr.Slider(
937
+ label = _("Normalization threshold"),
938
+ info = _("The threshold for audio normalization"),
939
+ minimum = 0.1,
940
+ maximum = 1,
941
+ step = 0.1,
942
+ value = 0.1,
943
+ interactive = True
944
+ )
945
+ mdxnet_amplification_threshold = gr.Slider(
946
+ label = _("Amplification threshold"),
947
+ info = _("The threshold for audio amplification"),
948
+ minimum = 0.1,
949
+ maximum = 1,
950
+ step = 0.1,
951
+ value = 0.1,
952
+ interactive = True
953
+ )
954
  with gr.Row():
955
+ mdxnet_audio = gr.Audio(
956
+ label = _("Input audio"),
957
+ type = "filepath",
958
+ interactive = True
959
+ )
960
+ with gr.Accordion(_("Separation by link"), open = False):
961
+ with gr.Row():
962
+ mdxnet_link = gr.Textbox(
963
+ label = _("Link"),
964
+ placeholder = _("Paste the link here"),
965
+ interactive = True
966
+ )
967
+ with gr.Row():
968
+ gr.Markdown(_("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"))
969
+ with gr.Row():
970
+ mdxnet_download_button = gr.Button(
971
+ _("Download!"),
972
+ variant = "primary"
973
+ )
974
+
975
+ mdxnet_download_button.click(download_audio, [mdxnet_link], [mdxnet_audio])
976
+
977
+ with gr.Accordion(_("Batch separation"), open = False):
978
+ with gr.Row():
979
+ mdxnet_input_path = gr.Textbox(
980
+ label = _("Input path"),
981
+ placeholder = _("Place the input path here"),
982
+ interactive = True
983
+ )
984
+ mdxnet_output_path = gr.Textbox(
985
+ label = _("Output path"),
986
+ placeholder = _("Place the output path here"),
987
+ interactive = True
988
+ )
989
+ with gr.Row():
990
+ mdxnet_bath_button = gr.Button(_("Separate!"), variant = "primary")
991
+ with gr.Row():
992
+ mdxnet_info = gr.Textbox(
993
+ label = _("Output information"),
994
+ interactive = False
995
+ )
996
+
997
+ mdxnet_bath_button.click(mdxnet_batch, [mdxnet_input_path, mdxnet_output_path, mdxnet_model, mdxnet_output_format, mdxnet_hop_length, mdxnet_segment_size, mdxnet_denoise, mdxnet_overlap, mdxnet_batch_size, mdxnet_normalization_threshold, mdxnet_amplification_threshold], [mdxnet_info])
998
+
999
  with gr.Row():
1000
+ mdxnet_button = gr.Button(_("Separate!"), variant = "primary")
1001
  with gr.Row():
1002
+ mdxnet_stem1 = gr.Audio(
1003
+ show_download_button = True,
1004
+ interactive = False,
1005
+ label = _("Stem 1"),
1006
+ type = "filepath"
1007
+ )
1008
+ mdxnet_stem2 = gr.Audio(
1009
+ show_download_button = True,
1010
+ interactive = False,
1011
+ label = _("Stem 2"),
1012
+ type = "filepath"
1013
+ )
1014
 
1015
+ mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_hop_length, mdxnet_segment_size, mdxnet_denoise, mdxnet_overlap, mdxnet_batch_size, mdxnet_normalization_threshold, mdxnet_amplification_threshold], [mdxnet_stem1, mdxnet_stem2])
1016
 
1017
+ with gr.TabItem("VR ARCH"):
1018
+ with gr.Row():
1019
+ vrarch_model = gr.Dropdown(
1020
+ label = _("Select the model"),
1021
+ choices = vrarch_models,
1022
+ value = lambda : None,
1023
+ interactive = True
1024
+ )
1025
+ vrarch_output_format = gr.Dropdown(
1026
+ label = _("Select the output format"),
1027
+ choices = output_format,
1028
+ value = lambda : None,
1029
+ interactive = True
1030
+ )
1031
+ with gr.Accordion(_("Advanced settings"), open = False):
1032
+ with gr.Group():
1033
+ with gr.Row():
1034
+ vrarch_window_size = gr.Slider(
1035
+ label = _("Window size"),
1036
+ info = _("Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality"),
1037
+ minimum=320,
1038
+ maximum=1024,
1039
+ step=32,
1040
+ value = 512,
1041
+ interactive = True
1042
+ )
1043
+ vrarch_agression = gr.Slider(
1044
+ minimum = 1,
1045
+ maximum = 50,
1046
+ step = 1,
1047
+ label = _("Agression"),
1048
+ info = _("Intensity of primary stem extraction"),
1049
+ value = 5,
1050
+ interactive = True
1051
+ )
1052
+ vrarch_tta = gr.Checkbox(
1053
+ label = _("TTA"),
1054
+ info = _("Enable Test-Time-Augmentation; slow but improves quality"),
1055
+ value = True,
1056
+ visible = True,
1057
+ interactive = True
1058
+ )
1059
+ with gr.Row():
1060
+ vrarch_post_process = gr.Checkbox(
1061
+ label = _("Post process"),
1062
+ info = _("Identify leftover artifacts within vocal output; may improve separation for some songs"),
1063
+ value = False,
1064
+ visible = True,
1065
+ interactive = True
1066
+ )
1067
+ vrarch_post_process_threshold = gr.Slider(
1068
+ label = _("Post process threshold"),
1069
+ info = _("Threshold for post-processing"),
1070
+ minimum = 0.1,
1071
+ maximum = 0.3,
1072
+ step = 0.1,
1073
+ value = 0.2,
1074
+ interactive = True
1075
+ )
1076
+ with gr.Row():
1077
+ vrarch_high_end_process = gr.Checkbox(
1078
+ label = _("High end process"),
1079
+ info = _("Mirror the missing frequency range of the output"),
1080
+ value = False,
1081
+ visible = True,
1082
+ interactive = True,
1083
+ )
1084
+ vrarch_batch_size = gr.Slider(
1085
+ label = _("Batch size"),
1086
+ info = _("Larger consumes more RAM but may process slightly faster"),
1087
+ minimum = 1,
1088
+ maximum = 16,
1089
+ step = 1,
1090
+ value = 1,
1091
+ interactive = True
1092
+ )
1093
+ with gr.Row():
1094
+ vrarch_normalization_threshold = gr.Slider(
1095
+ label = _("Normalization threshold"),
1096
+ info = _("The threshold for audio normalization"),
1097
+ minimum = 0.1,
1098
+ maximum = 1,
1099
+ step = 0.1,
1100
+ value = 0.1,
1101
+ interactive = True
1102
+ )
1103
+ vrarch_amplification_threshold = gr.Slider(
1104
+ label = _("Amplification threshold"),
1105
+ info = _("The threshold for audio amplification"),
1106
+ minimum = 0.1,
1107
+ maximum = 1,
1108
+ step = 0.1,
1109
+ value = 0.1,
1110
+ interactive = True
1111
+ )
1112
+ with gr.Row():
1113
+ vrarch_audio = gr.Audio(
1114
+ label = _("Input audio"),
1115
+ type = "filepath",
1116
+ interactive = True
1117
+ )
1118
+ with gr.Accordion(_("Separation by link"), open = False):
1119
+ with gr.Row():
1120
+ vrarch_link = gr.Textbox(
1121
+ label = _("Link"),
1122
+ placeholder = _("Paste the link here"),
1123
+ interactive = True
1124
+ )
1125
+ with gr.Row():
1126
+ gr.Markdown(_("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"))
1127
+ with gr.Row():
1128
+ vrarch_download_button = gr.Button(
1129
+ _("Download!"),
1130
+ variant = "primary"
1131
+ )
1132
 
1133
+ vrarch_download_button.click(download_audio, [vrarch_link], [vrarch_audio])
1134
+
1135
+ with gr.Accordion(_("Batch separation"), open = False):
1136
+ with gr.Row():
1137
+ vrarch_input_path = gr.Textbox(
1138
+ label = _("Input path"),
1139
+ placeholder = _("Place the input path here"),
1140
+ interactive = True
1141
+ )
1142
+ vrarch_output_path = gr.Textbox(
1143
+ label = _("Output path"),
1144
+ placeholder = _("Place the output path here"),
1145
+ interactive = True
1146
+ )
1147
+ with gr.Row():
1148
+ vrarch_bath_button = gr.Button(_("Separate!"), variant = "primary")
1149
+ with gr.Row():
1150
+ vrarch_info = gr.Textbox(
1151
+ label = _("Output information"),
1152
+ interactive = False
1153
+ )
1154
+
1155
+ vrarch_bath_button.click(vrarch_batch, [vrarch_input_path, vrarch_output_path, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_post_process, vrarch_post_process_threshold, vrarch_high_end_process, vrarch_batch_size, vrarch_normalization_threshold, vrarch_amplification_threshold], [vrarch_info])
1156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1157
  with gr.Row():
1158
+ vrarch_button = gr.Button(_("Separate!"), variant = "primary")
1159
+ with gr.Row():
1160
+ vrarch_stem1 = gr.Audio(
1161
+ show_download_button = True,
1162
+ interactive = False,
1163
+ type = "filepath",
1164
+ label = _("Stem 1")
1165
+ )
1166
+ vrarch_stem2 = gr.Audio(
1167
+ show_download_button = True,
1168
+ interactive = False,
1169
+ type = "filepath",
1170
+ label = _("Stem 2")
1171
+ )
1172
+
1173
+ vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_post_process, vrarch_post_process_threshold, vrarch_high_end_process, vrarch_batch_size, vrarch_normalization_threshold, vrarch_amplification_threshold], [vrarch_stem1, vrarch_stem2])
1174
+
1175
+ with gr.TabItem("Demucs"):
1176
  with gr.Row():
1177
+ demucs_model = gr.Dropdown(
1178
+ label = _("Select the model"),
1179
+ choices = demucs_models,
1180
+ value = lambda : None,
1181
+ interactive = True
1182
+ )
1183
+ demucs_output_format = gr.Dropdown(
1184
+ label = _("Select the output format"),
1185
+ choices = output_format,
1186
+ value = lambda : None,
1187
+ interactive = True
1188
+ )
1189
+ with gr.Accordion(_("Advanced settings"), open = False):
1190
+ with gr.Group():
1191
+ with gr.Row():
1192
+ demucs_shifts = gr.Slider(
1193
+ label = _("Shifts"),
1194
+ info = _("Number of predictions with random shifts, higher = slower but better quality"),
1195
+ minimum = 1,
1196
+ maximum = 20,
1197
+ step = 1,
1198
+ value = 2,
1199
+ interactive = True
1200
+ )
1201
+ demucs_segment_size = gr.Slider(
1202
+ label = _("Segment size"),
1203
+ info = _("Size of segments into which the audio is split. Higher = slower but better quality"),
1204
+ minimum = 1,
1205
+ maximum = 100,
1206
+ step = 1,
1207
+ value = 40,
1208
+ interactive = True
1209
+ )
1210
+ demucs_segments_enabled = gr.Checkbox(
1211
+ label = _("Segment-wise processing"),
1212
+ info = _("Enable segment-wise processing"),
1213
+ value = True,
1214
+ interactive = True
1215
+ )
1216
+ with gr.Row():
1217
+ demucs_overlap = gr.Slider(
1218
+ label = _("Overlap"),
1219
+ info = _("Overlap between prediction windows. Higher = slower but better quality"),
1220
+ minimum=0.001,
1221
+ maximum=0.999,
1222
+ step=0.001,
1223
+ value = 0.25,
1224
+ interactive = True
1225
+ )
1226
+ demucs_batch_size = gr.Slider(
1227
+ label = _("Batch size"),
1228
+ info = _("Larger consumes more RAM but may process slightly faster"),
1229
+ minimum = 1,
1230
+ maximum = 16,
1231
+ step = 1,
1232
+ value = 1,
1233
+ interactive = True
1234
+ )
1235
+ with gr.Row():
1236
+ demucs_normalization_threshold = gr.Slider(
1237
+ label = _("Normalization threshold"),
1238
+ info = _("The threshold for audio normalization"),
1239
+ minimum = 0.1,
1240
+ maximum = 1,
1241
+ step = 0.1,
1242
+ value = 0.1,
1243
+ interactive = True
1244
+ )
1245
+ demucs_amplification_threshold = gr.Slider(
1246
+ label = _("Amplification threshold"),
1247
+ info = _("The threshold for audio amplification"),
1248
+ minimum = 0.1,
1249
+ maximum = 1,
1250
+ step = 0.1,
1251
+ value = 0.1,
1252
+ interactive = True
1253
+ )
1254
  with gr.Row():
1255
+ demucs_audio = gr.Audio(
1256
+ label = _("Input audio"),
1257
+ type = "filepath",
1258
+ interactive = True
1259
+ )
1260
+ with gr.Accordion(_("Separation by link"), open = False):
1261
+ with gr.Row():
1262
+ demucs_link = gr.Textbox(
1263
+ label = _("Link"),
1264
+ placeholder = _("Paste the link here"),
1265
+ interactive = True
1266
+ )
1267
+ with gr.Row():
1268
+ gr.Markdown(_("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"))
1269
+ with gr.Row():
1270
+ demucs_download_button = gr.Button(
1271
+ _("Download!"),
1272
+ variant = "primary"
1273
+ )
1274
 
1275
+ demucs_download_button.click(download_audio, [demucs_link], [demucs_audio])
1276
 
1277
+ with gr.Accordion(_("Batch separation"), open = False):
1278
+ with gr.Row():
1279
+ demucs_input_path = gr.Textbox(
1280
+ label = _("Input path"),
1281
+ placeholder = _("Place the input path here"),
1282
+ interactive = True
1283
+ )
1284
+ demucs_output_path = gr.Textbox(
1285
+ label = _("Output path"),
1286
+ placeholder = _("Place the output path here"),
1287
+ interactive = True
1288
+ )
1289
+ with gr.Row():
1290
+ demucs_bath_button = gr.Button(_("Separate!"), variant = "primary")
1291
+ with gr.Row():
1292
+ demucs_info = gr.Textbox(
1293
+ label = _("Output information"),
1294
+ interactive = False
1295
+ )
1296
 
1297
+ demucs_bath_button.click(demucs_batch, [demucs_input_path, demucs_output_path, demucs_model, demucs_output_format, demucs_shifts, demucs_segment_size, demucs_segments_enabled, demucs_overlap, demucs_batch_size, demucs_normalization_threshold, demucs_amplification_threshold], [demucs_info])
1298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1299
  with gr.Row():
1300
+ demucs_button = gr.Button(_("Separate!"), variant = "primary")
 
 
 
 
1301
  with gr.Row():
1302
+ demucs_stem1 = gr.Audio(
1303
+ show_download_button = True,
1304
+ interactive = False,
1305
+ type = "filepath",
1306
+ label = _("Stem 1")
1307
+ )
1308
+ demucs_stem2 = gr.Audio(
1309
+ show_download_button = True,
1310
+ interactive = False,
1311
+ type = "filepath",
1312
+ label = _("Stem 2")
1313
+ )
1314
  with gr.Row():
1315
+ demucs_stem3 = gr.Audio(
1316
+ show_download_button = True,
1317
+ interactive = False,
1318
+ type = "filepath",
1319
+ label = _("Stem 3")
1320
+ )
1321
+ demucs_stem4 = gr.Audio(
1322
+ show_download_button = True,
1323
+ interactive = False,
1324
+ type = "filepath",
1325
+ label = _("Stem 4")
1326
+ )
1327
+ with gr.Row(visible=False) as stem6:
1328
+ demucs_stem5 = gr.Audio(
1329
+ show_download_button = True,
1330
+ interactive = False,
1331
+ type = "filepath",
1332
+ label = _("Stem 5")
1333
+ )
1334
+ demucs_stem6 = gr.Audio(
1335
+ show_download_button = True,
1336
+ interactive = False,
1337
+ type = "filepath",
1338
+ label = _("Stem 6")
1339
+ )
1340
 
1341
+ demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
1342
+
1343
+ demucs_button.click(demucs_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_segment_size, demucs_segments_enabled, demucs_overlap, demucs_batch_size, demucs_normalization_threshold, demucs_amplification_threshold], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4, demucs_stem5, demucs_stem6])
1344
 
1345
+ with gr.TabItem(_("Themes")):
1346
+ themes_select = gr.Dropdown(
1347
+ label = _("Theme"),
1348
+ info = _("Select the theme you want to use. (Requires restarting the App)"),
1349
+ choices = loadThemes.get_list(),
1350
+ value = loadThemes.read_json(),
1351
+ visible = True
 
1352
  )
1353
+ dummy_output = gr.Textbox(visible = False)
1354
+
1355
+ themes_select.change(
1356
+ fn = loadThemes.select_theme,
1357
+ inputs = themes_select,
1358
+ outputs = [dummy_output]
 
 
 
 
 
 
1359
  )
1360
+
1361
+ with gr.TabItem(_("Credits")):
1362
+ gr.Markdown(
1363
+ """
1364
+ UVR5 UI created by **[Eddycrack 864](https://github.com/Eddycrack864).** Join the **[AI HUB](https://discord.gg/aihub)** community.
1365
+ * python-audio-separator by [beveradb](https://github.com/beveradb).
1366
+ * gradio-i18n by [hoveychen](https://github.com/hoveychen).
1367
+ * Special thanks to [Ilaria](https://github.com/TheStingerX) for hosting this space and for their help.
1368
+ * Thanks to [Mikus](https://github.com/cappuch) for the help with the code.
1369
+ * Thanks to [Nick088](https://huggingface.co/Nick088) for the help to fix roformers.
1370
+ * Thanks to the [yt_dlp](https://github.com/yt-dlp/yt-dlp) devs.
1371
+ * Separation by link source code and improvements by [Blane187](https://huggingface.co/Blane187).
1372
+ * Thanks to [ArisDev](https://github.com/aris-py) for porting UVR5 UI to Kaggle and improvements.
1373
+ * Thanks to [Bebra777228](https://github.com/Bebra777228), whose code guided me in improving my own.
1374
+
1375
+
1376
+ You can donate to the original UVR5 project here:
1377
+ [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/uvr5)
1378
+ """
1379
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1380
 
1381
  app.queue()
1382
  app.launch()
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- numpy==1.25.2
2
- audio-separator[gpu]==0.17.5
3
  scipy
4
- yt_dlp
 
 
1
+ audio-separator[gpu]==0.24.1
 
2
  scipy
3
+ yt_dlp
4
+ gradio_i18n