justyoung committed on
Commit b55cf8d · verified · 1 Parent(s): c5ca859

Update app.py

Files changed (1)
  1. app.py +85 -92

app.py CHANGED
@@ -1,12 +1,53 @@
 import gradio as gr
+#from __future__ import unicode_literals
 import yt_dlp
 import ffmpeg
 import subprocess
 import numpy as np
 import librosa
 import soundfile
-#from __future__ import unicode_literals

+# Function to download audio from YouTube
+def download_audio(url, audio_name):
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'wav',
+        }],
+        "outtmpl": f'/content/youtubeaudio/{audio_name}',  # yt-dlp appends .wav after extraction; absolute path matches what process_audio reads back
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([url])
+
+# Function to separate vocals using demucs
+def separate_vocals(audio_path, audio_name):
+    command = f"demucs --two-stems=vocals {audio_path}"
+    result = subprocess.run(command.split(), stdout=subprocess.PIPE)
+    print(result.stdout.decode())
+    subprocess.run(f"mkdir -p /content/audio/{audio_name}", shell=True)
+    subprocess.run(f"cp -r /content/separated/htdemucs/{audio_name}/* /content/audio/{audio_name}", shell=True)
+    subprocess.run(f"cp -r /content/youtubeaudio/{audio_name}.wav /content/audio/{audio_name}", shell=True)
+
+# RMS function from librosa
+def get_rms(y, frame_length=2048, hop_length=512, pad_mode="constant"):
+    padding = (int(frame_length // 2), int(frame_length // 2))
+    y = np.pad(y, padding, mode=pad_mode)
+    axis = -1
+    out_strides = y.strides + tuple([y.strides[axis]])
+    x_shape_trimmed = list(y.shape)
+    x_shape_trimmed[axis] -= frame_length - 1
+    out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
+    xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
+    target_axis = axis + 1 if axis >= 0 else axis - 1
+    xw = np.moveaxis(xw, -1, target_axis)
+    slices = [slice(None)] * xw.ndim
+    slices[axis] = slice(0, None, hop_length)
+    x = xw[tuple(slices)]
+    power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
+    return np.sqrt(power)
+
+# Slicer class to split audio
 class Slicer:
     def __init__(self, sr, threshold=-40., min_length=5000, min_interval=300, hop_size=20, max_sil_kept=5000):
         if not min_length >= min_interval >= hop_size:
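
Note: the get_rms helper added above re-implements librosa's framed RMS via NumPy stride tricks. A minimal equivalence check, assuming librosa is installed and get_rms is defined as in app.py; it should print True:

import numpy as np
import librosa

y = np.random.randn(22050).astype(np.float32)         # 1 s of noise at 22.05 kHz
mine = get_rms(y, frame_length=2048, hop_length=512)  # shape (1, n_frames)
ref = librosa.feature.rms(y=y, frame_length=2048, hop_length=512, pad_mode="constant")
print(np.allclose(mine, ref, atol=1e-6))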
@@ -94,102 +135,54 @@ class Slicer:
         chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))
         return chunks

-def get_rms(y, frame_length=2048, hop_length=512, pad_mode="constant"):
-    padding = (int(frame_length // 2), int(frame_length // 2))
-    y = np.pad(y, padding, mode=pad_mode)
-    axis = -1
-    out_strides = y.strides + tuple([y.strides[axis]])
-    x_shape_trimmed = list(y.shape)
-    x_shape_trimmed[axis] -= frame_length - 1
-    out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
-    xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
-    if axis < 0:
-        target_axis = axis - 1
-    else:
-        target_axis = axis + 1
-    xw = np.moveaxis(xw, -1, target_axis)
-    slices = [slice(None)] * xw.ndim
-    slices[axis] = slice(0, None, hop_length)
-    x = xw[tuple(slices)]
-    power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
-    return np.sqrt(power)
-
-def download_audio(dataset, url, drive_path, audio_name):
+def process_audio(mode, dataset, url, drive_path, audio_name):
     if dataset == "Drive":
-        return "Dataset is set to Drive. Skipping download."
+        print("Dataset is set to Drive. Skipping this section")
     elif dataset == "Youtube":
-        ydl_opts = {
-            'format': 'bestaudio/best',
-            'postprocessors': [{
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'wav',
-            }],
-            "outtmpl": f'/content/youtubeaudio/{audio_name}',
-        }
-        def download_from_url(url):
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-
-        download_from_url(url)
-        return f'Audio downloaded and saved as /content/youtubeaudio/{audio_name}.wav'
-
-def separate_audio(dataset, audio_name, drive_path):
+        download_audio(url, audio_name)
+
+    audio_input = f"/content/youtubeaudio/{audio_name}.wav"
+
     if dataset == "Drive":
-        audio_input = drive_path
+        command = f"demucs --two-stems=vocals {drive_path}"
     elif dataset == "Youtube":
-        audio_input = f"/content/youtubeaudio/{audio_name}.wav"
-    command = f"demucs --two-stems=vocals {audio_input}"
-    result = subprocess.run(command.split(), stdout=subprocess.PIPE)
-    subprocess.run(f"!mkdir -p /content/audio/{audio_name}", shell=True)
-    subprocess.run(f"!cp -r /content/separated/htdemucs/{audio_name}/* /content/audio/{audio_name}", shell=True)
-    if dataset == "Youtube":
-        subprocess.run(f"!cp -r /content/youtubeaudio/{audio_name}.wav /content/audio/{audio_name}", shell=True)
-    return result.stdout.decode()
+        command = f"demucs --two-stems=vocals {audio_input}"
+
+    subprocess.run(command.split(), stdout=subprocess.PIPE)
+
+    if mode == "Splitting":
+        audio, sr = librosa.load(f'/content/separated/htdemucs/{audio_name}/vocals.wav', sr=None, mono=False)
+        slicer = Slicer(
+            sr=sr,
+            threshold=-40,
+            min_length=5000,
+            min_interval=500,
+            hop_size=10,
+            max_sil_kept=500
+        )
+        chunks = slicer.slice(audio)
+        subprocess.run(f"mkdir -p /content/dataset/{audio_name}", shell=True)  # ensure the output directory exists
+        for i, chunk in enumerate(chunks):
+            if len(chunk.shape) > 1:
+                chunk = chunk.T  # (channels, samples) -> (samples, channels) for soundfile
+            soundfile.write(f'/content/dataset/{audio_name}/split_{i}.wav', chunk, sr)
+
+    return f"Processing complete for {audio_name}"

-def split_audio(audio_name):
-    audio, sr = librosa.load(f'/content/separated/htdemucs/{audio_name}/vocals.wav', sr=None, mono=False)
-    slicer = Slicer(
-        sr=sr,
-        threshold=-40,
-        min_length=5000,
-        min_interval=500,
-        hop_size=10,
-        max_sil_kept=500
-    )
-    chunks = slicer.slice(audio)
-    subprocess.run(f"!mkdir -p /content/dataset/{audio_name}", shell=True)
-    for i, chunk in enumerate(chunks):
-        if len(chunk.shape) > 1:
-            chunk = chunk.T
-        soundfile.write(f'/content/dataset/{audio_name}/split_{i}.wav', chunk, sr)
-    subprocess.run(f"!mkdir -p /content/dataset/{audio_name}", shell=True)
-    subprocess.run(f"!cp -r /content/dataset/{audio_name}/* /content/dataset/{audio_name}", shell=True)
-    return "Audio split into chunks and saved."
-
-def process_audio(mode, dataset, url, drive_path, audio_name):
-    download_result = download_audio(dataset, url, drive_path, audio_name)
-    if "Skipping download" not in download_result:
-        separate_result = separate_audio(dataset, audio_name, drive_path)
-        if mode == "Splitting":
-            split_result = split_audio(audio_name)
-            return f"{download_result}\n{separate_result}\n{split_result}"
-        else:
-            return f"{download_result}\n{separate_result}\nMode is set to Separate. Skipping splitting."
-    else:
-        return download_result
-
-demo = gr.Interface(
-    fn=process_audio,
-    inputs=[
-        gr.Dropdown(choices=["Splitting", "Separate"], label="Mode"),
-        gr.Dropdown(choices=["Youtube", "Drive"], label="Dataset"),
-        gr.Textbox(label="URL"),
-        gr.Textbox(label="Drive Path"),
-        gr.Textbox(label="Audio Name"),
-    ],
-    outputs="text",
-    title="Dataset Maker",
-    description="Process audio from Youtube or Drive and split it based on silence detection."
-)
+with gr.Blocks() as demo:
+    with gr.Column():
+        gr.Markdown("# Dataset Maker")
+        mode = gr.Dropdown(choices=["Splitting", "Separate"], label="Mode")
+        dataset = gr.Dropdown(choices=["Youtube", "Drive"], label="Dataset")
+        url = gr.Textbox(label="URL")
+        drive_path = gr.Textbox(label="Drive Path")
+        audio_name = gr.Textbox(label="Audio Name")
+        output = gr.Textbox(label="Output")
+        process_button = gr.Button("Process")
+
+        process_button.click(
+            process_audio,
+            inputs=[mode, dataset, url, drive_path, audio_name],
+            outputs=[output]
+        )

 demo.launch()
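
Note: the rewritten process_audio can be smoke-tested without the Gradio UI. A minimal sketch, assuming yt-dlp, ffmpeg, and demucs are installed and the functions above are in scope; the URL is a placeholder. One caveat: demucs names its output folder after the input file's stem, so audio_name has to match the separated track's name for the hard-coded /content/separated/htdemucs/... path to resolve.

result = process_audio(
    mode="Splitting",
    dataset="Youtube",
    url="https://www.youtube.com/watch?v=<video-id>",  # placeholder URL
    drive_path="",
    audio_name="sample",
)
print(result)  # expected: "Processing complete for sample"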
 
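Note: the Slicer signature and defaults match the openvpi audio-slicer, where threshold is an RMS threshold in dB and min_length, min_interval, hop_size, and max_sil_kept are in milliseconds. A standalone usage sketch under that assumption ("input.wav" is a placeholder):

import librosa
import soundfile

audio, sr = librosa.load("input.wav", sr=None, mono=False)  # placeholder input
slicer = Slicer(sr=sr, threshold=-40, min_length=5000, min_interval=500,
                hop_size=10, max_sil_kept=500)
for i, chunk in enumerate(slicer.slice(audio)):
    if len(chunk.shape) > 1:
        chunk = chunk.T  # (channels, samples) -> (samples, channels) for soundfile
    soundfile.write(f"split_{i}.wav", chunk, sr)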