Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,9 @@ import datetime
|
|
18 |
import librosa
|
19 |
import joblib
|
20 |
import warnings
|
|
|
|
|
|
|
21 |
|
22 |
# Suppress warnings for cleaner logs
|
23 |
warnings.filterwarnings("ignore")
|
@@ -51,7 +54,7 @@ def apply_reverb(audio):
|
|
51 |
return audio.overlay(reverb, position=1000)
|
52 |
|
53 |
def apply_pitch_shift(audio, semitones=-2):
|
54 |
-
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12))
|
55 |
samples = np.array(audio.get_array_of_samples())
|
56 |
resampled = np.interp(
|
57 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
@@ -172,7 +175,6 @@ def detect_genre(audio_path):
|
|
172 |
try:
|
173 |
y, sr = torchaudio.load(audio_path)
|
174 |
mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
|
175 |
-
# Dummy classifier β replace with real one later
|
176 |
return "Speech"
|
177 |
except Exception:
|
178 |
return "Unknown"
|
@@ -195,7 +197,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
195 |
try:
|
196 |
audio = AudioSegment.from_file(audio_file)
|
197 |
status = "π Applying effects..."
|
198 |
-
|
199 |
effect_map = {
|
200 |
"Noise Reduction": apply_noise_reduction,
|
201 |
"Compress Dynamic Range": apply_compression,
|
@@ -214,7 +216,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
214 |
audio = effect_map[effect_name](audio)
|
215 |
|
216 |
status = "πΎ Saving final audio..."
|
217 |
-
with tempfile.NamedTemporaryFile(delete=
|
218 |
if isolate_vocals:
|
219 |
temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
|
220 |
audio.export(temp_input, format="wav")
|
@@ -262,7 +264,87 @@ def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, ex
|
|
262 |
except Exception as e:
|
263 |
return None, f"β Batch processing failed: {str(e)}"
|
264 |
|
265 |
-
# ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
effect_options = [
|
267 |
"Noise Reduction",
|
268 |
"Compress Dynamic Range",
|
@@ -275,13 +357,10 @@ effect_options = [
|
|
275 |
"Normalize"
|
276 |
]
|
277 |
|
278 |
-
# === Multi-Tab UI ===
|
279 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
280 |
-
gr.Markdown(""
|
281 |
-
# π§ AI Audio Studio β Powered by Hugging Face & Demucs
|
282 |
-
Upload, edit, and export audio with AI-powered tools.
|
283 |
-
""")
|
284 |
|
|
|
285 |
with gr.Tab("π΅ Single File Studio"):
|
286 |
gr.Interface(
|
287 |
fn=process_audio,
|
@@ -306,11 +385,12 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
306 |
clear_btn=None
|
307 |
)
|
308 |
|
|
|
309 |
with gr.Tab("π Batch Processing"):
|
310 |
gr.Interface(
|
311 |
fn=batch_process_audio,
|
312 |
inputs=[
|
313 |
-
gr.File(label="Upload Multiple
|
314 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
315 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
316 |
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
|
@@ -327,7 +407,8 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
327 |
clear_btn=None
|
328 |
)
|
329 |
|
330 |
-
|
|
|
331 |
gr.Interface(
|
332 |
fn=stem_split,
|
333 |
inputs=gr.Audio(label="Upload Music Track", type="filepath"),
|
@@ -343,4 +424,90 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
343 |
clear_btn=None
|
344 |
)
|
345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
demo.launch()
|
|
|
18 |
import librosa
|
19 |
import joblib
|
20 |
import warnings
|
21 |
+
from faster_whisper import WhisperModel
|
22 |
+
from mutagen.mp3 import MP3
|
23 |
+
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
24 |
|
25 |
# Suppress warnings for cleaner logs
|
26 |
warnings.filterwarnings("ignore")
|
|
|
54 |
return audio.overlay(reverb, position=1000)
|
55 |
|
56 |
def apply_pitch_shift(audio, semitones=-2):
|
57 |
+
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12))
|
58 |
samples = np.array(audio.get_array_of_samples())
|
59 |
resampled = np.interp(
|
60 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
|
|
175 |
try:
|
176 |
y, sr = torchaudio.load(audio_path)
|
177 |
mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
|
|
|
178 |
return "Speech"
|
179 |
except Exception:
|
180 |
return "Unknown"
|
|
|
197 |
try:
|
198 |
audio = AudioSegment.from_file(audio_file)
|
199 |
status = "π Applying effects..."
|
200 |
+
|
201 |
effect_map = {
|
202 |
"Noise Reduction": apply_noise_reduction,
|
203 |
"Compress Dynamic Range": apply_compression,
|
|
|
216 |
audio = effect_map[effect_name](audio)
|
217 |
|
218 |
status = "πΎ Saving final audio..."
|
219 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
220 |
if isolate_vocals:
|
221 |
temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
|
222 |
audio.export(temp_input, format="wav")
|
|
|
264 |
except Exception as e:
|
265 |
return None, f"β Batch processing failed: {str(e)}"
|
266 |
|
267 |
+
# === Whisper Transcription Tab ===
# One shared "base" model instance serves every transcription request.
whisper_model = WhisperModel("base")


def transcribe_audio(audio_path):
    """Transcribe the speech in *audio_path* to plain text via faster-whisper."""
    segments, _info = whisper_model.transcribe(audio_path, beam_size=5)
    pieces = [segment.text for segment in segments]
    return " ".join(pieces)
|
274 |
+
|
275 |
+
# === TTS Tab ===
from TTS.api import TTS

# Shared synthesizer: Tacotron2-DDC trained on LJSpeech, loaded once at import.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)


def generate_tts(text):
    """Synthesize *text* to speech and return the path of the rendered WAV."""
    target = os.path.join(tempfile.gettempdir(), "tts_output.wav")
    tts.tts_to_file(text=text, file_path=target)
    return target
|
284 |
+
|
285 |
+
# === Analyze Audio Stats ===
def analyze_audio(audio_path):
    """Compute basic stats for a track and render its waveform.

    Args:
        audio_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        tuple: ``(stats, image)`` where ``stats`` is a dict with the mean RMS
        loudness, the fraction of near-silent samples, and the estimated tempo
        in BPM, and ``image`` is a PIL image of the waveform plot.
    """
    # NOTE(review): assumes plt, BytesIO and Image are imported at the top of
    # the file (not visible in this chunk) — confirm.
    y, sr = torchaudio.load(audio_path)
    mono = y.numpy().flatten()

    rms = np.mean(librosa.feature.rms(y=mono))
    tempo, _ = librosa.beat.beat_track(y=mono, sr=sr)
    # Fraction of samples below a fixed absolute-amplitude floor of 0.01.
    silence_ratio = np.mean(np.abs(y.numpy()) < 0.01)

    plt.figure(figsize=(10, 4))
    plt.plot(mono, color="lightblue")
    plt.title("Loudness Over Time")
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format="png")
    plt.close()
    # Fix: rewind the buffer so PIL reads the PNG from the start instead of
    # depending on Image.open's implementation-specific implicit seek.
    buf.seek(0)
    image = Image.open(buf)

    stats = {
        "rms_loudness": float(rms),
        "silence_ratio": float(silence_ratio),
        "tempo_bpm": int(tempo),
    }

    return stats, image
|
308 |
+
|
309 |
+
# === Vocal Removal (Karaoke Mode) ===
def vocal_removal(audio_path):
    """Produce an instrumental-only mix by summing all non-vocal stems."""
    stems = stem_split(audio_path)
    # First three stems are presumably drums, bass and "other" — the vocal
    # stem is the one left out of the sum. Confirm stem ordering upstream.
    drums, bass, other = stems[0], stems[1], stems[2]
    instrumental = drums + bass + other
    out_path = os.path.join(tempfile.gettempdir(), "instrumental.wav")
    torchaudio.save(out_path, instrumental, 44100)
    return out_path
|
316 |
+
|
317 |
+
# === Metadata Tagging ===
def tag_mp3(file_path, title, artist, album, year):
    """Write ID3 tags (title, artist, optional album/year) to an MP3 file.

    Args:
        file_path: Path to the MP3 to tag, saved in place.
        title: Track title (TIT2).
        artist: Artist name (TPE1).
        album: Album name (TALB); skipped when falsy.
        year: Release year (TYER); skipped when falsy.

    Returns:
        The tagged file path, or ``None`` when tagging fails.
    """
    try:
        audio = MP3(file_path)
        # Bug fix: the old code did ``audio.tags = ID3()`` inside a try and
        # ``add_tags()`` in a bare except — the assignment never raises, so it
        # unconditionally wiped any existing tags. Only create a fresh tag
        # container when the file has none.
        if audio.tags is None:
            audio.add_tags()
        audio.tags.add(TIT2(encoding=3, text=title))
        audio.tags.add(TPE1(encoding=3, text=artist))
        if album:
            audio.tags.add(TALB(encoding=3, text=album))
        if year:
            audio.tags.add(TYER(encoding=3, text=str(year)))
        audio.save()
        return file_path
    except Exception:
        # Best-effort by design: the UI treats None as "tagging failed".
        return None
|
335 |
+
|
336 |
+
# === Voice Style Transfer (Dummy) ===
def apply_style_transfer(audio_path, mood="Happy"):
    """Placeholder emotional style transfer: echoes the input clip back.

    ``mood`` is accepted (and currently ignored) so the UI contract is already
    in place for a real model.
    """
    # Replace with real model later
    return audio_path
|
340 |
+
|
341 |
+
# === Session Sharing (URL Encode) ===
def encode_preset(selected_effects, preset_name, export_format):
    """Serialize the current effect chain into a shareable preset URL."""
    payload = {
        "effects": selected_effects,
        "preset": preset_name,
        "format": export_format,
    }
    token = base64.b64encode(json.dumps(payload).encode()).decode()
    return f"https://huggingface.co/spaces/YOUR_USERNAME/AudioMaster?preset={token}"
|
346 |
+
|
347 |
+
# === UI ===
|
348 |
effect_options = [
|
349 |
"Noise Reduction",
|
350 |
"Compress Dynamic Range",
|
|
|
357 |
"Normalize"
|
358 |
]
|
359 |
|
|
|
360 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
361 |
+
gr.Markdown("## π§ AI Audio Studio β The Ultimate AI-Powered Tool\nUpload, edit, and export polished tracks β all powered by AI!")
|
|
|
|
|
|
|
362 |
|
363 |
+
# --- Single File Studio ---
|
364 |
with gr.Tab("π΅ Single File Studio"):
|
365 |
gr.Interface(
|
366 |
fn=process_audio,
|
|
|
385 |
clear_btn=None
|
386 |
)
|
387 |
|
388 |
+
# --- Batch Processing ---
|
389 |
with gr.Tab("π Batch Processing"):
|
390 |
gr.Interface(
|
391 |
fn=batch_process_audio,
|
392 |
inputs=[
|
393 |
+
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
394 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
395 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
396 |
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
|
|
|
407 |
clear_btn=None
|
408 |
)
|
409 |
|
410 |
+
# --- Remix Mode ---
|
411 |
+
with gr.Tab("π Remix Mode"):
|
412 |
gr.Interface(
|
413 |
fn=stem_split,
|
414 |
inputs=gr.Audio(label="Upload Music Track", type="filepath"),
|
|
|
424 |
clear_btn=None
|
425 |
)
|
426 |
|
427 |
+
# --- Transcribe & Edit ---
|
428 |
+
with gr.Tab("π Transcribe & Edit"):
|
429 |
+
gr.Interface(
|
430 |
+
fn=transcribe_audio,
|
431 |
+
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
432 |
+
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
433 |
+
title="Transcribe & Edit Spoken Content",
|
434 |
+
description="Convert voice to text, then edit the script before exporting again."
|
435 |
+
)
|
436 |
+
|
437 |
+
# --- TTS Voice Generator ---
|
438 |
+
with gr.Tab("π¬ TTS Voice Generator"):
|
439 |
+
gr.Interface(
|
440 |
+
fn=generate_tts,
|
441 |
+
inputs=gr.Textbox(label="Enter Text", lines=5),
|
442 |
+
outputs=gr.Audio(label="Generated Speech", type="filepath"),
|
443 |
+
title="Text-to-Speech Generator",
|
444 |
+
description="Type anything and turn it into natural-sounding speech."
|
445 |
+
)
|
446 |
+
|
447 |
+
# --- Audio Analysis Dashboard ---
|
448 |
+
with gr.Tab("π Audio Analysis"):
|
449 |
+
gr.Interface(
|
450 |
+
fn=analyze_audio,
|
451 |
+
inputs=gr.Audio(label="Upload Track", type="filepath"),
|
452 |
+
outputs=[
|
453 |
+
gr.JSON(label="Audio Stats"),
|
454 |
+
gr.Image(label="Waveform Graph")
|
455 |
+
],
|
456 |
+
title="View Loudness, BPM, Silence, and More",
|
457 |
+
description="Analyze audio loudness, tempo, and frequency content."
|
458 |
+
)
|
459 |
+
|
460 |
+
# --- Voice Style Transfer ---
|
461 |
+
with gr.Tab("π§ Voice Style Transfer"):
|
462 |
+
gr.Interface(
|
463 |
+
fn=apply_style_transfer,
|
464 |
+
inputs=[
|
465 |
+
gr.Audio(label="Upload Voice Clip", type="filepath"),
|
466 |
+
gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
|
467 |
+
],
|
468 |
+
outputs=gr.Audio(label="Stylized Output", type="filepath"),
|
469 |
+
title="Change Emotional Tone of Voice",
|
470 |
+
description="Shift the emotional style of any voice clip."
|
471 |
+
)
|
472 |
+
|
473 |
+
# --- Session Sharing ---
|
474 |
+
with gr.Tab("π§Ύ Session Sharing"):
|
475 |
+
gr.Interface(
|
476 |
+
fn=encode_preset,
|
477 |
+
inputs=[
|
478 |
+
gr.CheckboxGroup(choices=effect_options, label="Effects"),
|
479 |
+
gr.Dropdown(choices=preset_names, label="Preset"),
|
480 |
+
gr.Dropdown(choices=["MP3", "WAV"], label="Format")
|
481 |
+
],
|
482 |
+
outputs=gr.Textbox(label="Shareable Link", lines=1),
|
483 |
+
title="Save Your Settings and Share Them",
|
484 |
+
description="Generate a link to share your effect chain with others."
|
485 |
+
)
|
486 |
+
|
487 |
+
# --- Vocal Removal (Karaoke Mode) ---
|
488 |
+
with gr.Tab("π― Vocal Removal (Karaoke Mode)"):
|
489 |
+
gr.Interface(
|
490 |
+
fn=vocal_removal,
|
491 |
+
inputs=gr.Audio(label="Upload Song", type="filepath"),
|
492 |
+
outputs=gr.Audio(label="Instrumental Only", type="filepath"),
|
493 |
+
title="Remove Vocals from Any Track",
|
494 |
+
description="Create karaoke versions using AI"
|
495 |
+
)
|
496 |
+
|
497 |
+
# --- Metadata Tagging ---
|
498 |
+
with gr.Tab("π Add MP3 Tags"):
|
499 |
+
gr.Interface(
|
500 |
+
fn=tag_mp3,
|
501 |
+
inputs=[
|
502 |
+
gr.File(label="Upload MP3/WAV"),
|
503 |
+
gr.Textbox(label="Title"),
|
504 |
+
gr.Textbox(label="Artist"),
|
505 |
+
gr.Textbox(label="Album"),
|
506 |
+
gr.Number(label="Year")
|
507 |
+
],
|
508 |
+
outputs=gr.File(label="Tagged Audio File"),
|
509 |
+
title="Add Title, Artist, Album, Year to MP3",
|
510 |
+
description="Enhance your exported files with metadata tags"
|
511 |
+
)
|
512 |
+
|
513 |
demo.launch()
|