Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -22,7 +22,7 @@ from faster_whisper import WhisperModel
|
|
22 |
from mutagen.mp3 import MP3
|
23 |
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
24 |
|
25 |
-
# Suppress warnings
|
26 |
warnings.filterwarnings("ignore")
|
27 |
|
28 |
# === Helper Functions ===
|
@@ -54,7 +54,7 @@ def apply_reverb(audio):
|
|
54 |
return audio.overlay(reverb, position=1000)
|
55 |
|
56 |
def apply_pitch_shift(audio, semitones=-2):
|
57 |
-
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
|
58 |
samples = np.array(audio.get_array_of_samples())
|
59 |
resampled = np.interp(
|
60 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
@@ -306,47 +306,52 @@ def analyze_audio(audio_path):
|
|
306 |
|
307 |
return stats, image
|
308 |
|
309 |
-
# ===
|
310 |
-
def
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
return out_path
|
316 |
|
317 |
-
# ===
|
318 |
-
def
|
319 |
-
|
320 |
-
audio
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
audio.tags.add(TALB(encoding=3, text=album))
|
329 |
-
if year:
|
330 |
-
audio.tags.add(TYER(encoding=3, text=str(year)))
|
331 |
-
audio.save()
|
332 |
-
return file_path
|
333 |
-
except Exception as e:
|
334 |
-
return None
|
335 |
-
|
336 |
-
# === Voice Style Transfer (Dummy) ===
|
337 |
-
def apply_style_transfer(audio_path, mood="Happy"):
|
338 |
-
# Replace with real model later
|
339 |
-
return audio_path
|
340 |
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
data = {"effects": selected_effects, "preset": preset_name, "format": export_format}
|
346 |
-
encoded = base64.b64encode(json.dumps(data).encode()).decode()
|
347 |
-
return f"https://huggingface.co/spaces/tee342/AudioMaster?preset={encoded}"
|
348 |
|
349 |
-
# === UI ===
|
350 |
effect_options = [
|
351 |
"Noise Reduction",
|
352 |
"Compress Dynamic Range",
|
@@ -360,7 +365,7 @@ effect_options = [
|
|
360 |
]
|
361 |
|
362 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
363 |
-
gr.Markdown("## π§ AI Audio Studio
|
364 |
|
365 |
# --- Single File Studio ---
|
366 |
with gr.Tab("π΅ Single File Studio"):
|
@@ -433,7 +438,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
433 |
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
434 |
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
435 |
title="Transcribe & Edit Spoken Content",
|
436 |
-
description="Convert voice to text
|
437 |
)
|
438 |
|
439 |
# --- TTS Voice Generator ---
|
@@ -446,70 +451,68 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
446 |
description="Type anything and turn it into natural-sounding speech."
|
447 |
)
|
448 |
|
449 |
-
# ---
|
450 |
-
with gr.Tab("
|
451 |
gr.Interface(
|
452 |
-
fn=
|
453 |
-
inputs=
|
454 |
-
|
455 |
-
gr.
|
456 |
-
gr.Image(label="Waveform Graph")
|
457 |
],
|
458 |
-
|
459 |
-
|
|
|
460 |
)
|
461 |
|
462 |
-
# --- Voice
|
463 |
-
with gr.Tab("
|
464 |
gr.Interface(
|
465 |
-
fn=
|
466 |
inputs=[
|
467 |
-
gr.
|
468 |
-
gr.
|
469 |
],
|
470 |
-
outputs=gr.Audio(label="
|
471 |
-
title="
|
472 |
-
description="
|
473 |
)
|
474 |
|
475 |
-
# ---
|
476 |
-
with gr.Tab("
|
477 |
gr.Interface(
|
478 |
-
fn=
|
479 |
-
inputs=
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
],
|
484 |
-
outputs=gr.Textbox(label="Shareable Link", lines=1),
|
485 |
-
title="Save Your Settings and Share Them",
|
486 |
-
description="Generate a link to share your effect chain with others."
|
487 |
)
|
488 |
|
489 |
-
# ---
|
490 |
-
with gr.Tab("
|
491 |
gr.Interface(
|
492 |
-
fn=
|
493 |
-
inputs=
|
494 |
-
|
495 |
-
|
496 |
-
|
|
|
|
|
|
|
|
|
497 |
)
|
498 |
|
499 |
-
# ---
|
500 |
-
with gr.Tab("
|
501 |
gr.Interface(
|
502 |
-
fn=
|
503 |
inputs=[
|
504 |
-
gr.File(label="
|
505 |
-
gr.
|
506 |
-
gr.
|
507 |
-
gr.Textbox(label="Album"),
|
508 |
-
gr.Number(label="Year")
|
509 |
],
|
510 |
-
outputs=gr.File(label="
|
511 |
-
title="
|
512 |
-
description="
|
513 |
)
|
514 |
|
515 |
demo.launch()
|
|
|
22 |
from mutagen.mp3 import MP3
|
23 |
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
24 |
|
25 |
+
# Suppress warnings
|
26 |
warnings.filterwarnings("ignore")
|
27 |
|
28 |
# === Helper Functions ===
|
|
|
54 |
return audio.overlay(reverb, position=1000)
|
55 |
|
56 |
def apply_pitch_shift(audio, semitones=-2):
|
57 |
+
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
|
58 |
samples = np.array(audio.get_array_of_samples())
|
59 |
resampled = np.interp(
|
60 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
|
|
306 |
|
307 |
return stats, image
|
308 |
|
309 |
+
# === Auto-Save Sessions ===
|
310 |
+
def auto_save_session(data):
    """Encode session data into a shareable "load" URL.

    ``data`` is serialized with ``json.dumps``, Base64-encoded, and placed in
    the ``load`` query parameter of the studio URL.

    Args:
        data: Any JSON-serializable object describing the session.

    Returns:
        The URL string carrying the encoded session.

    Raises:
        TypeError: If ``data`` cannot be serialized by ``json.dumps``.
    """
    import base64
    import json
    from urllib.parse import quote

    encoded = base64.b64encode(json.dumps(data).encode()).decode()
    # Standard Base64 may contain "+", "/" and "=", which are not safe inside
    # a query string ("+" is decoded as a space). Percent-encoding keeps the
    # token intact: a normal query-string parser yields the exact Base64 text.
    token = quote(encoded, safe="")
    # NOTE(review): the host looks like a placeholder - confirm the real URL.
    return f"https://your-studio-url?load={token}"
|
314 |
+
|
315 |
+
# === Voiceprint Matching ===
|
316 |
+
from resemblyzer import preprocess_wav, VoiceEncoder
|
317 |
+
|
318 |
+
# Speaker-embedding model instance; match_speakers() calls its embed_utterance().
encoder = VoiceEncoder()
|
319 |
+
|
320 |
+
def match_speakers(clip1, clip2):
    """Score how similar the speakers in two audio clips are.

    Each clip is preprocessed, embedded with the shared ``encoder``, and the
    inner product of the two embeddings is reported.

    Args:
        clip1: First clip, as a filesystem path or an upload object that
            exposes its path through a ``name`` attribute.
        clip2: Second clip, same accepted forms as ``clip1``.

    Returns:
        A string of the form ``"Speaker Match Score: 0.87"``.
    """
    import os

    # Uploaded files may arrive either as plain paths or as temp-file objects
    # whose path is in ``.name`` (load_project in this file reads ``.name``).
    # Real paths are checked first because ``pathlib.Path.name`` is only the
    # basename and must not be mistaken for the full path.
    sources = [
        clip if isinstance(clip, (str, os.PathLike)) else getattr(clip, "name", clip)
        for clip in (clip1, clip2)
    ]
    wav1 = preprocess_wav(sources[0])
    wav2 = preprocess_wav(sources[1])
    embed1 = encoder.embed_utterance(wav1)
    embed2 = encoder.embed_utterance(wav2)
    # NOTE(review): the inner product equals cosine similarity only if the
    # embeddings are unit-length - confirm against the encoder's documentation.
    similarity = np.inner(embed1, embed2)
    return f"Speaker Match Score: {similarity:.2f}"
|
327 |
+
|
328 |
+
# === Mix Two Tracks ===
|
329 |
+
def mix_tracks(track1, track2, volume_offset=0):
    """Overlay ``track2`` on top of ``track1`` and export the mix as WAV.

    Args:
        track1: Main track, in any form ``AudioSegment.from_file`` accepts.
        track2: Track to overlay, same accepted forms as ``track1``.
        volume_offset: Value subtracted from ``track2`` before overlaying
            (``a2 - volume_offset``); with pydub segments this lowers the
            gain by that many dB, so a negative value makes it louder.

    Returns:
        Path of the exported WAV file in the system temp directory.
    """
    a1 = AudioSegment.from_file(track1)
    a2 = AudioSegment.from_file(track2)
    mixed = a1.overlay(a2 - volume_offset)
    # Use a unique file per call: a fixed "mixed.wav" would be overwritten
    # when two requests are processed at the same time.
    fd, out_path = tempfile.mkstemp(prefix="mixed_", suffix=".wav")
    os.close(fd)  # export() reopens the path itself
    mixed.export(out_path, format="wav")
    return out_path
|
336 |
|
337 |
+
# === Save/Load Project File (.aiproj) ===
|
338 |
+
def save_project(audio_path, preset_name, effects):
    """Bundle an audio file with its preset and effect list into a project file.

    The project is a pickled dict written to a ``.aiproj`` file in the system
    temp directory.

    Args:
        audio_path: Source audio, in any form ``AudioSegment.from_file`` accepts.
        preset_name: Name of the preset used for the session.
        effects: Effects applied in the session.

    Returns:
        Path of the written ``.aiproj`` file.
    """
    segment = AudioSegment.from_file(audio_path)
    project_data = {
        "audio": segment.raw_data,
        # Raw PCM bytes cannot be decoded without these three parameters, so
        # store them alongside the samples. Extra keys are ignored by readers
        # that only look at "preset" and "effects".
        "frame_rate": segment.frame_rate,
        "channels": segment.channels,
        "sample_width": segment.sample_width,
        "preset": preset_name,
        "effects": effects,
    }
    # Use a unique file per call: a fixed "project.aiproj" would be
    # overwritten when two requests are processed at the same time.
    fd, out_path = tempfile.mkstemp(prefix="project_", suffix=".aiproj")
    with os.fdopen(fd, "wb") as f:
        pickle.dump(project_data, f)
    return out_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
348 |
|
349 |
+
def load_project(project_file):
    """Read the preset and effect list back from a saved project file.

    Args:
        project_file: Project file, as a filesystem path or an upload object
            that exposes its path through a ``name`` attribute.

    Returns:
        A ``(preset, effects)`` tuple taken from the stored project dict.

    Raises:
        KeyError: If the stored dict lacks ``"preset"`` or ``"effects"``.
    """
    import os

    # Real paths are checked first because ``pathlib.Path.name`` is only the
    # basename; anything else is expected to carry its path in ``.name``.
    if isinstance(project_file, (str, os.PathLike)):
        path = project_file
    else:
        path = project_file.name
    with open(path, "rb") as f:
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file. Only load project files from trusted sources.
        # NOTE(review): consider a non-executable format (e.g. JSON plus a
        # separate audio file) if projects can be uploaded by other users.
        data = pickle.load(f)
    return data["preset"], data["effects"]
|
|
|
|
|
|
|
353 |
|
354 |
+
# === UI ===
|
355 |
effect_options = [
|
356 |
"Noise Reduction",
|
357 |
"Compress Dynamic Range",
|
|
|
365 |
]
|
366 |
|
367 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
368 |
+
gr.Markdown("## π§ Ultimate AI Audio Studio\nUpload, edit, export β powered by AI!")
|
369 |
|
370 |
# --- Single File Studio ---
|
371 |
with gr.Tab("π΅ Single File Studio"):
|
|
|
438 |
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
439 |
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
440 |
title="Transcribe & Edit Spoken Content",
|
441 |
+
description="Convert voice to text and edit it before re-exporting."
|
442 |
)
|
443 |
|
444 |
# --- TTS Voice Generator ---
|
|
|
451 |
description="Type anything and turn it into natural-sounding speech."
|
452 |
)
|
453 |
|
454 |
+
# --- Voiceprint Matching ---
|
455 |
+
with gr.Tab("π§ββοΈ Match Speakers"):
|
456 |
gr.Interface(
|
457 |
+
fn=match_speakers,
|
458 |
+
inputs=[
|
459 |
+
gr.File(label="Clip 1"),
|
460 |
+
gr.File(label="Clip 2")
|
|
|
461 |
],
|
462 |
+
outputs=gr.Textbox(label="Match Score", lines=1),
|
463 |
+
title="Are These the Same Person?",
|
464 |
+
description="Detect speaker similarity using AI."
|
465 |
)
|
466 |
|
467 |
+
# --- Voice Cloning (AI Dubbing) ---
|
468 |
+
with gr.Tab("π Voice Cloning (AI Dubbing)"):
|
469 |
gr.Interface(
|
470 |
+
fn=clone_voice,
|
471 |
inputs=[
|
472 |
+
gr.File(label="Source Voice"),
|
473 |
+
gr.File(label="Target Voice")
|
474 |
],
|
475 |
+
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
476 |
+
title="Replace Voice with Another",
|
477 |
+
description="Clone voice from source to target speaker."
|
478 |
)
|
479 |
|
480 |
+
# --- AI Mastering Mode ---
|
481 |
+
with gr.Tab("π AI Mastering Mode"):
|
482 |
gr.Interface(
|
483 |
+
fn=ai_mastering,
|
484 |
+
inputs=gr.Audio(label="Upload Track", type="filepath"),
|
485 |
+
outputs=gr.Audio(label="Mastered Output", type="filepath"),
|
486 |
+
title="Auto-Master Your Track",
|
487 |
+
description="Smart mastering for streaming platforms like Spotify, YouTube, or podcasts."
|
|
|
|
|
|
|
|
|
488 |
)
|
489 |
|
490 |
+
# --- Mix Two Tracks ---
|
491 |
+
with gr.Tab("π Mix Two Tracks"):
|
492 |
gr.Interface(
|
493 |
+
fn=mix_tracks,
|
494 |
+
inputs=[
|
495 |
+
gr.File(label="Main Track"),
|
496 |
+
gr.File(label="Background Track"),
|
497 |
+
gr.Slider(minimum=-10, maximum=10, value=0, label="Volume Offset (dB)")
|
498 |
+
],
|
499 |
+
outputs=gr.File(label="Mixed Output"),
|
500 |
+
title="Overlay Two Tracks",
|
501 |
+
description="Mix, blend, or subtract two audio files."
|
502 |
)
|
503 |
|
504 |
+
# --- Load/Save Project ---
|
505 |
+
with gr.Tab("π Save/Load Project"):
|
506 |
gr.Interface(
|
507 |
+
fn=save_project,
|
508 |
inputs=[
|
509 |
+
gr.File(label="Original Audio"),
|
510 |
+
gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
|
511 |
+
gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
|
|
|
|
|
512 |
],
|
513 |
+
outputs=gr.File(label="Project File (.aiproj)"),
|
514 |
+
title="Save Everything Together",
|
515 |
+
description="Save your session, effects, and settings in one file to reuse later."
|
516 |
)
|
517 |
|
518 |
demo.launch()
|