Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,7 +22,7 @@ from faster_whisper import WhisperModel
|
|
| 22 |
from mutagen.mp3 import MP3
|
| 23 |
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
| 24 |
|
| 25 |
-
# Suppress warnings
|
| 26 |
warnings.filterwarnings("ignore")
|
| 27 |
|
| 28 |
# === Helper Functions ===
|
|
@@ -54,7 +54,7 @@ def apply_reverb(audio):
|
|
| 54 |
return audio.overlay(reverb, position=1000)
|
| 55 |
|
| 56 |
def apply_pitch_shift(audio, semitones=-2):
|
| 57 |
-
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
|
| 58 |
samples = np.array(audio.get_array_of_samples())
|
| 59 |
resampled = np.interp(
|
| 60 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
|
@@ -306,47 +306,52 @@ def analyze_audio(audio_path):
|
|
| 306 |
|
| 307 |
return stats, image
|
| 308 |
|
| 309 |
-
# ===
|
| 310 |
-
def
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
return out_path
|
| 316 |
|
| 317 |
-
# ===
|
| 318 |
-
def
|
| 319 |
-
|
| 320 |
-
audio
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
audio.tags.add(TALB(encoding=3, text=album))
|
| 329 |
-
if year:
|
| 330 |
-
audio.tags.add(TYER(encoding=3, text=str(year)))
|
| 331 |
-
audio.save()
|
| 332 |
-
return file_path
|
| 333 |
-
except Exception as e:
|
| 334 |
-
return None
|
| 335 |
-
|
| 336 |
-
# === Voice Style Transfer (Dummy) ===
|
| 337 |
-
def apply_style_transfer(audio_path, mood="Happy"):
|
| 338 |
-
# Replace with real model later
|
| 339 |
-
return audio_path
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
data = {"effects": selected_effects, "preset": preset_name, "format": export_format}
|
| 346 |
-
encoded = base64.b64encode(json.dumps(data).encode()).decode()
|
| 347 |
-
return f"https://huggingface.co/spaces/tee342/AudioMaster?preset={encoded}"
|
| 348 |
|
| 349 |
-
# === UI ===
|
| 350 |
effect_options = [
|
| 351 |
"Noise Reduction",
|
| 352 |
"Compress Dynamic Range",
|
|
@@ -360,7 +365,7 @@ effect_options = [
|
|
| 360 |
]
|
| 361 |
|
| 362 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
| 363 |
-
gr.Markdown("## π§ AI Audio Studio
|
| 364 |
|
| 365 |
# --- Single File Studio ---
|
| 366 |
with gr.Tab("π΅ Single File Studio"):
|
|
@@ -433,7 +438,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 433 |
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
| 434 |
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
| 435 |
title="Transcribe & Edit Spoken Content",
|
| 436 |
-
description="Convert voice to text
|
| 437 |
)
|
| 438 |
|
| 439 |
# --- TTS Voice Generator ---
|
|
@@ -446,70 +451,68 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 446 |
description="Type anything and turn it into natural-sounding speech."
|
| 447 |
)
|
| 448 |
|
| 449 |
-
# ---
|
| 450 |
-
with gr.Tab("
|
| 451 |
gr.Interface(
|
| 452 |
-
fn=
|
| 453 |
-
inputs=
|
| 454 |
-
|
| 455 |
-
gr.
|
| 456 |
-
gr.Image(label="Waveform Graph")
|
| 457 |
],
|
| 458 |
-
|
| 459 |
-
|
|
|
|
| 460 |
)
|
| 461 |
|
| 462 |
-
# --- Voice
|
| 463 |
-
with gr.Tab("
|
| 464 |
gr.Interface(
|
| 465 |
-
fn=
|
| 466 |
inputs=[
|
| 467 |
-
gr.
|
| 468 |
-
gr.
|
| 469 |
],
|
| 470 |
-
outputs=gr.Audio(label="
|
| 471 |
-
title="
|
| 472 |
-
description="
|
| 473 |
)
|
| 474 |
|
| 475 |
-
# ---
|
| 476 |
-
with gr.Tab("
|
| 477 |
gr.Interface(
|
| 478 |
-
fn=
|
| 479 |
-
inputs=
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
],
|
| 484 |
-
outputs=gr.Textbox(label="Shareable Link", lines=1),
|
| 485 |
-
title="Save Your Settings and Share Them",
|
| 486 |
-
description="Generate a link to share your effect chain with others."
|
| 487 |
)
|
| 488 |
|
| 489 |
-
# ---
|
| 490 |
-
with gr.Tab("
|
| 491 |
gr.Interface(
|
| 492 |
-
fn=
|
| 493 |
-
inputs=
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
)
|
| 498 |
|
| 499 |
-
# ---
|
| 500 |
-
with gr.Tab("
|
| 501 |
gr.Interface(
|
| 502 |
-
fn=
|
| 503 |
inputs=[
|
| 504 |
-
gr.File(label="
|
| 505 |
-
gr.
|
| 506 |
-
gr.
|
| 507 |
-
gr.Textbox(label="Album"),
|
| 508 |
-
gr.Number(label="Year")
|
| 509 |
],
|
| 510 |
-
outputs=gr.File(label="
|
| 511 |
-
title="
|
| 512 |
-
description="
|
| 513 |
)
|
| 514 |
|
| 515 |
demo.launch()
|
|
|
|
| 22 |
from mutagen.mp3 import MP3
|
| 23 |
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
| 24 |
|
| 25 |
+
# Suppress warnings
|
| 26 |
warnings.filterwarnings("ignore")
|
| 27 |
|
| 28 |
# === Helper Functions ===
|
|
|
|
| 54 |
return audio.overlay(reverb, position=1000)
|
| 55 |
|
| 56 |
def apply_pitch_shift(audio, semitones=-2):
|
| 57 |
+
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
|
| 58 |
samples = np.array(audio.get_array_of_samples())
|
| 59 |
resampled = np.interp(
|
| 60 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
|
|
|
| 306 |
|
| 307 |
return stats, image
|
| 308 |
|
| 309 |
+
# === Auto-Save Sessions ===
|
| 310 |
+
def auto_save_session(data):
    """Encode a session payload into a shareable ``?load=`` URL.

    The payload is serialized with ``json.dumps``, base64-encoded, and placed
    in the ``load`` query parameter of the studio URL.

    Args:
        data: Any JSON-serializable object describing the session.

    Returns:
        The URL string carrying the encoded session.

    Raises:
        TypeError: If ``data`` cannot be serialized by ``json.dumps``.
    """
    import base64
    from urllib.parse import quote

    encoded = base64.b64encode(json.dumps(data).encode()).decode()
    # Standard base64 can contain "+", "/" and "=". Inside a query string a
    # raw "+" is read back as a space by form-decoding parsers, which corrupts
    # the payload. Percent-encoding keeps the value intact: a normal URL
    # parser hands the receiver the exact base64 text produced above.
    query_value = quote(encoded, safe="")
    return f"https://your-studio-url?load={query_value}"
|
| 314 |
+
|
| 315 |
+
# === Voiceprint Matching ===
|
| 316 |
+
from resemblyzer import preprocess_wav, VoiceEncoder
|
| 317 |
+
|
| 318 |
+
encoder = VoiceEncoder()
|
| 319 |
+
|
| 320 |
+
def match_speakers(clip1, clip2):
    """Score how similar the speakers in two audio clips are.

    Each clip is preprocessed with ``preprocess_wav``, embedded with the
    module-level ``encoder``, and the two embeddings are compared with an
    inner product.

    Args:
        clip1: First clip, as a path (``str`` / path-like) or an uploaded-file
            object exposing the path in ``.name``.
        clip2: Second clip, same accepted forms as ``clip1``.

    Returns:
        A string of the form ``"Speaker Match Score: 0.87"``.
    """
    import os

    sources = []
    for clip in (clip1, clip2):
        # The UI feeds this function from gr.File inputs, which may deliver a
        # temp-file object rather than a path (load_project in this file reads
        # ``.name`` from such an object). Unwrap it so preprocess_wav receives
        # a real path. NOTE(review): preprocess_wav appears to treat anything
        # that is not a str/Path as an already-loaded waveform - confirm.
        if not isinstance(clip, (str, os.PathLike)):
            clip = getattr(clip, "name", clip)
        sources.append(clip)

    wav1 = preprocess_wav(sources[0])
    wav2 = preprocess_wav(sources[1])
    embed1 = encoder.embed_utterance(wav1)
    embed2 = encoder.embed_utterance(wav2)
    similarity = np.inner(embed1, embed2)
    return f"Speaker Match Score: {similarity:.2f}"
|
| 327 |
+
|
| 328 |
+
# === Mix Two Tracks ===
|
| 329 |
+
def mix_tracks(track1, track2, volume_offset=0):
    """Overlay ``track2`` on top of ``track1`` and export the mix as WAV.

    Args:
        track1: Main track, as a path (``str`` / path-like) or an
            uploaded-file object exposing the path in ``.name``.
        track2: Track to overlay, same accepted forms as ``track1``.
        volume_offset: Amount subtracted from ``track2`` before overlaying
            (the UI labels it in dB); positive values make ``track2`` quieter.

    Returns:
        Path of the exported WAV file in the system temp directory.
    """
    paths = []
    for track in (track1, track2):
        # gr.File inputs may arrive as temp-file objects rather than paths
        # (load_project in this file reads ``.name`` from such an object);
        # unwrap them so the audio is always loaded from its on-disk path.
        if not isinstance(track, (str, os.PathLike)):
            track = getattr(track, "name", track)
        paths.append(track)

    a1 = AudioSegment.from_file(paths[0])
    a2 = AudioSegment.from_file(paths[1])
    mixed = a1.overlay(a2 - volume_offset)

    # A fixed "mixed.wav" in the shared temp dir would be overwritten by any
    # concurrent request; reserve a unique file per call instead.
    fd, out_path = tempfile.mkstemp(prefix="mixed_", suffix=".wav")
    os.close(fd)
    mixed.export(out_path, format="wav")
    return out_path
|
| 336 |
|
| 337 |
+
# === Save/Load Project File (.aiproj) ===
|
| 338 |
+
def save_project(audio_path, preset_name, effects):
    """Bundle audio, preset name and effect list into a ``.aiproj`` file.

    The project is a pickled dict with the keys ``"audio"`` (the decoded
    audio's ``raw_data``), ``"preset"`` and ``"effects"``.

    Args:
        audio_path: Source audio, as a path (``str`` / path-like) or an
            uploaded-file object exposing the path in ``.name``.
        preset_name: Name of the preset used for the session.
        effects: The effects applied in the session.

    Returns:
        Path of the written project file in the system temp directory.
    """
    # gr.File inputs may arrive as temp-file objects rather than paths
    # (load_project in this file reads ``.name`` from such an object).
    if not isinstance(audio_path, (str, os.PathLike)):
        audio_path = getattr(audio_path, "name", audio_path)

    project_data = {
        # NOTE(review): only the raw sample bytes are stored; frame rate,
        # sample width and channel count are not part of the project file.
        "audio": AudioSegment.from_file(audio_path).raw_data,
        "preset": preset_name,
        "effects": effects,
    }

    # A fixed "project.aiproj" in the shared temp dir would be overwritten by
    # any concurrent request; reserve a unique file per call instead.
    fd, out_path = tempfile.mkstemp(prefix="project_", suffix=".aiproj")
    with os.fdopen(fd, "wb") as f:
        pickle.dump(project_data, f)
    return out_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
+
def load_project(project_file):
    """Read the preset name and effect list back from a ``.aiproj`` file.

    Args:
        project_file: The project file, as a path (``str`` / path-like) or an
            uploaded-file object exposing the path in ``.name``.

    Returns:
        A ``(preset, effects)`` tuple taken from the stored project dict.

    Raises:
        KeyError: If the stored dict lacks ``"preset"`` or ``"effects"``.
        OSError: If the file cannot be opened.
    """
    import os

    # Accept a plain path as well as an upload object; previously a path
    # string failed with AttributeError on ``.name``.
    if isinstance(project_file, (str, os.PathLike)):
        path = project_file
    else:
        path = project_file.name

    with open(path, "rb") as f:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Project files reach this function via user upload, so only
        # load files from trusted sources; a data-only format (e.g. JSON plus
        # a separate audio file) would remove this risk.
        data = pickle.load(f)
    return data["preset"], data["effects"]
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
+
# === UI ===
|
| 355 |
effect_options = [
|
| 356 |
"Noise Reduction",
|
| 357 |
"Compress Dynamic Range",
|
|
|
|
| 365 |
]
|
| 366 |
|
| 367 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
| 368 |
+
gr.Markdown("## π§ Ultimate AI Audio Studio\nUpload, edit, export β powered by AI!")
|
| 369 |
|
| 370 |
# --- Single File Studio ---
|
| 371 |
with gr.Tab("π΅ Single File Studio"):
|
|
|
|
| 438 |
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
| 439 |
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
| 440 |
title="Transcribe & Edit Spoken Content",
|
| 441 |
+
description="Convert voice to text and edit it before re-exporting."
|
| 442 |
)
|
| 443 |
|
| 444 |
# --- TTS Voice Generator ---
|
|
|
|
| 451 |
description="Type anything and turn it into natural-sounding speech."
|
| 452 |
)
|
| 453 |
|
| 454 |
+
# --- Voiceprint Matching ---
|
| 455 |
+
with gr.Tab("π§ββοΈ Match Speakers"):
|
| 456 |
gr.Interface(
|
| 457 |
+
fn=match_speakers,
|
| 458 |
+
inputs=[
|
| 459 |
+
gr.File(label="Clip 1"),
|
| 460 |
+
gr.File(label="Clip 2")
|
|
|
|
| 461 |
],
|
| 462 |
+
outputs=gr.Textbox(label="Match Score", lines=1),
|
| 463 |
+
title="Are These the Same Person?",
|
| 464 |
+
description="Detect speaker similarity using AI."
|
| 465 |
)
|
| 466 |
|
| 467 |
+
# --- Voice Cloning (AI Dubbing) ---
|
| 468 |
+
with gr.Tab("π Voice Cloning (AI Dubbing)"):
|
| 469 |
gr.Interface(
|
| 470 |
+
fn=clone_voice,
|
| 471 |
inputs=[
|
| 472 |
+
gr.File(label="Source Voice"),
|
| 473 |
+
gr.File(label="Target Voice")
|
| 474 |
],
|
| 475 |
+
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 476 |
+
title="Replace Voice with Another",
|
| 477 |
+
description="Clone voice from source to target speaker."
|
| 478 |
)
|
| 479 |
|
| 480 |
+
# --- AI Mastering Mode ---
|
| 481 |
+
with gr.Tab("π AI Mastering Mode"):
|
| 482 |
gr.Interface(
|
| 483 |
+
fn=ai_mastering,
|
| 484 |
+
inputs=gr.Audio(label="Upload Track", type="filepath"),
|
| 485 |
+
outputs=gr.Audio(label="Mastered Output", type="filepath"),
|
| 486 |
+
title="Auto-Master Your Track",
|
| 487 |
+
description="Smart mastering for streaming platforms like Spotify, YouTube, or podcasts."
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
)
|
| 489 |
|
| 490 |
+
# --- Mix Two Tracks ---
|
| 491 |
+
with gr.Tab("π Mix Two Tracks"):
|
| 492 |
gr.Interface(
|
| 493 |
+
fn=mix_tracks,
|
| 494 |
+
inputs=[
|
| 495 |
+
gr.File(label="Main Track"),
|
| 496 |
+
gr.File(label="Background Track"),
|
| 497 |
+
gr.Slider(minimum=-10, maximum=10, value=0, label="Volume Offset (dB)")
|
| 498 |
+
],
|
| 499 |
+
outputs=gr.File(label="Mixed Output"),
|
| 500 |
+
title="Overlay Two Tracks",
|
| 501 |
+
description="Mix, blend, or subtract two audio files."
|
| 502 |
)
|
| 503 |
|
| 504 |
+
# --- Load/Save Project ---
|
| 505 |
+
with gr.Tab("π Save/Load Project"):
|
| 506 |
gr.Interface(
|
| 507 |
+
fn=save_project,
|
| 508 |
inputs=[
|
| 509 |
+
gr.File(label="Original Audio"),
|
| 510 |
+
gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
|
| 511 |
+
gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
|
|
|
|
|
|
|
| 512 |
],
|
| 513 |
+
outputs=gr.File(label="Project File (.aiproj)"),
|
| 514 |
+
title="Save Everything Together",
|
| 515 |
+
description="Save your session, effects, and settings in one file to reuse later."
|
| 516 |
)
|
| 517 |
|
| 518 |
demo.launch()
|