Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,6 +20,7 @@ from faster_whisper import WhisperModel
|
|
| 20 |
from TTS.api import TTS
|
| 21 |
import base64
|
| 22 |
import pickle
|
|
|
|
| 23 |
|
| 24 |
# Suppress warnings
|
| 25 |
warnings.filterwarnings("ignore")
|
|
@@ -136,7 +137,8 @@ def auto_eq(audio, genre="Pop"):
|
|
| 136 |
"Acoustic": [(100, 300, -3), (4000, 8000, +2)],
|
| 137 |
"Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
|
| 138 |
"Trap": [(80, 120, +6), (3000, 6000, -4)],
|
| 139 |
-
"LoFi": [(20, 200, +3), (1000, 3000, -2)]
|
|
|
|
| 140 |
}
|
| 141 |
|
| 142 |
from scipy.signal import butter, sosfilt
|
|
@@ -245,7 +247,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
| 245 |
status = f"β Error: {str(e)}"
|
| 246 |
return None, None, status, "", status
|
| 247 |
|
| 248 |
-
# ===
|
| 249 |
def show_waveform(audio_file):
|
| 250 |
try:
|
| 251 |
audio = AudioSegment.from_file(audio_file)
|
|
@@ -308,17 +310,14 @@ def get_preset_cards():
|
|
| 308 |
return card_paths
|
| 309 |
|
| 310 |
# === Load Preset by Name ===
|
| 311 |
-
def load_preset_by_card(
|
| 312 |
-
|
|
|
|
| 313 |
return name, preset_choices[name]
|
| 314 |
|
| 315 |
# === Logo Embedding (Base64 or file) ===
|
| 316 |
def get_logo():
|
| 317 |
-
|
| 318 |
-
with open("logo.png", "rb") as img_file:
|
| 319 |
-
return "data:image/png;base64," + base64.b64encode(img_file.read()).decode()
|
| 320 |
-
except FileNotFoundError:
|
| 321 |
-
return "https://via.placeholder.com/400x100?text=AI+Audio+Studio"
|
| 322 |
|
| 323 |
# === Main UI ===
|
| 324 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
@@ -329,8 +328,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 329 |
with gr.Row():
|
| 330 |
with gr.Column(min_width=300):
|
| 331 |
input_audio = gr.Audio(label="Upload Audio", type="filepath")
|
| 332 |
-
effect_checkbox = gr.CheckboxGroup(choices=preset_choices
|
| 333 |
-
label="Apply Effects in Order")
|
| 334 |
preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
|
| 335 |
export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
| 336 |
isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
|
|
@@ -338,12 +336,15 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 338 |
with gr.Column(min_width=300):
|
| 339 |
output_audio = gr.Audio(label="Processed Audio", type="filepath")
|
| 340 |
waveform_img = gr.Image(label="Waveform Preview")
|
| 341 |
-
|
|
|
|
| 342 |
status_box = gr.Textbox(label="Status", value="β
Ready", lines=1)
|
| 343 |
|
| 344 |
submit_btn.click(fn=process_audio, inputs=[
|
| 345 |
input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
|
| 346 |
-
], outputs=[
|
|
|
|
|
|
|
| 347 |
|
| 348 |
# --- AI Mastering Chain Tab ===
|
| 349 |
with gr.Tab("π§ AI Mastering Chain"):
|
|
@@ -384,22 +385,16 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 384 |
("https://via.placeholder.com/150x100?text=Rock", "Rock"),
|
| 385 |
("https://via.placeholder.com/150x100?text=Hip-Hop", "Hip-Hop"),
|
| 386 |
("https://via.placeholder.com/150x100?text=Acoustic", "Acoustic"),
|
| 387 |
-
("https://via.placeholder.com/150x100?text=Tube
|
| 388 |
("https://via.placeholder.com/150x100?text=Stage+Mode", "Stage Mode"),
|
| 389 |
("https://via.placeholder.com/150x100?text=Vocal+Distortion", "Vocal Distortion")
|
| 390 |
]
|
| 391 |
|
| 392 |
preset_gallery = gr.Gallery(value=preset_images, label="Preset Cards", columns=4, height="auto")
|
| 393 |
preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
|
| 394 |
-
preset_effects_out = gr.CheckboxGroup(choices=
|
| 395 |
-
|
| 396 |
-
def select_preset(evt: gr.SelectData):
|
| 397 |
-
selected = evt.index
|
| 398 |
-
name = preset_names[selected % len(preset_names)]
|
| 399 |
-
effects = preset_choices.get(name, [])
|
| 400 |
-
return name, effects
|
| 401 |
|
| 402 |
-
preset_gallery.select(fn=
|
| 403 |
|
| 404 |
# --- Vocal Doubler / Harmonizer ===
|
| 405 |
with gr.Tab("π§ Vocal Doubler / Harmonizer"):
|
|
@@ -408,8 +403,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 408 |
inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
|
| 409 |
outputs=gr.Audio(label="Doubled Output", type="filepath"),
|
| 410 |
title="Add Vocal Doubling / Harmony",
|
| 411 |
-
description="Enhance vocals with doubling or harmony"
|
| 412 |
-
allow_flagging="never"
|
| 413 |
)
|
| 414 |
|
| 415 |
# --- Remix Mode ---
|
|
@@ -435,7 +429,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 435 |
fn=batch_process_audio,
|
| 436 |
inputs=[
|
| 437 |
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
| 438 |
-
gr.CheckboxGroup(choices=preset_choices
|
| 439 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 440 |
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
|
| 441 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
|
@@ -465,6 +459,23 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 465 |
)
|
| 466 |
|
| 467 |
# --- Real-Time Spectrum Analyzer + Live EQ Preview ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
with gr.Tab("π Frequency Spectrum"):
|
| 469 |
gr.Interface(
|
| 470 |
fn=visualize_spectrum,
|
|
@@ -488,6 +499,10 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 488 |
)
|
| 489 |
|
| 490 |
# --- Stereo Imaging Tool ===
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
with gr.Tab("π Stereo Imaging"):
|
| 492 |
gr.Interface(
|
| 493 |
fn=stereo_imaging,
|
|
@@ -502,13 +517,29 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 502 |
)
|
| 503 |
|
| 504 |
# --- Save/Load Mix Session (.aiproj) ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
with gr.Tab("π Save/Load Project"):
|
| 506 |
gr.Interface(
|
| 507 |
fn=save_project,
|
| 508 |
inputs=[
|
| 509 |
gr.File(label="Original Audio"),
|
| 510 |
gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
|
| 511 |
-
gr.CheckboxGroup(choices=[
|
| 512 |
],
|
| 513 |
outputs=gr.File(label="Project File (.aiproj)"),
|
| 514 |
title="Save Everything Together",
|
|
@@ -520,13 +551,16 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 520 |
inputs=gr.File(label="Upload .aiproj File"),
|
| 521 |
outputs=[
|
| 522 |
gr.Dropdown(choices=preset_names, label="Loaded Preset"),
|
| 523 |
-
gr.CheckboxGroup(choices=[
|
| 524 |
],
|
| 525 |
title="Resume Last Project",
|
| 526 |
description="Load your saved session"
|
| 527 |
)
|
| 528 |
|
| 529 |
# --- Prompt-Based Editing Tab ===
|
|
|
|
|
|
|
|
|
|
| 530 |
with gr.Tab("π§ Prompt-Based Editing"):
|
| 531 |
gr.Interface(
|
| 532 |
fn=process_prompt,
|
|
|
|
| 20 |
from TTS.api import TTS
|
| 21 |
import base64
|
| 22 |
import pickle
|
| 23 |
+
import json
|
| 24 |
|
| 25 |
# Suppress warnings
|
| 26 |
warnings.filterwarnings("ignore")
|
|
|
|
| 137 |
"Acoustic": [(100, 300, -3), (4000, 8000, +2)],
|
| 138 |
"Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
|
| 139 |
"Trap": [(80, 120, +6), (3000, 6000, -4)],
|
| 140 |
+
"LoFi": [(20, 200, +3), (1000, 3000, -2)],
|
| 141 |
+
"Default": []
|
| 142 |
}
|
| 143 |
|
| 144 |
from scipy.signal import butter, sosfilt
|
|
|
|
| 247 |
status = f"β Error: {str(e)}"
|
| 248 |
return None, None, status, "", status
|
| 249 |
|
| 250 |
+
# === Waveform + Spectrogram Generator ===
|
| 251 |
def show_waveform(audio_file):
|
| 252 |
try:
|
| 253 |
audio = AudioSegment.from_file(audio_file)
|
|
|
|
| 310 |
return card_paths
|
| 311 |
|
| 312 |
# === Load Preset by Name ===
|
| 313 |
+
def load_preset_by_card(evt: gr.SelectData):
    """Map a clicked gallery card to a preset name and its effect list.

    Args:
        evt: Gradio selection event; ``evt.index`` is used as the position
            of the clicked card.

    Returns:
        A ``(name, effects)`` tuple. ``effects`` is the value stored under
        ``name`` in ``preset_choices``, or an empty list when there is none.
    """
    # Wrap the index so a card position past the end of preset_names still
    # resolves to a valid entry.
    index = evt.index % len(preset_names)
    name = preset_names[index]
    # A preset name without an entry in preset_choices yields no effects
    # instead of raising KeyError inside the UI callback.
    return name, preset_choices.get(name, [])
|
| 317 |
|
| 318 |
# === Logo Embedding (Base64 or file) ===
|
| 319 |
def get_logo():
    """Return the URL of the placeholder banner used as the app logo."""
    placeholder_url = "https://via.placeholder.com/400x100?text=AI+Audio+Studio"
    return placeholder_url
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
# === Main UI ===
|
| 323 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
|
|
| 328 |
with gr.Row():
|
| 329 |
with gr.Column(min_width=300):
|
| 330 |
input_audio = gr.Audio(label="Upload Audio", type="filepath")
|
| 331 |
+
effect_checkbox = gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order")
|
|
|
|
| 332 |
preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
|
| 333 |
export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
| 334 |
isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
|
|
|
|
| 336 |
with gr.Column(min_width=300):
|
| 337 |
output_audio = gr.Audio(label="Processed Audio", type="filepath")
|
| 338 |
waveform_img = gr.Image(label="Waveform Preview")
|
| 339 |
+
session_log_out = gr.Textbox(label="Session Log", lines=5)
|
| 340 |
+
genre_out = gr.Textbox(label="Detected Genre", lines=1)
|
| 341 |
status_box = gr.Textbox(label="Status", value="β
Ready", lines=1)
|
| 342 |
|
| 343 |
submit_btn.click(fn=process_audio, inputs=[
|
| 344 |
input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
|
| 345 |
+
], outputs=[
|
| 346 |
+
output_audio, waveform_img, session_log_out, genre_out, status_box
|
| 347 |
+
])
|
| 348 |
|
| 349 |
# --- AI Mastering Chain Tab ===
|
| 350 |
with gr.Tab("π§ AI Mastering Chain"):
|
|
|
|
| 385 |
("https://via.placeholder.com/150x100?text=Rock", "Rock"),
|
| 386 |
("https://via.placeholder.com/150x100?text=Hip-Hop", "Hip-Hop"),
|
| 387 |
("https://via.placeholder.com/150x100?text=Acoustic", "Acoustic"),
|
| 388 |
+
("https://via.placeholder.com/150x100?text=Tube", "Tube"),
|
| 389 |
("https://via.placeholder.com/150x100?text=Stage+Mode", "Stage Mode"),
|
| 390 |
("https://via.placeholder.com/150x100?text=Vocal+Distortion", "Vocal Distortion")
|
| 391 |
]
|
| 392 |
|
| 393 |
preset_gallery = gr.Gallery(value=preset_images, label="Preset Cards", columns=4, height="auto")
|
| 394 |
preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
|
| 395 |
+
preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices.keys())[0:], label="Effects")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
+
preset_gallery.select(fn=load_preset_by_card, inputs=[], outputs=[preset_name_out, preset_effects_out])
|
| 398 |
|
| 399 |
# --- Vocal Doubler / Harmonizer ===
|
| 400 |
with gr.Tab("π§ Vocal Doubler / Harmonizer"):
|
|
|
|
| 403 |
inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
|
| 404 |
outputs=gr.Audio(label="Doubled Output", type="filepath"),
|
| 405 |
title="Add Vocal Doubling / Harmony",
|
| 406 |
+
description="Enhance vocals with doubling or harmony"
|
|
|
|
| 407 |
)
|
| 408 |
|
| 409 |
# --- Remix Mode ---
|
|
|
|
| 429 |
fn=batch_process_audio,
|
| 430 |
inputs=[
|
| 431 |
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
| 432 |
+
gr.CheckboxGroup(choices=list(preset_choices["Default"]), label="Apply Effects in Order"),
|
| 433 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 434 |
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
|
| 435 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
|
|
|
| 459 |
)
|
| 460 |
|
| 461 |
# --- Real-Time Spectrum Analyzer + Live EQ Preview ===
|
| 462 |
+
def visualize_spectrum(audio_path):
    """Render a spectrogram image for an audio file.

    Args:
        audio_path: Path of the audio file, loaded with ``torchaudio.load``.

    Returns:
        A PIL image of the dB-scaled STFT magnitude plot.
    """
    y, sr = torchaudio.load(audio_path)
    # torchaudio returns a (channels, frames) tensor. Average the channels to
    # mono; flattening would lay the channels end to end and stretch the
    # time axis of the plot for multi-channel files.
    y_np = y.mean(dim=0).numpy()
    stft = librosa.stft(y_np)
    db = librosa.amplitude_to_db(abs(stft))

    plt.figure(figsize=(10, 4))
    try:
        img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
        plt.colorbar(img, format="%+2.0f dB")
        plt.title("Frequency Spectrum")
        plt.tight_layout()
        buf = BytesIO()
        plt.savefig(buf, format="png")
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close()
    buf.seek(0)
    return Image.open(buf)
|
| 478 |
+
|
| 479 |
with gr.Tab("π Frequency Spectrum"):
|
| 480 |
gr.Interface(
|
| 481 |
fn=visualize_spectrum,
|
|
|
|
| 499 |
)
|
| 500 |
|
| 501 |
# --- Stereo Imaging Tool ===
|
| 502 |
+
def stereo_imaging(audio, mid_side_balance=0.5, stereo_spread=1.0):
    """Decode an audio file to samples and rebuild an audio segment from them.

    ``mid_side_balance`` and ``stereo_spread`` are accepted but not read by
    this body; the samples are not modified between the two conversions.
    """
    segment = AudioSegment.from_file(audio)
    samples, sr = audiosegment_to_array(segment)
    return array_to_audiosegment(samples, sr)
|
| 505 |
+
|
| 506 |
with gr.Tab("π Stereo Imaging"):
|
| 507 |
gr.Interface(
|
| 508 |
fn=stereo_imaging,
|
|
|
|
| 517 |
)
|
| 518 |
|
| 519 |
# --- Save/Load Mix Session (.aiproj) ===
|
| 520 |
+
def save_project(audio, preset, effects):
    """Bundle the audio data, preset and effects into a pickled project file.

    Args:
        audio: Audio source passed to ``AudioSegment.from_file``.
        preset: Preset value stored under the ``"preset"`` key.
        effects: Effects value stored under the ``"effects"`` key.

    Returns:
        Path of the written ``.aiproj`` file in the system temp directory.
    """
    # Decode first so a bad audio file fails before any output file exists.
    project_data = {
        "audio": AudioSegment.from_file(audio).raw_data,
        "preset": preset,
        "effects": effects,
    }
    # Use a unique file per call: a fixed name in the shared temp directory
    # would let concurrent sessions overwrite each other's project.
    fd, out_path = tempfile.mkstemp(prefix="project_", suffix=".aiproj")
    with os.fdopen(fd, "wb") as f:
        pickle.dump(project_data, f)
    return out_path
|
| 530 |
+
|
| 531 |
+
def load_project(project_file):
    """Read a saved project file and return its preset and effects.

    Args:
        project_file: Either a filesystem path, or an object exposing the
            path through a ``name`` attribute.

    Returns:
        A ``(preset, effects)`` tuple taken from the stored mapping.

    Raises:
        KeyError: If the stored mapping lacks ``"preset"`` or ``"effects"``.
    """
    # Accept both a plain path string and a file wrapper with a .name path.
    path = getattr(project_file, "name", project_file)
    # SECURITY: pickle.load can execute arbitrary code from the file. This
    # reads a user-uploaded file, so only load projects from trusted sources;
    # a data-only format such as JSON would remove the risk.
    with open(path, "rb") as f:
        data = pickle.load(f)
    return data["preset"], data["effects"]
|
| 535 |
+
|
| 536 |
with gr.Tab("π Save/Load Project"):
|
| 537 |
gr.Interface(
|
| 538 |
fn=save_project,
|
| 539 |
inputs=[
|
| 540 |
gr.File(label="Original Audio"),
|
| 541 |
gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
|
| 542 |
+
gr.CheckboxGroup(choices=preset_choices["Default"], label="Applied Effects")
|
| 543 |
],
|
| 544 |
outputs=gr.File(label="Project File (.aiproj)"),
|
| 545 |
title="Save Everything Together",
|
|
|
|
| 551 |
inputs=gr.File(label="Upload .aiproj File"),
|
| 552 |
outputs=[
|
| 553 |
gr.Dropdown(choices=preset_names, label="Loaded Preset"),
|
| 554 |
+
gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
|
| 555 |
],
|
| 556 |
title="Resume Last Project",
|
| 557 |
description="Load your saved session"
|
| 558 |
)
|
| 559 |
|
| 560 |
# --- Prompt-Based Editing Tab ===
|
| 561 |
+
def process_prompt(audio, prompt):
    """Run noise reduction on ``audio``; ``prompt`` is accepted but not read."""
    denoised = apply_noise_reduction(audio)
    return denoised
|
| 563 |
+
|
| 564 |
with gr.Tab("π§ Prompt-Based Editing"):
|
| 565 |
gr.Interface(
|
| 566 |
fn=process_prompt,
|