Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -118,9 +118,40 @@ def apply_stage_mode(audio):
|
|
| 118 |
processed = apply_bass_boost(processed, gain=6)
|
| 119 |
return apply_limiter(processed, limit_dB=-2)
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
# === Auto-EQ per Genre ===
|
| 122 |
def auto_eq(audio, genre="Pop"):
|
| 123 |
-
# Define frequency bands based on genre
|
| 124 |
eq_map = {
|
| 125 |
"Pop": [(200, 500, -3), (2000, 4000, +4)], # Cut muddiness, boost vocals
|
| 126 |
"EDM": [(60, 250, +6), (8000, 12000, +3)], # Maximize bass & sparkle
|
|
@@ -169,7 +200,7 @@ def process_prompt(audio_path, prompt):
|
|
| 169 |
audio = apply_reverb(audio)
|
| 170 |
|
| 171 |
if "pitch" in prompt.lower() and "correct" in prompt.lower():
|
| 172 |
-
audio =
|
| 173 |
|
| 174 |
if "harmony" in prompt.lower() or "double" in prompt.lower():
|
| 175 |
audio = apply_harmony(audio)
|
|
@@ -178,42 +209,20 @@ def process_prompt(audio_path, prompt):
|
|
| 178 |
audio.export(out_path, format="wav")
|
| 179 |
return out_path
|
| 180 |
|
| 181 |
-
# ===
|
| 182 |
-
def real_time_eq(audio, low_gain=0, mid_gain=0, high_gain=0):
|
| 183 |
-
samples, sr = audiosegment_to_array(audio)
|
| 184 |
-
samples = samples.astype(np.float64)
|
| 185 |
-
|
| 186 |
-
# Low EQ: 20β500Hz
|
| 187 |
-
sos_low = butter(10, [20, 500], btype='band', output='sos', fs=sr)
|
| 188 |
-
samples = sosfilt(sos_low, samples) * (10 ** (low_gain / 20))
|
| 189 |
-
|
| 190 |
-
# Mid EQ: 500β4000Hz
|
| 191 |
-
sos_mid = butter(10, [500, 4000], btype='band', output='sos', fs=sr)
|
| 192 |
-
samples += sosfilt(sos_mid, samples) * (10 ** (mid_gain / 20))
|
| 193 |
-
|
| 194 |
-
# High EQ: 4000β20000Hz
|
| 195 |
-
sos_high = butter(10, [4000, 20000], btype='high', output='sos', fs=sr)
|
| 196 |
-
samples += sosfilt(sos_high, samples) * (10 ** (high_gain / 20))
|
| 197 |
-
|
| 198 |
-
return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
|
| 199 |
-
|
| 200 |
-
# === AI Suggest Presets Based on Genre ===
|
| 201 |
-
genre_preset_map = {
|
| 202 |
-
"Speech": ["Clean Podcast", "Normalize"],
|
| 203 |
-
"Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
|
| 204 |
-
"EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
|
| 205 |
-
"Rock": ["Distortion", "Punchy Mids", "Reverb"],
|
| 206 |
-
"Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"]
|
| 207 |
-
}
|
| 208 |
-
|
| 209 |
def suggest_preset_by_genre(audio_path):
|
| 210 |
try:
|
| 211 |
y, sr = torchaudio.load(audio_path)
|
| 212 |
mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
|
| 213 |
genre = "Pop"
|
| 214 |
-
return
|
| 215 |
except Exception:
|
| 216 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# === Create Karaoke Video from Audio + Lyrics ===
|
| 219 |
def create_karaoke_video(audio_path, lyrics, bg_image=None):
|
|
@@ -534,6 +543,7 @@ def diarize_and_transcribe(audio_path):
|
|
| 534 |
def visualize_spectrum(audio_path):
|
| 535 |
y, sr = torchaudio.load(audio_path)
|
| 536 |
y_np = y.numpy().flatten()
|
|
|
|
| 537 |
stft = librosa.stft(y_np)
|
| 538 |
db = librosa.amplitude_to_db(abs(stft))
|
| 539 |
|
|
@@ -548,32 +558,6 @@ def visualize_spectrum(audio_path):
|
|
| 548 |
buf.seek(0)
|
| 549 |
return Image.open(buf)
|
| 550 |
|
| 551 |
-
# === Real-Time EQ Slider Wrapper ===
|
| 552 |
-
def real_time_eq_slider(audio, low_gain, mid_gain, high_gain):
|
| 553 |
-
return real_time_eq(audio, low_gain, mid_gain, high_gain)
|
| 554 |
-
|
| 555 |
-
# === Cloud Project Sync (Premium Feature) ===
|
| 556 |
-
def cloud_save_project(audio, preset, effects, project_name, project_id=""):
|
| 557 |
-
project_data = {
|
| 558 |
-
"audio": audio,
|
| 559 |
-
"preset": preset,
|
| 560 |
-
"effects": effects
|
| 561 |
-
}
|
| 562 |
-
project_path = os.path.join(tempfile.gettempdir(), f"{project_name}.aiproj")
|
| 563 |
-
with open(project_path, "wb") as f:
|
| 564 |
-
pickle.dump(project_data, f)
|
| 565 |
-
return project_path, f"β
'{project_name}' saved to cloud"
|
| 566 |
-
|
| 567 |
-
def cloud_load_project(project_id):
|
| 568 |
-
if not project_id:
|
| 569 |
-
return None, None, None
|
| 570 |
-
try:
|
| 571 |
-
with open(project_id, "rb") as f:
|
| 572 |
-
data = pickle.load(f)
|
| 573 |
-
return data["audio"], data["preset"], data["effects"]
|
| 574 |
-
except Exception:
|
| 575 |
-
return None, None, None
|
| 576 |
-
|
| 577 |
# === UI ===
|
| 578 |
effect_options = [
|
| 579 |
"Noise Reduction",
|
|
@@ -663,35 +647,43 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 663 |
clear_btn=None
|
| 664 |
)
|
| 665 |
|
| 666 |
-
# ---
|
| 667 |
-
with gr.Tab("
|
| 668 |
gr.Interface(
|
| 669 |
-
fn=
|
| 670 |
inputs=[
|
| 671 |
gr.Audio(label="Upload Track", type="filepath"),
|
| 672 |
-
gr.
|
| 673 |
],
|
| 674 |
-
outputs=gr.Audio(label="
|
| 675 |
-
title="
|
| 676 |
-
description="
|
| 677 |
)
|
| 678 |
|
| 679 |
-
# ---
|
| 680 |
-
with gr.Tab("π
|
| 681 |
gr.Interface(
|
| 682 |
-
fn=
|
| 683 |
inputs=[
|
| 684 |
gr.Audio(label="Upload Track", type="filepath"),
|
| 685 |
-
gr.
|
| 686 |
-
gr.Slider(minimum=-12, maximum=12, value=0, label="Mid Gain (500Hzβ4kHz)"),
|
| 687 |
-
gr.Slider(minimum=-12, maximum=12, value=0, label="High Gain (4kHz+)"),
|
| 688 |
],
|
| 689 |
-
outputs=gr.Audio(label="
|
| 690 |
-
title="
|
| 691 |
-
description="
|
| 692 |
)
|
| 693 |
|
| 694 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
with gr.Tab("π Frequency Spectrum"):
|
| 696 |
gr.Interface(
|
| 697 |
fn=visualize_spectrum,
|
|
@@ -715,24 +707,40 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 715 |
allow_flagging="never"
|
| 716 |
)
|
| 717 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 718 |
# --- Vocal Presets for Singers ===
|
| 719 |
with gr.Tab("π€ Vocal Presets for Singers"):
|
| 720 |
gr.Interface(
|
| 721 |
fn=process_audio,
|
| 722 |
inputs=[
|
| 723 |
gr.Audio(label="Upload Vocal Track", type="filepath"),
|
| 724 |
-
gr.CheckboxGroup(choices=
|
| 725 |
-
"Noise Reduction",
|
| 726 |
-
"Normalize",
|
| 727 |
-
"Compress Dynamic Range",
|
| 728 |
-
"Bass Boost",
|
| 729 |
-
"Treble Boost",
|
| 730 |
-
"Reverb",
|
| 731 |
-
"Auto Gain",
|
| 732 |
-
"Vocal Distortion",
|
| 733 |
-
"Harmony",
|
| 734 |
-
"Stage Mode"
|
| 735 |
-
]),
|
| 736 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 737 |
gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
|
| 738 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
|
@@ -764,7 +772,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 764 |
)
|
| 765 |
|
| 766 |
# --- Speaker Diarization ("Who Spoke When?") ===
|
| 767 |
-
if diarize_pipeline:
|
| 768 |
with gr.Tab("π§ββοΈ Who Spoke When?"):
|
| 769 |
gr.Interface(
|
| 770 |
fn=diarize_and_transcribe,
|
|
@@ -833,87 +841,4 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 833 |
description="Detect and trim silence at start/end or between words"
|
| 834 |
)
|
| 835 |
|
| 836 |
-
# --- Save/Load Project File (.aiproj) ===
|
| 837 |
-
with gr.Tab("π Save/Load Project"):
|
| 838 |
-
gr.Interface(
|
| 839 |
-
fn=save_project,
|
| 840 |
-
inputs=[
|
| 841 |
-
gr.File(label="Original Audio"),
|
| 842 |
-
gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
|
| 843 |
-
gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
|
| 844 |
-
],
|
| 845 |
-
outputs=gr.File(label="Project File (.aiproj)"),
|
| 846 |
-
title="Save Everything Together",
|
| 847 |
-
description="Save your session, effects, and settings in one file to reuse later."
|
| 848 |
-
)
|
| 849 |
-
|
| 850 |
-
gr.Interface(
|
| 851 |
-
fn=load_project,
|
| 852 |
-
inputs=gr.File(label="Upload .aiproj File"),
|
| 853 |
-
outputs=[
|
| 854 |
-
gr.Dropdown(choices=preset_names, label="Loaded Preset"),
|
| 855 |
-
gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
|
| 856 |
-
],
|
| 857 |
-
title="Resume Last Project",
|
| 858 |
-
description="Load your saved session"
|
| 859 |
-
)
|
| 860 |
-
|
| 861 |
-
# --- Cloud Project Sync (Premium Feature) ===
|
| 862 |
-
with gr.Tab("βοΈ Cloud Project Sync"):
|
| 863 |
-
gr.Markdown("Save your projects online and resume them from any device.")
|
| 864 |
-
|
| 865 |
-
gr.Interface(
|
| 866 |
-
fn=cloud_save_project,
|
| 867 |
-
inputs=[
|
| 868 |
-
gr.File(label="Upload Audio", type="filepath"),
|
| 869 |
-
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
|
| 870 |
-
gr.CheckboxGroup(choices=effect_options, label="Effects Applied"),
|
| 871 |
-
gr.Textbox(label="Project Name"),
|
| 872 |
-
gr.Textbox(label="Project ID (Optional)")
|
| 873 |
-
],
|
| 874 |
-
outputs=[
|
| 875 |
-
gr.File(label="Downloadable Project File"),
|
| 876 |
-
gr.Textbox(label="Status", value="β
Ready", lines=1)
|
| 877 |
-
],
|
| 878 |
-
title="Save to Cloud",
|
| 879 |
-
description="Save your project online and share it across devices."
|
| 880 |
-
)
|
| 881 |
-
|
| 882 |
-
gr.Interface(
|
| 883 |
-
fn=cloud_load_project,
|
| 884 |
-
inputs=gr.Textbox(label="Enter Project ID"),
|
| 885 |
-
outputs=[
|
| 886 |
-
gr.Audio(label="Loaded Audio", type="filepath"),
|
| 887 |
-
gr.Dropdown(choices=preset_names, label="Loaded Preset"),
|
| 888 |
-
gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
|
| 889 |
-
],
|
| 890 |
-
title="Load from Cloud",
|
| 891 |
-
description="Resume a project from the cloud",
|
| 892 |
-
allow_flagging="never"
|
| 893 |
-
)
|
| 894 |
-
|
| 895 |
-
# --- AI Suggest Presets Based on Genre ===
|
| 896 |
-
with gr.Tab("π§ AI Suggest Preset"):
|
| 897 |
-
gr.Interface(
|
| 898 |
-
fn=suggest_preset_by_genre,
|
| 899 |
-
inputs=gr.Audio(label="Upload Track", type="filepath"),
|
| 900 |
-
outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
|
| 901 |
-
title="Let AI Recommend Best Preset",
|
| 902 |
-
description="Upload a track and let AI recommend the best preset based on genre."
|
| 903 |
-
)
|
| 904 |
-
|
| 905 |
-
# --- Create Karaoke Video from Audio + Lyrics ===
|
| 906 |
-
with gr.Tab("πΉ Create Karaoke Video"):
|
| 907 |
-
gr.Interface(
|
| 908 |
-
fn=create_karaoke_video,
|
| 909 |
-
inputs=[
|
| 910 |
-
gr.Audio(label="Upload Track", type="filepath"),
|
| 911 |
-
gr.Textbox(label="Lyrics", lines=10),
|
| 912 |
-
gr.File(label="Background (Optional)"),
|
| 913 |
-
],
|
| 914 |
-
outputs=gr.Video(label="Karaoke Video"),
|
| 915 |
-
title="Make Karaoke Videos from Audio + Lyrics",
|
| 916 |
-
description="Generate karaoke-style videos with real-time sync."
|
| 917 |
-
)
|
| 918 |
-
|
| 919 |
demo.launch()
|
|
|
|
| 118 |
processed = apply_bass_boost(processed, gain=6)
|
| 119 |
return apply_limiter(processed, limit_dB=-2)
|
| 120 |
|
| 121 |
+
# === Loudness Matching (EBU R128) ===
try:
    import pyloudnorm as pyln
except ImportError:
    # Best-effort runtime install; prefer declaring pyloudnorm in requirements.txt.
    print("Installing pyloudnorm...")
    import subprocess
    import sys
    # Use this interpreter's pip (python -m pip) so the package lands in the
    # active environment, and fail loudly if the install does not succeed.
    subprocess.run([sys.executable, "-m", "pip", "install", "pyloudnorm"], check=True)
    import pyloudnorm as pyln

def match_loudness(audio_path, target_lufs=-14.0):
    """Normalize a track's integrated loudness to *target_lufs* (EBU R128).

    Args:
        audio_path: Path to an audio file readable by pydub/ffmpeg.
        target_lufs: Target integrated loudness in LUFS. Default -14.0,
            the common streaming-platform reference level.

    Returns:
        Path to a 44.1 kHz WAV file ("loudness_output.wav" in the temp
        directory) with the gain adjusted to hit the target loudness.
    """
    wav = AudioSegment.from_file(audio_path).set_frame_rate(44100)
    # Meter rate must match the (resampled) audio rate.
    meter = pyln.Meter(44100)
    # Scale to float in [-1, 1]. NOTE(review): divisor assumes 16-bit
    # samples — confirm inputs, or derive from wav.sample_width.
    samples = np.array(wav.get_array_of_samples()).astype(np.float64) / 32768.0
    # pydub interleaves channels; pyloudnorm expects (n_samples, n_channels).
    if wav.channels > 1:
        samples = samples.reshape((-1, wav.channels))
    loudness = meter.integrated_loudness(samples)
    gain_db = target_lufs - loudness
    adjusted = wav + gain_db  # pydub applies gain in dB via __add__
    out_path = os.path.join(tempfile.gettempdir(), "loudness_output.wav")
    adjusted.export(out_path, format="wav")
    return out_path
|
| 140 |
+
|
| 141 |
+
# === Dynamic Range Compression Presets ===
def apply_compression_preset(audio, preset="Radio Ready"):
    """Apply a named dynamic-range compression preset to *audio*.

    Args:
        audio: pydub AudioSegment to compress.
        preset: One of "Radio Ready", "Podcast Safe", "Club Mix", "Speech".

    Returns:
        The compressed AudioSegment, or *audio* unchanged if the preset
        name is not recognized.
    """
    # Preset name -> compressor settings (threshold in dBFS, ratio n:1).
    settings_by_preset = {
        "Radio Ready": {"threshold": -5, "ratio": 4},
        "Podcast Safe": {"threshold": -10, "ratio": 2},
        "Club Mix": {"threshold": -3, "ratio": 6},
        "Speech": {"threshold": -6, "ratio": 1.5},
    }
    settings = settings_by_preset.get(preset)
    if settings is None:
        return audio
    return audio.compress_dynamic_range(**settings)
|
| 152 |
+
|
| 153 |
# === Auto-EQ per Genre ===
|
| 154 |
def auto_eq(audio, genre="Pop"):
|
|
|
|
| 155 |
eq_map = {
|
| 156 |
"Pop": [(200, 500, -3), (2000, 4000, +4)], # Cut muddiness, boost vocals
|
| 157 |
"EDM": [(60, 250, +6), (8000, 12000, +3)], # Maximize bass & sparkle
|
|
|
|
| 200 |
audio = apply_reverb(audio)
|
| 201 |
|
| 202 |
if "pitch" in prompt.lower() and "correct" in prompt.lower():
|
| 203 |
+
audio = apply_pitch_correction(audio)
|
| 204 |
|
| 205 |
if "harmony" in prompt.lower() or "double" in prompt.lower():
|
| 206 |
audio = apply_harmony(audio)
|
|
|
|
| 209 |
audio.export(out_path, format="wav")
|
| 210 |
return out_path
|
| 211 |
|
| 212 |
+
# === AI Suggest Preset Based on Genre ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
def suggest_preset_by_genre(audio_path):
    """Suggest a preset for *audio_path* based on its (detected) genre.

    NOTE(review): genre detection is a placeholder — MFCC features are
    computed but never used, and the genre is hard-coded to "Pop".
    Also verify the returned string matches the choices of the Dropdown
    this feeds in the UI.
    """
    try:
        y, sr = torchaudio.load(audio_path)
        # Features intended for a future genre classifier; currently unused.
        mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
        genre = "Pop"
        return f"Suggested Preset: {genre}"
    except Exception:
        # Best-effort: any load/analysis failure yields a neutral answer
        # instead of crashing the UI.
        return "Unknown"
|
| 221 |
+
|
| 222 |
+
# === Vocal Pitch Correction β Auto-Tune Style ===
|
| 223 |
+
def apply_pitch_correction(audio, target_key="C"):
    """Auto-tune-style vocal pitch correction (placeholder implementation).

    Args:
        audio: AudioSegment to process.
        target_key: Musical key to snap pitches to — currently ignored.

    Returns:
        The audio with a fixed small pitch shift of 0.2 (units per
        apply_pitch_shift — TODO confirm semitones), regardless of key.
    """
    # Placeholder: In real use, this would align pitch to the nearest key note
    return apply_pitch_shift(audio, 0.2)
|
| 226 |
|
| 227 |
# === Create Karaoke Video from Audio + Lyrics ===
|
| 228 |
def create_karaoke_video(audio_path, lyrics, bg_image=None):
|
|
|
|
| 543 |
def visualize_spectrum(audio_path):
|
| 544 |
y, sr = torchaudio.load(audio_path)
|
| 545 |
y_np = y.numpy().flatten()
|
| 546 |
+
|
| 547 |
stft = librosa.stft(y_np)
|
| 548 |
db = librosa.amplitude_to_db(abs(stft))
|
| 549 |
|
|
|
|
| 558 |
buf.seek(0)
|
| 559 |
return Image.open(buf)
|
| 560 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
# === UI ===
|
| 562 |
effect_options = [
|
| 563 |
"Noise Reduction",
|
|
|
|
| 647 |
clear_btn=None
|
| 648 |
)
|
| 649 |
|
| 650 |
+
# --- Loudness Match (EBU R128) ===
|
| 651 |
+
with gr.Tab("π Loudness Match"):
|
| 652 |
gr.Interface(
|
| 653 |
+
fn=match_loudness,
|
| 654 |
inputs=[
|
| 655 |
gr.Audio(label="Upload Track", type="filepath"),
|
| 656 |
+
gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
|
| 657 |
],
|
| 658 |
+
outputs=gr.Audio(label="Normalized Output", type="filepath"),
|
| 659 |
+
title="Match Loudness (EBU R128)",
|
| 660 |
+
description="Ensure consistent loudness across tracks using industry-standard normalization."
|
| 661 |
)
|
| 662 |
|
| 663 |
+
# --- Dynamic Compression Presets ===
|
| 664 |
+
with gr.Tab("π Dynamic Compression Presets"):
|
| 665 |
gr.Interface(
|
| 666 |
+
fn=apply_compression_preset,
|
| 667 |
inputs=[
|
| 668 |
gr.Audio(label="Upload Track", type="filepath"),
|
| 669 |
+
gr.Dropdown(choices=["Radio Ready", "Podcast Safe", "Club Mix", "Speech"], label="Preset")
|
|
|
|
|
|
|
| 670 |
],
|
| 671 |
+
outputs=gr.Audio(label="Compressed Output", type="filepath"),
|
| 672 |
+
title="Apply Pre-Tuned Compression Settings",
|
| 673 |
+
description="Choose from compression presets used in radio, podcasting, club mixes, and speech editing."
|
| 674 |
)
|
| 675 |
|
| 676 |
+
# --- AI Suggest Preset Based on Genre ===
|
| 677 |
+
with gr.Tab("π§ AI Suggest Preset"):
|
| 678 |
+
gr.Interface(
|
| 679 |
+
fn=suggest_preset_by_genre,
|
| 680 |
+
inputs=gr.Audio(label="Upload Track", type="filepath"),
|
| 681 |
+
outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
|
| 682 |
+
title="AI Recommends Best Preset",
|
| 683 |
+
description="Upload a track and let AI recommend the best preset based on detected genre."
|
| 684 |
+
)
|
| 685 |
+
|
| 686 |
+
# --- Real-Time Spectrum Analyzer + EQ ===
|
| 687 |
with gr.Tab("π Frequency Spectrum"):
|
| 688 |
gr.Interface(
|
| 689 |
fn=visualize_spectrum,
|
|
|
|
| 707 |
allow_flagging="never"
|
| 708 |
)
|
| 709 |
|
| 710 |
+
# --- Vocal Pitch Correction (Auto-Tune) ===
|
| 711 |
+
with gr.Tab("𧬠Vocal Pitch Correction"):
|
| 712 |
+
gr.Interface(
|
| 713 |
+
fn=apply_pitch_correction,
|
| 714 |
+
inputs=[
|
| 715 |
+
gr.Audio(label="Upload Vocal Clip", type="filepath"),
|
| 716 |
+
gr.Textbox(label="Target Key", value="C", lines=1)
|
| 717 |
+
],
|
| 718 |
+
outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
|
| 719 |
+
title="Auto-Tune Style Pitch Correction",
|
| 720 |
+
description="Correct vocal pitch automatically"
|
| 721 |
+
)
|
| 722 |
+
|
| 723 |
+
# --- Create Karaoke Video from Audio + Lyrics ===
|
| 724 |
+
with gr.Tab("πΉ Create Karaoke Video"):
|
| 725 |
+
gr.Interface(
|
| 726 |
+
fn=create_karaoke_video,
|
| 727 |
+
inputs=[
|
| 728 |
+
gr.Audio(label="Upload Track", type="filepath"),
|
| 729 |
+
gr.Textbox(label="Lyrics", lines=10),
|
| 730 |
+
gr.File(label="Background (Optional)")
|
| 731 |
+
],
|
| 732 |
+
outputs=gr.Video(label="Karaoke Video"),
|
| 733 |
+
title="Make Karaoke Videos from Audio + Lyrics",
|
| 734 |
+
description="Generate karaoke-style videos with real-time sync."
|
| 735 |
+
)
|
| 736 |
+
|
| 737 |
# --- Vocal Presets for Singers ===
|
| 738 |
with gr.Tab("π€ Vocal Presets for Singers"):
|
| 739 |
gr.Interface(
|
| 740 |
fn=process_audio,
|
| 741 |
inputs=[
|
| 742 |
gr.Audio(label="Upload Vocal Track", type="filepath"),
|
| 743 |
+
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 744 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
| 745 |
gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
|
| 746 |
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
|
|
|
| 772 |
)
|
| 773 |
|
| 774 |
# --- Speaker Diarization ("Who Spoke When?") ===
|
| 775 |
+
if "diarize_pipeline" in locals():
|
| 776 |
with gr.Tab("π§ββοΈ Who Spoke When?"):
|
| 777 |
gr.Interface(
|
| 778 |
fn=diarize_and_transcribe,
|
|
|
|
| 841 |
description="Detect and trim silence at start/end or between words"
|
| 842 |
)
|
| 843 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 844 |
demo.launch()
|