Spaces:

tee342
/

AudioMaster

Running

App Files Files Community

tee342 commited on Jun 12

Commit

1cda55b

verified ·

1 Parent(s): f9afce2

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -171

app.py CHANGED Viewed

@@ -118,9 +118,40 @@ def apply_stage_mode(audio):
     processed = apply_bass_boost(processed, gain=6)
     return apply_limiter(processed, limit_dB=-2)
 # === Auto-EQ per Genre ===
 def auto_eq(audio, genre="Pop"):
-    # Define frequency bands based on genre
     eq_map = {
         "Pop": [(200, 500, -3), (2000, 4000, +4)],  # Cut muddiness, boost vocals
         "EDM": [(60, 250, +6), (8000, 12000, +3)],  # Maximize bass & sparkle
@@ -169,7 +200,7 @@ def process_prompt(audio_path, prompt):
         audio = apply_reverb(audio)
     if "pitch" in prompt.lower() and "correct" in prompt.lower():
-        audio = apply_pitch_shift(audio, 0)  # Placeholder
     if "harmony" in prompt.lower() or "double" in prompt.lower():
         audio = apply_harmony(audio)
@@ -178,42 +209,20 @@ def process_prompt(audio_path, prompt):
     audio.export(out_path, format="wav")
     return out_path
-# === Real-Time EQ Sliders ===
-def real_time_eq(audio, low_gain=0, mid_gain=0, high_gain=0):
-    samples, sr = audiosegment_to_array(audio)
-    samples = samples.astype(np.float64)
-    # Low EQ: 20–500Hz
-    sos_low = butter(10, [20, 500], btype='band', output='sos', fs=sr)
-    samples = sosfilt(sos_low, samples) * (10 ** (low_gain / 20))
-    # Mid EQ: 500–4000Hz
-    sos_mid = butter(10, [500, 4000], btype='band', output='sos', fs=sr)
-    samples += sosfilt(sos_mid, samples) * (10 ** (mid_gain / 20))
-    # High EQ: 4000–20000Hz
-    sos_high = butter(10, [4000, 20000], btype='high', output='sos', fs=sr)
-    samples += sosfilt(sos_high, samples) * (10 ** (high_gain / 20))
-    return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
-# === AI Suggest Presets Based on Genre ===
-genre_preset_map = {
-    "Speech": ["Clean Podcast", "Normalize"],
-    "Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
-    "EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
-    "Rock": ["Distortion", "Punchy Mids", "Reverb"],
-    "Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"]
-}
 def suggest_preset_by_genre(audio_path):
     try:
         y, sr = torchaudio.load(audio_path)
         mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
         genre = "Pop"
-        return genre_preset_map.get(genre, ["Default"])
     except Exception:
-        return ["Default"]
 # === Create Karaoke Video from Audio + Lyrics ===
 def create_karaoke_video(audio_path, lyrics, bg_image=None):
@@ -534,6 +543,7 @@ def diarize_and_transcribe(audio_path):
 def visualize_spectrum(audio_path):
     y, sr = torchaudio.load(audio_path)
     y_np = y.numpy().flatten()
     stft = librosa.stft(y_np)
     db = librosa.amplitude_to_db(abs(stft))
@@ -548,32 +558,6 @@ def visualize_spectrum(audio_path):
     buf.seek(0)
     return Image.open(buf)
-# === Real-Time EQ Slider Wrapper ===
-def real_time_eq_slider(audio, low_gain, mid_gain, high_gain):
-    return real_time_eq(audio, low_gain, mid_gain, high_gain)
-# === Cloud Project Sync (Premium Feature) ===
-def cloud_save_project(audio, preset, effects, project_name, project_id=""):
-    project_data = {
-        "audio": audio,
-        "preset": preset,
-        "effects": effects
-    }
-    project_path = os.path.join(tempfile.gettempdir(), f"{project_name}.aiproj")
-    with open(project_path, "wb") as f:
-        pickle.dump(project_data, f)
-    return project_path, f"✅ '{project_name}' saved to cloud"
-def cloud_load_project(project_id):
-    if not project_id:
-        return None, None, None
-    try:
-        with open(project_id, "rb") as f:
-            data = pickle.load(f)
-        return data["audio"], data["preset"], data["effects"]
-    except Exception:
-        return None, None, None
 # === UI ===
 effect_options = [
     "Noise Reduction",
@@ -663,35 +647,43 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             clear_btn=None
         )
-    # --- Genre Mastering Tab ===
-    with gr.Tab("🎧 Genre Mastering"):
         gr.Interface(
-            fn=lambda audio, genre: auto_eq(audio, genre),
             inputs=[
                 gr.Audio(label="Upload Track", type="filepath"),
-                gr.Dropdown(choices=list(genre_preset_map.keys()), label="Select Genre", value="Pop")
             ],
-            outputs=gr.Audio(label="Mastered Output", type="filepath"),
-            title="Genre-Specific Mastering",
-            description="Apply professionally tuned mastering settings for popular music genres."
         )
-    # --- Real-Time EQ ===
-    with gr.Tab("🎛 Real-Time EQ"):
         gr.Interface(
-            fn=real_time_eq_slider,
             inputs=[
                 gr.Audio(label="Upload Track", type="filepath"),
-                gr.Slider(minimum=-12, maximum=12, value=0, label="Low Gain (-200–500Hz)"),
-                gr.Slider(minimum=-12, maximum=12, value=0, label="Mid Gain (500Hz–4kHz)"),
-                gr.Slider(minimum=-12, maximum=12, value=0, label="High Gain (4kHz+)"),
             ],
-            outputs=gr.Audio(label="EQ'd Output", type="filepath"),
-            title="Adjust Frequency Bands Live",
-            description="Fine-tune your sound using real-time sliders for low, mid, and high frequencies."
         )
-    # --- Spectrum Visualizer ===
     with gr.Tab("📊 Frequency Spectrum"):
         gr.Interface(
             fn=visualize_spectrum,
@@ -715,24 +707,40 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             allow_flagging="never"
         )
     # --- Vocal Presets for Singers ===
     with gr.Tab("🎤 Vocal Presets for Singers"):
         gr.Interface(
             fn=process_audio,
             inputs=[
                 gr.Audio(label="Upload Vocal Track", type="filepath"),
-                gr.CheckboxGroup(choices=[
-                    "Noise Reduction",
-                    "Normalize",
-                    "Compress Dynamic Range",
-                    "Bass Boost",
-                    "Treble Boost",
-                    "Reverb",
-                    "Auto Gain",
-                    "Vocal Distortion",
-                    "Harmony",
-                    "Stage Mode"
-                ]),
                 gr.Checkbox(label="Isolate Vocals After Effects"),
                 gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
                 gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
@@ -764,7 +772,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
     # --- Speaker Diarization ("Who Spoke When?") ===
-    if diarize_pipeline:
         with gr.Tab("🧏‍♂️ Who Spoke When?"):
             gr.Interface(
                 fn=diarize_and_transcribe,
@@ -833,87 +841,4 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Detect and trim silence at start/end or between words"
         )
-    # --- Save/Load Project File (.aiproj) ===
-    with gr.Tab("📁 Save/Load Project"):
-        gr.Interface(
-            fn=save_project,
-            inputs=[
-                gr.File(label="Original Audio"),
-                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
-                gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
-            ],
-            outputs=gr.File(label="Project File (.aiproj)"),
-            title="Save Everything Together",
-            description="Save your session, effects, and settings in one file to reuse later."
-        )
-        gr.Interface(
-            fn=load_project,
-            inputs=gr.File(label="Upload .aiproj File"),
-            outputs=[
-                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
-                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
-            ],
-            title="Resume Last Project",
-            description="Load your saved session"
-        )
-    # --- Cloud Project Sync (Premium Feature) ===
-    with gr.Tab("☁️ Cloud Project Sync"):
-        gr.Markdown("Save your projects online and resume them from any device.")
-        gr.Interface(
-            fn=cloud_save_project,
-            inputs=[
-                gr.File(label="Upload Audio", type="filepath"),
-                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
-                gr.CheckboxGroup(choices=effect_options, label="Effects Applied"),
-                gr.Textbox(label="Project Name"),
-                gr.Textbox(label="Project ID (Optional)")
-            ],
-            outputs=[
-                gr.File(label="Downloadable Project File"),
-                gr.Textbox(label="Status", value="✅ Ready", lines=1)
-            ],
-            title="Save to Cloud",
-            description="Save your project online and share it across devices."
-        )
-        gr.Interface(
-            fn=cloud_load_project,
-            inputs=gr.Textbox(label="Enter Project ID"),
-            outputs=[
-                gr.Audio(label="Loaded Audio", type="filepath"),
-                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
-                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
-            ],
-            title="Load from Cloud",
-            description="Resume a project from the cloud",
-            allow_flagging="never"
-        )
-    # --- AI Suggest Presets Based on Genre ===
-    with gr.Tab("🧠 AI Suggest Preset"):
-        gr.Interface(
-            fn=suggest_preset_by_genre,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
-            title="Let AI Recommend Best Preset",
-            description="Upload a track and let AI recommend the best preset based on genre."
-        )
-    # --- Create Karaoke Video from Audio + Lyrics ===
-    with gr.Tab("📹 Create Karaoke Video"):
-        gr.Interface(
-            fn=create_karaoke_video,
-            inputs=[
-                gr.Audio(label="Upload Track", type="filepath"),
-                gr.Textbox(label="Lyrics", lines=10),
-                gr.File(label="Background (Optional)"),
-            ],
-            outputs=gr.Video(label="Karaoke Video"),
-            title="Make Karaoke Videos from Audio + Lyrics",
-            description="Generate karaoke-style videos with real-time sync."
-        )
 demo.launch()

     processed = apply_bass_boost(processed, gain=6)
     return apply_limiter(processed, limit_dB=-2)
+# === Loudness Matching (EBU R128) ===
+try:
+    import pyloudnorm as pyln
+except ImportError:
+    print("Installing pyloudnorm...")
+    import subprocess
+    subprocess.run(["pip", "install", "pyloudnorm"])
+    import pyloudnorm as pyln
+def match_loudness(audio_path, target_lufs=-14.0):
+    meter = pyln.Meter(44100)
+    wav = AudioSegment.from_file(audio_path).set_frame_rate(44100)
+    samples = np.array(wav.get_array_of_samples()).astype(np.float64) / 32768.0
+    loudness = meter.integrated_loudness(samples)
+    gain_db = target_lufs - loudness
+    adjusted = wav + gain_db
+    out_path = os.path.join(tempfile.gettempdir(), "loudness_output.wav")
+    adjusted.export(out_path, format="wav")
+    return out_path
+# === Dynamic Range Compression Presets ===
+def apply_compression_preset(audio, preset="Radio Ready"):
+    if preset == "Radio Ready":
+        return audio.compress_dynamic_range(threshold=-5, ratio=4)
+    elif preset == "Podcast Safe":
+        return audio.compress_dynamic_range(threshold=-10, ratio=2)
+    elif preset == "Club Mix":
+        return audio.compress_dynamic_range(threshold=-3, ratio=6)
+    elif preset == "Speech":
+        return audio.compress_dynamic_range(threshold=-6, ratio=1.5)
+    return audio
 # === Auto-EQ per Genre ===
 def auto_eq(audio, genre="Pop"):
     eq_map = {
         "Pop": [(200, 500, -3), (2000, 4000, +4)],  # Cut muddiness, boost vocals
         "EDM": [(60, 250, +6), (8000, 12000, +3)],  # Maximize bass & sparkle
         audio = apply_reverb(audio)
     if "pitch" in prompt.lower() and "correct" in prompt.lower():
+        audio = apply_pitch_correction(audio)
     if "harmony" in prompt.lower() or "double" in prompt.lower():
         audio = apply_harmony(audio)
     audio.export(out_path, format="wav")
     return out_path
+# === AI Suggest Preset Based on Genre ===
 def suggest_preset_by_genre(audio_path):
     try:
         y, sr = torchaudio.load(audio_path)
         mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
         genre = "Pop"
+        return f"Suggested Preset: {genre}"
     except Exception:
+        return "Unknown"
+# === Vocal Pitch Correction – Auto-Tune Style ===
+def apply_pitch_correction(audio, target_key="C"):
+    # Placeholder: In real use, this would align pitch to the nearest key note
+    return apply_pitch_shift(audio, 0.2)
 # === Create Karaoke Video from Audio + Lyrics ===
 def create_karaoke_video(audio_path, lyrics, bg_image=None):
 def visualize_spectrum(audio_path):
     y, sr = torchaudio.load(audio_path)
     y_np = y.numpy().flatten()
     stft = librosa.stft(y_np)
     db = librosa.amplitude_to_db(abs(stft))
     buf.seek(0)
     return Image.open(buf)
 # === UI ===
 effect_options = [
     "Noise Reduction",
             clear_btn=None
         )
+    # --- Loudness Match (EBU R128) ===
+    with gr.Tab("📈 Loudness Match"):
         gr.Interface(
+            fn=match_loudness,
             inputs=[
                 gr.Audio(label="Upload Track", type="filepath"),
+                gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
             ],
+            outputs=gr.Audio(label="Normalized Output", type="filepath"),
+            title="Match Loudness (EBU R128)",
+            description="Ensure consistent loudness across tracks using industry-standard normalization."
         )
+    # --- Dynamic Compression Presets ===
+    with gr.Tab("🎛 Dynamic Compression Presets"):
         gr.Interface(
+            fn=apply_compression_preset,
             inputs=[
                 gr.Audio(label="Upload Track", type="filepath"),
+                gr.Dropdown(choices=["Radio Ready", "Podcast Safe", "Club Mix", "Speech"], label="Preset")
             ],
+            outputs=gr.Audio(label="Compressed Output", type="filepath"),
+            title="Apply Pre-Tuned Compression Settings",
+            description="Choose from compression presets used in radio, podcasting, club mixes, and speech editing."
         )
+    # --- AI Suggest Preset Based on Genre ===
+    with gr.Tab("🧠 AI Suggest Preset"):
+        gr.Interface(
+            fn=suggest_preset_by_genre,
+            inputs=gr.Audio(label="Upload Track", type="filepath"),
+            outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
+            title="AI Recommends Best Preset",
+            description="Upload a track and let AI recommend the best preset based on detected genre."
+        )
+    # --- Real-Time Spectrum Analyzer + EQ ===
     with gr.Tab("📊 Frequency Spectrum"):
         gr.Interface(
             fn=visualize_spectrum,
             allow_flagging="never"
         )
+    # --- Vocal Pitch Correction (Auto-Tune) ===
+    with gr.Tab("🧬 Vocal Pitch Correction"):
+        gr.Interface(
+            fn=apply_pitch_correction,
+            inputs=[
+                gr.Audio(label="Upload Vocal Clip", type="filepath"),
+                gr.Textbox(label="Target Key", value="C", lines=1)
+            ],
+            outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
+            title="Auto-Tune Style Pitch Correction",
+            description="Correct vocal pitch automatically"
+        )
+    # --- Create Karaoke Video from Audio + Lyrics ===
+    with gr.Tab("📹 Create Karaoke Video"):
+        gr.Interface(
+            fn=create_karaoke_video,
+            inputs=[
+                gr.Audio(label="Upload Track", type="filepath"),
+                gr.Textbox(label="Lyrics", lines=10),
+                gr.File(label="Background (Optional)")
+            ],
+            outputs=gr.Video(label="Karaoke Video"),
+            title="Make Karaoke Videos from Audio + Lyrics",
+            description="Generate karaoke-style videos with real-time sync."
+        )
     # --- Vocal Presets for Singers ===
     with gr.Tab("🎤 Vocal Presets for Singers"):
         gr.Interface(
             fn=process_audio,
             inputs=[
                 gr.Audio(label="Upload Vocal Track", type="filepath"),
+                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
                 gr.Checkbox(label="Isolate Vocals After Effects"),
                 gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
                 gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
         )
     # --- Speaker Diarization ("Who Spoke When?") ===
+    if "diarize_pipeline" in locals():
         with gr.Tab("🧏‍♂️ Who Spoke When?"):
             gr.Interface(
                 fn=diarize_and_transcribe,
             description="Detect and trim silence at start/end or between words"
         )
 demo.launch()