Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,9 @@ import datetime
|
|
18 |
import librosa
|
19 |
import joblib
|
20 |
import warnings
|
|
|
|
|
|
|
21 |
|
22 |
# Suppress warnings for cleaner logs
|
23 |
warnings.filterwarnings("ignore")
|
@@ -51,7 +54,7 @@ def apply_reverb(audio):
|
|
51 |
return audio.overlay(reverb, position=1000)
|
52 |
|
53 |
def apply_pitch_shift(audio, semitones=-2):
|
54 |
-
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12))
|
55 |
samples = np.array(audio.get_array_of_samples())
|
56 |
resampled = np.interp(
|
57 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
@@ -172,7 +175,6 @@ def detect_genre(audio_path):
|
|
172 |
try:
|
173 |
y, sr = torchaudio.load(audio_path)
|
174 |
mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
|
175 |
-
# Dummy classifier β replace with real one later
|
176 |
return "Speech"
|
177 |
except Exception:
|
178 |
return "Unknown"
|
@@ -195,7 +197,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
195 |
try:
|
196 |
audio = AudioSegment.from_file(audio_file)
|
197 |
status = "π Applying effects..."
|
198 |
-
|
199 |
effect_map = {
|
200 |
"Noise Reduction": apply_noise_reduction,
|
201 |
"Compress Dynamic Range": apply_compression,
|
@@ -214,7 +216,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
|
|
214 |
audio = effect_map[effect_name](audio)
|
215 |
|
216 |
status = "πΎ Saving final audio..."
|
217 |
-
with tempfile.NamedTemporaryFile(delete=
|
218 |
if isolate_vocals:
|
219 |
temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
|
220 |
audio.export(temp_input, format="wav")
|
@@ -262,7 +264,87 @@ def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, ex
|
|
262 |
except Exception as e:
|
263 |
return None, f"β Batch processing failed: {str(e)}"
|
264 |
|
265 |
-
# ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
effect_options = [
|
267 |
"Noise Reduction",
|
268 |
"Compress Dynamic Range",
|
@@ -275,13 +357,10 @@ effect_options = [
|
|
275 |
"Normalize"
|
276 |
]
|
277 |
|
278 |
-
# === Multi-Tab UI ===
|
279 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
280 |
-
gr.Markdown(""
|
281 |
-
# π§ AI Audio Studio β Powered by Hugging Face & Demucs
|
282 |
-
Upload, edit, and export audio with AI-powered tools.
|
283 |
-
""")
|
284 |
|
|
|
285 |
with gr.Tab("π΅ Single File Studio"):
|
286 |
gr.Interface(
|
287 |
fn=process_audio,
|
@@ -306,11 +385,12 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
306 |
clear_btn=None
|
307 |
)
|
308 |
|
|
|
309 |
with gr.Tab("π Batch Processing"):
|
310 |
gr.Interface(
|
311 |
fn=batch_process_audio,
|
312 |
inputs=[
|
313 |
-
gr.File(label="Upload Multiple
|
314 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
315 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
316 |
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
|
@@ -327,7 +407,8 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
327 |
clear_btn=None
|
328 |
)
|
329 |
|
330 |
-
|
|
|
331 |
gr.Interface(
|
332 |
fn=stem_split,
|
333 |
inputs=gr.Audio(label="Upload Music Track", type="filepath"),
|
@@ -343,4 +424,90 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
343 |
clear_btn=None
|
344 |
)
|
345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
demo.launch()
|
|
|
18 |
import librosa
|
19 |
import joblib
|
20 |
import warnings
|
21 |
+
from faster_whisper import WhisperModel
|
22 |
+
from mutagen.mp3 import MP3
|
23 |
+
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
24 |
|
25 |
# Suppress warnings for cleaner logs
|
26 |
warnings.filterwarnings("ignore")
|
|
|
54 |
return audio.overlay(reverb, position=1000)
|
55 |
|
56 |
def apply_pitch_shift(audio, semitones=-2):
|
57 |
+
new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12))
|
58 |
samples = np.array(audio.get_array_of_samples())
|
59 |
resampled = np.interp(
|
60 |
np.arange(0, len(samples), 2 ** (semitones / 12)),
|
|
|
175 |
try:
|
176 |
y, sr = torchaudio.load(audio_path)
|
177 |
mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
|
|
|
178 |
return "Speech"
|
179 |
except Exception:
|
180 |
return "Unknown"
|
|
|
197 |
try:
|
198 |
audio = AudioSegment.from_file(audio_file)
|
199 |
status = "π Applying effects..."
|
200 |
+
|
201 |
effect_map = {
|
202 |
"Noise Reduction": apply_noise_reduction,
|
203 |
"Compress Dynamic Range": apply_compression,
|
|
|
216 |
audio = effect_map[effect_name](audio)
|
217 |
|
218 |
status = "πΎ Saving final audio..."
|
219 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
220 |
if isolate_vocals:
|
221 |
temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
|
222 |
audio.export(temp_input, format="wav")
|
|
|
264 |
except Exception as e:
|
265 |
return None, f"β Batch processing failed: {str(e)}"
|
266 |
|
267 |
+
# === Whisper Transcription Tab ===
# One shared "base" model instance serves every transcription request.
whisper_model = WhisperModel("base")


def transcribe_audio(audio_path):
    """Transcribe the speech in *audio_path* to plain text via faster-whisper."""
    segments, _info = whisper_model.transcribe(audio_path, beam_size=5)
    pieces = [segment.text for segment in segments]
    return " ".join(pieces)
|
274 |
+
|
275 |
+
# === TTS Tab ===
from TTS.api import TTS

# Shared synthesizer: Tacotron2-DDC trained on LJSpeech, loaded once at import.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)


def generate_tts(text):
    """Synthesize *text* to speech and return the path of the rendered WAV."""
    target = os.path.join(tempfile.gettempdir(), "tts_output.wav")
    tts.tts_to_file(text=text, file_path=target)
    return target
|
284 |
+
|
285 |
+
# === Analyze Audio Stats ===
def analyze_audio(audio_path):
    """Compute basic stats for a track and render its waveform.

    Args:
        audio_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        tuple: ``(stats, image)`` where ``stats`` is a dict with the mean RMS
        loudness, the fraction of near-silent samples, and the estimated tempo
        in BPM, and ``image`` is a PIL image of the waveform plot.
    """
    # NOTE(review): assumes plt, BytesIO and Image are imported at the top of
    # the file (not visible in this chunk) — confirm.
    y, sr = torchaudio.load(audio_path)
    mono = y.numpy().flatten()

    rms = np.mean(librosa.feature.rms(y=mono))
    tempo, _ = librosa.beat.beat_track(y=mono, sr=sr)
    # Fraction of samples below a fixed absolute-amplitude floor of 0.01.
    silence_ratio = np.mean(np.abs(y.numpy()) < 0.01)

    plt.figure(figsize=(10, 4))
    plt.plot(mono, color="lightblue")
    plt.title("Loudness Over Time")
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format="png")
    plt.close()
    # Fix: rewind the buffer so PIL reads the PNG from the start instead of
    # depending on Image.open's implementation-specific implicit seek.
    buf.seek(0)
    image = Image.open(buf)

    stats = {
        "rms_loudness": float(rms),
        "silence_ratio": float(silence_ratio),
        "tempo_bpm": int(tempo),
    }

    return stats, image
|
308 |
+
|
309 |
+
# === Vocal Removal (Karaoke Mode) ===
def vocal_removal(audio_path):
    """Produce an instrumental-only mix by summing all non-vocal stems."""
    stems = stem_split(audio_path)
    # First three stems are presumably drums, bass and "other" — the vocal
    # stem is the one left out of the sum. Confirm stem ordering upstream.
    drums, bass, other = stems[0], stems[1], stems[2]
    instrumental = drums + bass + other
    out_path = os.path.join(tempfile.gettempdir(), "instrumental.wav")
    torchaudio.save(out_path, instrumental, 44100)
    return out_path
|
316 |
+
|
317 |
+
# === Metadata Tagging ===
def tag_mp3(file_path, title, artist, album, year):
    """Write ID3 tags (title, artist, optional album/year) to an MP3 file.

    Args:
        file_path: Path to the MP3 to tag, saved in place.
        title: Track title (TIT2).
        artist: Artist name (TPE1).
        album: Album name (TALB); skipped when falsy.
        year: Release year (TYER); skipped when falsy.

    Returns:
        The tagged file path, or ``None`` when tagging fails.
    """
    try:
        audio = MP3(file_path)
        # Bug fix: the old code did ``audio.tags = ID3()`` inside a try and
        # ``add_tags()`` in a bare except — the assignment never raises, so it
        # unconditionally wiped any existing tags. Only create a fresh tag
        # container when the file has none.
        if audio.tags is None:
            audio.add_tags()
        audio.tags.add(TIT2(encoding=3, text=title))
        audio.tags.add(TPE1(encoding=3, text=artist))
        if album:
            audio.tags.add(TALB(encoding=3, text=album))
        if year:
            audio.tags.add(TYER(encoding=3, text=str(year)))
        audio.save()
        return file_path
    except Exception:
        # Best-effort by design: the UI treats None as "tagging failed".
        return None
|
335 |
+
|
336 |
+
# === Voice Style Transfer (Dummy) ===
def apply_style_transfer(audio_path, mood="Happy"):
    """Placeholder emotional style transfer: echoes the input clip back.

    ``mood`` is accepted (and currently ignored) so the UI contract is already
    in place for a real model.
    """
    # Replace with real model later
    return audio_path
|
340 |
+
|
341 |
+
# === Session Sharing (URL Encode) ===
def encode_preset(selected_effects, preset_name, export_format):
    """Serialize the current effect chain into a shareable preset URL."""
    payload = {
        "effects": selected_effects,
        "preset": preset_name,
        "format": export_format,
    }
    token = base64.b64encode(json.dumps(payload).encode()).decode()
    return f"https://huggingface.co/spaces/YOUR_USERNAME/AudioMaster?preset={token}"
|
346 |
+
|
347 |
+
# === UI ===
|
348 |
effect_options = [
|
349 |
"Noise Reduction",
|
350 |
"Compress Dynamic Range",
|
|
|
357 |
"Normalize"
|
358 |
]
|
359 |
|
|
|
360 |
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
361 |
+
gr.Markdown("## π§ AI Audio Studio β The Ultimate AI-Powered Tool\nUpload, edit, and export polished tracks β all powered by AI!")
|
|
|
|
|
|
|
362 |
|
363 |
+
# --- Single File Studio ---
|
364 |
with gr.Tab("π΅ Single File Studio"):
|
365 |
gr.Interface(
|
366 |
fn=process_audio,
|
|
|
385 |
clear_btn=None
|
386 |
)
|
387 |
|
388 |
+
# --- Batch Processing ---
|
389 |
with gr.Tab("π Batch Processing"):
|
390 |
gr.Interface(
|
391 |
fn=batch_process_audio,
|
392 |
inputs=[
|
393 |
+
gr.File(label="Upload Multiple Files", file_count="multiple"),
|
394 |
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
|
395 |
gr.Checkbox(label="Isolate Vocals After Effects"),
|
396 |
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
|
|
|
407 |
clear_btn=None
|
408 |
)
|
409 |
|
410 |
+
# --- Remix Mode ---
|
411 |
+
with gr.Tab("π Remix Mode"):
|
412 |
gr.Interface(
|
413 |
fn=stem_split,
|
414 |
inputs=gr.Audio(label="Upload Music Track", type="filepath"),
|
|
|
424 |
clear_btn=None
|
425 |
)
|
426 |
|
427 |
+
# --- Transcribe & Edit ---
|
428 |
+
with gr.Tab("π Transcribe & Edit"):
|
429 |
+
gr.Interface(
|
430 |
+
fn=transcribe_audio,
|
431 |
+
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
432 |
+
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
433 |
+
title="Transcribe & Edit Spoken Content",
|
434 |
+
description="Convert voice to text, then edit the script before exporting again."
|
435 |
+
)
|
436 |
+
|
437 |
+
# --- TTS Voice Generator ---
|
438 |
+
with gr.Tab("π¬ TTS Voice Generator"):
|
439 |
+
gr.Interface(
|
440 |
+
fn=generate_tts,
|
441 |
+
inputs=gr.Textbox(label="Enter Text", lines=5),
|
442 |
+
outputs=gr.Audio(label="Generated Speech", type="filepath"),
|
443 |
+
title="Text-to-Speech Generator",
|
444 |
+
description="Type anything and turn it into natural-sounding speech."
|
445 |
+
)
|
446 |
+
|
447 |
+
# --- Audio Analysis Dashboard ---
|
448 |
+
with gr.Tab("π Audio Analysis"):
|
449 |
+
gr.Interface(
|
450 |
+
fn=analyze_audio,
|
451 |
+
inputs=gr.Audio(label="Upload Track", type="filepath"),
|
452 |
+
outputs=[
|
453 |
+
gr.JSON(label="Audio Stats"),
|
454 |
+
gr.Image(label="Waveform Graph")
|
455 |
+
],
|
456 |
+
title="View Loudness, BPM, Silence, and More",
|
457 |
+
description="Analyze audio loudness, tempo, and frequency content."
|
458 |
+
)
|
459 |
+
|
460 |
+
# --- Voice Style Transfer ---
|
461 |
+
with gr.Tab("π§ Voice Style Transfer"):
|
462 |
+
gr.Interface(
|
463 |
+
fn=apply_style_transfer,
|
464 |
+
inputs=[
|
465 |
+
gr.Audio(label="Upload Voice Clip", type="filepath"),
|
466 |
+
gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
|
467 |
+
],
|
468 |
+
outputs=gr.Audio(label="Stylized Output", type="filepath"),
|
469 |
+
title="Change Emotional Tone of Voice",
|
470 |
+
description="Shift the emotional style of any voice clip."
|
471 |
+
)
|
472 |
+
|
473 |
+
# --- Session Sharing ---
|
474 |
+
with gr.Tab("π§Ύ Session Sharing"):
|
475 |
+
gr.Interface(
|
476 |
+
fn=encode_preset,
|
477 |
+
inputs=[
|
478 |
+
gr.CheckboxGroup(choices=effect_options, label="Effects"),
|
479 |
+
gr.Dropdown(choices=preset_names, label="Preset"),
|
480 |
+
gr.Dropdown(choices=["MP3", "WAV"], label="Format")
|
481 |
+
],
|
482 |
+
outputs=gr.Textbox(label="Shareable Link", lines=1),
|
483 |
+
title="Save Your Settings and Share Them",
|
484 |
+
description="Generate a link to share your effect chain with others."
|
485 |
+
)
|
486 |
+
|
487 |
+
# --- Vocal Removal (Karaoke Mode) ---
|
488 |
+
with gr.Tab("π― Vocal Removal (Karaoke Mode)"):
|
489 |
+
gr.Interface(
|
490 |
+
fn=vocal_removal,
|
491 |
+
inputs=gr.Audio(label="Upload Song", type="filepath"),
|
492 |
+
outputs=gr.Audio(label="Instrumental Only", type="filepath"),
|
493 |
+
title="Remove Vocals from Any Track",
|
494 |
+
description="Create karaoke versions using AI"
|
495 |
+
)
|
496 |
+
|
497 |
+
# --- Metadata Tagging ---
|
498 |
+
with gr.Tab("π Add MP3 Tags"):
|
499 |
+
gr.Interface(
|
500 |
+
fn=tag_mp3,
|
501 |
+
inputs=[
|
502 |
+
gr.File(label="Upload MP3/WAV"),
|
503 |
+
gr.Textbox(label="Title"),
|
504 |
+
gr.Textbox(label="Artist"),
|
505 |
+
gr.Textbox(label="Album"),
|
506 |
+
gr.Number(label="Year")
|
507 |
+
],
|
508 |
+
outputs=gr.File(label="Tagged Audio File"),
|
509 |
+
title="Add Title, Artist, Album, Year to MP3",
|
510 |
+
description="Enhance your exported files with metadata tags"
|
511 |
+
)
|
512 |
+
|
513 |
demo.launch()
|