Spaces:

tee342
/

AudioMaster

Sleeping

App Files Files Community

tee342 committed on Jun 12

Commit

31bd509

verified ·

1 Parent(s): 440dd71

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -87

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ from faster_whisper import WhisperModel
 from mutagen.mp3 import MP3
 from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
-# Suppress warnings for cleaner logs
 warnings.filterwarnings("ignore")
 # === Helper Functions ===
@@ -54,7 +54,7 @@ def apply_reverb(audio):
     return audio.overlay(reverb, position=1000)
 def apply_pitch_shift(audio, semitones=-2):
-    new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))  # ✅ Fixed: extra closing parenthesis
     samples = np.array(audio.get_array_of_samples())
     resampled = np.interp(
         np.arange(0, len(samples), 2 ** (semitones / 12)),
@@ -306,47 +306,52 @@ def analyze_audio(audio_path):
     return stats, image
-# === Vocal Removal (Karaoke Mode) ===
def vocal_removal(audio_path):
    """Write an instrumental-only version of a track and return its path.

    The first three stems returned by ``stem_split`` are summed and saved
    as ``instrumental.wav`` in the system temp directory at 44100 Hz.
    The original author labels those stems drums, bass and other; the stem
    order itself is defined by ``stem_split``, not here.
    """
    separated = stem_split(audio_path)
    drums, bass, other = separated[0], separated[1], separated[2]
    backing_track = drums + bass + other
    destination = os.path.join(tempfile.gettempdir(), "instrumental.wav")
    torchaudio.save(destination, backing_track, 44100)
    return destination
-# === Metadata Tagging ===
def tag_mp3(file_path, title, artist, album, year):
    """Write ID3 title/artist/album/year frames into an MP3 file in place.

    Args:
        file_path: Path of the MP3 file to tag.
        title: Track title, stored as a TIT2 frame.
        artist: Artist name, stored as a TPE1 frame.
        album: Album name, stored as a TALB frame; skipped when falsy.
        year: Release year, stored as a TYER frame; skipped when falsy.

    Returns:
        ``file_path`` when the tags were saved, ``None`` when anything went
        wrong (tagging is best-effort and never raises to the caller).
    """
    try:
        audio = MP3(file_path)
        # Create a tag container only when the file has none, so frames that
        # are already present (cover art, genre, ...) are not thrown away.
        if audio.tags is None:
            audio.add_tags()
        audio.tags.add(TIT2(encoding=3, text=title))
        audio.tags.add(TPE1(encoding=3, text=artist))
        if album:
            audio.tags.add(TALB(encoding=3, text=album))
        if year:
            # Numeric UI inputs may hand over a float; write "2020", not
            # "2020.0".
            if isinstance(year, float) and year.is_integer():
                year = int(year)
            audio.tags.add(TYER(encoding=3, text=str(year)))
        audio.save()
    except Exception:
        # Deliberate best-effort: report failure through the return value.
        return None
    return file_path
-# === Voice Style Transfer (Dummy) ===
def apply_style_transfer(audio_path, mood="Happy"):
    """Placeholder for voice style transfer: return the input path unchanged.

    ``mood`` is accepted for interface compatibility but is not used yet.
    """
    # TODO: run a real style-transfer model here.
    stylized_path = audio_path
    return stylized_path
-# === Session Sharing (URL Encode) ===
def encode_preset(selected_effects, preset_name, export_format):
    """Build a shareable Space URL that carries the current settings.

    The settings are serialized to JSON, Base64-encoded, and then
    percent-encoded so the value survives as a URL query parameter.

    Args:
        selected_effects: Names of the chosen effects (JSON-serializable).
        preset_name: Name of the selected preset.
        export_format: Export format label.

    Returns:
        The Space URL with the encoded settings in the ``preset`` query
        parameter. To recover them: URL-decode the parameter, Base64-decode
        it, and parse the result as JSON.
    """
    import base64
    import json
    from urllib.parse import quote

    data = {"effects": selected_effects, "preset": preset_name, "format": export_format}
    encoded = base64.b64encode(json.dumps(data).encode()).decode()
    # Standard Base64 can emit "+", "/" and "=", which query-string parsers
    # mangle ("+" is read as a space), so escape the payload.
    return f"https://huggingface.co/spaces/tee342/AudioMaster?preset={quote(encoded, safe='')}"
-# === UI ===
 effect_options = [
     "Noise Reduction",
     "Compress Dynamic Range",
@@ -360,7 +365,7 @@ effect_options = [
 ]
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
-    gr.Markdown("## 🎧 AI Audio Studio – The Ultimate AI-Powered Tool\nUpload, edit, and export polished tracks — all powered by AI!")
     # --- Single File Studio ---
     with gr.Tab("🎵 Single File Studio"):
@@ -433,7 +438,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             inputs=gr.Audio(label="Upload Audio", type="filepath"),
             outputs=gr.Textbox(label="Transcribed Text", lines=10),
             title="Transcribe & Edit Spoken Content",
-            description="Convert voice to text, then edit the script before exporting again."
         )
     # --- TTS Voice Generator ---
@@ -446,70 +451,68 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Type anything and turn it into natural-sounding speech."
         )
-    # --- Audio Analysis Dashboard ---
-    with gr.Tab("📊 Audio Analysis"):
         gr.Interface(
-            fn=analyze_audio,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=[
-                gr.JSON(label="Audio Stats"),
-                gr.Image(label="Waveform Graph")
             ],
-            title="View Loudness, BPM, Silence, and More",
-            description="Analyze audio loudness, tempo, and frequency content."
         )
-    # --- Voice Style Transfer ---
-    with gr.Tab("🧠 Voice Style Transfer"):
         gr.Interface(
-            fn=apply_style_transfer,
             inputs=[
-                gr.Audio(label="Upload Voice Clip", type="filepath"),
-                gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
             ],
-            outputs=gr.Audio(label="Stylized Output", type="filepath"),
-            title="Change Emotional Tone of Voice",
-            description="Shift the emotional style of any voice clip."
         )
-    # --- Session Sharing ---
-    with gr.Tab("🧾 Session Sharing"):
         gr.Interface(
-            fn=encode_preset,
-            inputs=[
-                gr.CheckboxGroup(choices=effect_options, label="Effects"),
-                gr.Dropdown(choices=preset_names, label="Preset"),
-                gr.Dropdown(choices=["MP3", "WAV"], label="Format")
-            ],
-            outputs=gr.Textbox(label="Shareable Link", lines=1),
-            title="Save Your Settings and Share Them",
-            description="Generate a link to share your effect chain with others."
         )
-    # --- Vocal Removal (Karaoke Mode) ---
-    with gr.Tab("🎯 Vocal Removal (Karaoke Mode)"):
         gr.Interface(
-            fn=vocal_removal,
-            inputs=gr.Audio(label="Upload Song", type="filepath"),
-            outputs=gr.Audio(label="Instrumental Only", type="filepath"),
-            title="Remove Vocals from Any Track",
-            description="Create karaoke versions using AI"
         )
-    # --- Metadata Tagging ---
-    with gr.Tab("🗂 Add MP3 Tags"):
         gr.Interface(
-            fn=tag_mp3,
             inputs=[
-                gr.File(label="Upload MP3/WAV"),
-                gr.Textbox(label="Title"),
-                gr.Textbox(label="Artist"),
-                gr.Textbox(label="Album"),
-                gr.Number(label="Year")
             ],
-            outputs=gr.File(label="Tagged Audio File"),
-            title="Add Title, Artist, Album, Year to MP3",
-            description="Enhance your exported files with metadata tags"
         )
 demo.launch()

 from mutagen.mp3 import MP3
 from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
+# Suppress warnings
 warnings.filterwarnings("ignore")
 # === Helper Functions ===
     return audio.overlay(reverb, position=1000)
 def apply_pitch_shift(audio, semitones=-2):
+    new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
     samples = np.array(audio.get_array_of_samples())
     resampled = np.interp(
         np.arange(0, len(samples), 2 ** (semitones / 12)),
     return stats, image
+# === Auto-Save Sessions ===
def auto_save_session(data):
    """Encode session data into a URL that can be used to reload it.

    ``data`` is serialized to JSON, Base64-encoded, and percent-encoded so
    it can travel safely in the ``load`` query parameter.

    Args:
        data: JSON-serializable session state.

    Returns:
        A URL string whose ``load`` parameter holds the encoded session. To
        recover it: URL-decode the parameter, Base64-decode it, and parse
        the result as JSON.
    """
    # Imported locally so the function does not depend on module-level
    # imports being present.
    import base64
    import json
    from urllib.parse import quote

    encoded = base64.b64encode(json.dumps(data).encode()).decode()
    # Standard Base64 can emit "+", "/" and "=", which query-string parsers
    # mangle ("+" is read as a space), so escape the payload.
    # NOTE(review): the host below looks like a placeholder - confirm the
    # real studio URL before relying on these links.
    return f"https://your-studio-url?load={quote(encoded, safe='')}"
+# === Voiceprint Matching ===
+from resemblyzer import preprocess_wav, VoiceEncoder
+encoder = VoiceEncoder()
def match_speakers(clip1, clip2):
    """Score how similar the speakers in two audio clips are.

    Both clips are preprocessed, embedded with the module-level ``encoder``,
    and compared via the inner product of the two embeddings.

    Args:
        clip1: First clip, as a path or an upload object exposing the path
            as ``.name``.
        clip2: Second clip, same forms as ``clip1``.

    Returns:
        A string of the form ``"Speaker Match Score: 0.87"``.
    """
    # File-upload inputs may arrive as wrapper objects rather than paths;
    # unwrap them the same way load_project does. Real paths (str/PathLike)
    # are passed through untouched.
    sources = [
        clip if isinstance(clip, (str, os.PathLike)) else getattr(clip, "name", clip)
        for clip in (clip1, clip2)
    ]
    wav1 = preprocess_wav(sources[0])
    wav2 = preprocess_wav(sources[1])
    embed1 = encoder.embed_utterance(wav1)
    embed2 = encoder.embed_utterance(wav2)
    # NOTE(review): presumably equals cosine similarity if the encoder
    # returns unit-length embeddings - confirm against the resemblyzer docs.
    similarity = np.inner(embed1, embed2)
    return f"Speaker Match Score: {similarity:.2f}"
+# === Mix Two Tracks ===
def mix_tracks(track1, track2, volume_offset=0):
    """Overlay a second track on a main track and export the mix as WAV.

    Args:
        track1: Main track, as a path or an upload object exposing the path
            as ``.name``.
        track2: Track laid over the main one, same forms as ``track1``.
        volume_offset: Amount subtracted from ``track2`` before overlaying;
            positive values make it quieter (the UI labels this as dB).

    Returns:
        Path of the exported file, ``mixed.wav`` in the system temp
        directory.
    """
    # File-upload inputs may arrive as wrapper objects rather than paths;
    # unwrap them the same way load_project does. Real paths (str/PathLike)
    # are passed through untouched.
    sources = [
        track if isinstance(track, (str, os.PathLike)) else getattr(track, "name", track)
        for track in (track1, track2)
    ]
    a1 = AudioSegment.from_file(sources[0])
    a2 = AudioSegment.from_file(sources[1])
    mixed = a1.overlay(a2 - volume_offset)
    out_path = os.path.join(tempfile.gettempdir(), "mixed.wav")
    mixed.export(out_path, format="wav")
    return out_path
+# === Save/Load Project File (.aiproj) ===
def save_project(audio_path, preset_name, effects):
    """Bundle a track's audio bytes, preset and effect list into one file.

    Args:
        audio_path: Source audio, as a path or an upload object exposing
            the path as ``.name``.
        preset_name: Name of the preset used.
        effects: The applied effects.

    Returns:
        Path of the written project file, ``project.aiproj`` in the system
        temp directory.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present.
    import pickle

    # File-upload inputs may arrive as wrapper objects rather than paths;
    # unwrap them the same way load_project does.
    if not isinstance(audio_path, (str, os.PathLike)):
        audio_path = getattr(audio_path, "name", audio_path)

    project_data = {
        # NOTE(review): only the raw sample bytes are stored; nothing here
        # records frame rate, channel count or sample width - confirm that
        # whatever reloads the audio gets those from elsewhere.
        "audio": AudioSegment.from_file(audio_path).raw_data,
        "preset": preset_name,
        "effects": effects,
    }
    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
    with open(out_path, "wb") as f:
        pickle.dump(project_data, f)
    return out_path
def load_project(project_file):
    """Read a saved project file and return its preset and effects.

    Args:
        project_file: The project file, as a path or an upload object
            exposing the path as ``.name``.

    Returns:
        A ``(preset, effects)`` tuple taken from the stored project data.

    Raises:
        KeyError: If the stored data lacks ``"preset"`` or ``"effects"``.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present.
    import pickle

    # Accept plain paths as well as upload wrappers.
    if isinstance(project_file, (str, os.PathLike)):
        source = project_file
    else:
        source = project_file.name

    # NOTE(security): unpickling runs arbitrary code embedded in the file,
    # and project files are user uploads. Move to a data-only format (for
    # example JSON) before exposing this to untrusted users.
    with open(source, "rb") as f:
        data = pickle.load(f)
    return data["preset"], data["effects"]
+# === UI ===
 effect_options = [
     "Noise Reduction",
     "Compress Dynamic Range",
 ]
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
+    gr.Markdown("## 🎧 Ultimate AI Audio Studio\nUpload, edit, export — powered by AI!")
     # --- Single File Studio ---
     with gr.Tab("🎵 Single File Studio"):
             inputs=gr.Audio(label="Upload Audio", type="filepath"),
             outputs=gr.Textbox(label="Transcribed Text", lines=10),
             title="Transcribe & Edit Spoken Content",
+            description="Convert voice to text and edit it before re-exporting."
         )
     # --- TTS Voice Generator ---
             description="Type anything and turn it into natural-sounding speech."
         )
+    # --- Voiceprint Matching ---
+    with gr.Tab("🧏‍♂️ Match Speakers"):
         gr.Interface(
+            fn=match_speakers,
+            inputs=[
+                gr.File(label="Clip 1"),
+                gr.File(label="Clip 2")
             ],
+            outputs=gr.Textbox(label="Match Score", lines=1),
+            title="Are These the Same Person?",
+            description="Detect speaker similarity using AI."
         )
+    # --- Voice Cloning (AI Dubbing) ---
+    with gr.Tab("🎭 Voice Cloning (AI Dubbing)"):
         gr.Interface(
+            fn=clone_voice,
             inputs=[
+                gr.File(label="Source Voice"),
+                gr.File(label="Target Voice")
             ],
+            outputs=gr.Audio(label="Cloned Output", type="filepath"),
+            title="Replace Voice with Another",
+            description="Clone voice from source to target speaker."
         )
+    # --- AI Mastering Mode ---
+    with gr.Tab("📈 AI Mastering Mode"):
         gr.Interface(
+            fn=ai_mastering,
+            inputs=gr.Audio(label="Upload Track", type="filepath"),
+            outputs=gr.Audio(label="Mastered Output", type="filepath"),
+            title="Auto-Master Your Track",
+            description="Smart mastering for streaming platforms like Spotify, YouTube, or podcasts."
         )
+    # --- Mix Two Tracks ---
+    with gr.Tab("🔀 Mix Two Tracks"):
         gr.Interface(
+            fn=mix_tracks,
+            inputs=[
+                gr.File(label="Main Track"),
+                gr.File(label="Background Track"),
+                gr.Slider(minimum=-10, maximum=10, value=0, label="Volume Offset (dB)")
+            ],
+            outputs=gr.File(label="Mixed Output"),
+            title="Overlay Two Tracks",
+            description="Mix, blend, or subtract two audio files."
         )
+    # --- Load/Save Project ---
+    with gr.Tab("📁 Save/Load Project"):
         gr.Interface(
+            fn=save_project,
             inputs=[
+                gr.File(label="Original Audio"),
+                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
             ],
+            outputs=gr.File(label="Project File (.aiproj)"),
+            title="Save Everything Together",
+            description="Save your session, effects, and settings in one file to reuse later."
         )
 demo.launch()