Update app.py

app.py CHANGED
@@ -23,16 +23,6 @@ from mutagen.mp3 import MP3
 from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
 from TTS.api import TTS
 import pickle
-import subprocess
-
-# Install OpenVoice from GitHub during startup
-try:
-    import openvoice
-except ImportError:
-    print("Installing OpenVoice from GitHub...")
-    subprocess.run(["pip", "install", "git+https://github.com/myshell-ai/OpenVoice.git"])
-from openvoice.se_extractor import get_se
-from openvoice.api import ToneColorConverter
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -292,23 +282,6 @@ def generate_tts(text):
     tts.tts_to_file(text=text, file_path=out_path)
     return out_path
 
-# === Save/Load Project File (.aiproj) ===
-def save_project(audio_path, preset_name, effects):
-    project_data = {
-        "audio": AudioSegment.from_file(audio_path).raw_data,
-        "preset": preset_name,
-        "effects": effects
-    }
-    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
-    with open(out_path, "wb") as f:
-        pickle.dump(project_data, f)
-    return out_path
-
-def load_project(project_file):
-    with open(project_file.name, "rb") as f:
-        data = pickle.load(f)
-    return data["preset"], data["effects"]
-
 # === Trim Silence Automatically (VAD) ===
 def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
     audio = AudioSegment.from_file(audio_file)
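This hunk only removes save_project and load_project from their old position; the next hunk re-adds them unchanged after mix_tracks. For reference, a minimal round trip of the .aiproj format they define, with the two function bodies copied from the diff. The input file, preset name, and effect list are placeholder values, the pydub import mirrors what app.py appears to use for AudioSegment, and the Upload class is a hypothetical stand-in for the gr.File value (which exposes the chosen path as .name):

import os
import pickle
import tempfile

from pydub import AudioSegment  # assumption: app.py's AudioSegment comes from pydub

def save_project(audio_path, preset_name, effects):
    # Pickle the raw audio bytes together with the preset name and effect list.
    project_data = {
        "audio": AudioSegment.from_file(audio_path).raw_data,
        "preset": preset_name,
        "effects": effects
    }
    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
    with open(out_path, "wb") as f:
        pickle.dump(project_data, f)
    return out_path

def load_project(project_file):
    # Only the preset and effects are read back; they feed the two "Loaded ..." components.
    with open(project_file.name, "rb") as f:
        data = pickle.load(f)
    return data["preset"], data["effects"]

class Upload:
    """Hypothetical stand-in for a Gradio file upload, which exposes the path as .name."""
    def __init__(self, name):
        self.name = name

proj = save_project("input.wav", "Podcast", ["Noise Reduction", "Normalize"])  # placeholder inputs
print(load_project(Upload(proj)))  # -> ('Podcast', ['Noise Reduction', 'Normalize'])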
@@ -336,27 +309,244 @@ def mix_tracks(track1, track2, volume_offset=0):
     mixed.export(out_path, format="wav")
     return out_path
 
-# ===
[the remaining removed lines of this hunk, apparently the previous clone_voice implementation, are not legible in this capture]
+# === Save/Load Project File (.aiproj) ===
+def save_project(audio_path, preset_name, effects):
+    project_data = {
+        "audio": AudioSegment.from_file(audio_path).raw_data,
+        "preset": preset_name,
+        "effects": effects
+    }
+    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
+    with open(out_path, "wb") as f:
+        pickle.dump(project_data, f)
+    return out_path
+
+def load_project(project_file):
+    with open(project_file.name, "rb") as f:
+        data = pickle.load(f)
+    return data["preset"], data["effects"]
+
+# === Auto-Save / Resume Sessions ===
+def save_or_resume_session(audio, preset, effects, action="save"):
+    if action == "save":
+        return {"audio": audio, "preset": preset, "effects": effects}, None, None, None
+    elif action == "load" and isinstance(audio, dict):
+        return (
+            None,
+            audio.get("audio"),
+            audio.get("preset"),
+            audio.get("effects")
+        )
+    return None, None, None, None
+
+# === Voice Cloning – Fallback Version for Hugging Face ===
 def clone_voice(source_audio, target_audio, text):
+    print("⚠️ Voice cloning not available in browser version – use local install for full support")
+    return generate_tts(text)
+
+# === UI Setup ===
+effect_options = [
+    "Noise Reduction",
+    "Compress Dynamic Range",
+    "Add Reverb",
+    "Pitch Shift",
+    "Echo",
+    "Stereo Widening",
+    "Bass Boost",
+    "Treble Boost",
+    "Normalize"
+]
+
+with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
+    gr.Markdown("## 🎧 Ultimate AI Audio Studio\nUpload, edit, export – powered by AI!")
+
+    # --- Single File Studio ---
+    with gr.Tab("🎵 Single File Studio"):
+        gr.Interface(
+            fn=process_audio,
+            inputs=[
+                gr.Audio(label="Upload Audio", type="filepath"),
+                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
+                gr.Checkbox(label="Isolate Vocals After Effects"),
+                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
+                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+            ],
+            outputs=[
+                gr.Audio(label="Processed Audio", type="filepath"),
+                gr.Image(label="Waveform Preview"),
+                gr.Textbox(label="Session Log (JSON)", lines=5),
+                gr.Textbox(label="Detected Genre", lines=1),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Edit One File at a Time",
+            description="Apply effects, preview waveform, and get full session log.",
+            flagging_mode="never",
+            submit_btn="Process Audio",
+            clear_btn=None
+        )
+
+    # --- Batch Processing ---
+    with gr.Tab("Batch Processing"):
+        gr.Interface(
+            fn=batch_process_audio,
+            inputs=[
+                gr.File(label="Upload Multiple Files", file_count="multiple"),
+                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
+                gr.Checkbox(label="Isolate Vocals After Effects"),
+                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
+                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+            ],
+            outputs=[
+                gr.File(label="Download ZIP of All Processed Files"),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Batch Audio Processor",
+            description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
+            flagging_mode="never",
+            submit_btn="Process All Files",
+            clear_btn=None
+        )
+
+    # --- Remix Mode ---
+    with gr.Tab("Remix Mode"):
+        gr.Interface(
+            fn=stem_split,
+            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
+            outputs=[
+                gr.File(label="Vocals"),
+                gr.File(label="Drums"),
+                gr.File(label="Bass"),
+                gr.File(label="Other")
+            ],
+            title="Split Into Drums, Bass, Vocals, and More",
+            description="Use AI to separate musical elements like vocals, drums, and bass.",
+            flagging_mode="never",
+            clear_btn=None
+        )
+
+    # --- Transcribe & Edit ---
+    with gr.Tab("Transcribe & Edit"):
+        gr.Interface(
+            fn=transcribe_audio,
+            inputs=gr.Audio(label="Upload Audio", type="filepath"),
+            outputs=gr.Textbox(label="Transcribed Text", lines=10),
+            title="Transcribe Spoken Content",
+            description="Convert voice to text and edit it before exporting again."
+        )
+
+    # --- TTS Voice Generator ---
+    with gr.Tab("💬 TTS Voice Generator"):
+        gr.Interface(
+            fn=generate_tts,
+            inputs=gr.Textbox(label="Enter Text", lines=5),
+            outputs=gr.Audio(label="Generated Speech", type="filepath"),
+            title="Text-to-Speech Generator",
+            description="Type anything and turn it into natural-sounding speech."
+        )
+
+    # --- VAD – Detect & Remove Silence ===
+    with gr.Tab("✂️ Trim Silence Automatically"):
+        gr.Interface(
+            fn=detect_silence,
+            inputs=[
+                gr.File(label="Upload Track"),
+                gr.Slider(minimum=-100, maximum=-10, value=-50, label="Silence Threshold (dB)"),
+                gr.Number(label="Min Silence Length (ms)", value=1000)
+            ],
+            outputs=gr.File(label="Trimmed Output"),
+            title="Auto-Detect & Remove Silence",
+            description="Detect and trim silence at start/end or between words"
+        )
+
+    # --- Load/Save Project File (.aiproj) ===
+    with gr.Tab("Save/Load Project"):
+        gr.Interface(
+            fn=save_project,
+            inputs=[
+                gr.File(label="Original Audio"),
+                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
+            ],
+            outputs=gr.File(label="Project File (.aiproj)"),
+            title="Save Everything Together",
+            description="Save your session, effects, and settings in one file to reuse later."
+        )
+
+        gr.Interface(
+            fn=load_project,
+            inputs=gr.File(label="Upload .aiproj File"),
+            outputs=[
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+            ],
+            title="Resume Last Project",
+            description="Load your saved session"
+        )
+
+    # --- Auto-Save / Resume Sessions ===
+    session_state = gr.State()
+
+    with gr.Tab("🧾 Auto-Save & Resume"):
+        gr.Markdown("Save your current state and resume editing later.")
+
+        action_radio = gr.Radio(["save", "load"], label="Action", value="save")
+        audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
+        preset_dropdown = gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0] if preset_names else None)
+        effect_checkbox = gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
+        save_btn = gr.Button("Save or Load Session")
+
+        loaded_audio = gr.Audio(label="Loaded Audio", type="filepath")
+        loaded_preset = gr.Dropdown(choices=preset_names, label="Loaded Preset")
+        loaded_effects = gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+
+        save_btn.click(
+            fn=save_or_resume_session,
+            inputs=[audio_input, preset_dropdown, effect_checkbox, action_radio],
+            outputs=[session_state, loaded_audio, loaded_preset, loaded_effects]
+        )
+
+    # --- Mix Two Tracks ===
+    with gr.Tab("Mix Two Tracks"):
+        gr.Interface(
+            fn=mix_tracks,
+            inputs=[
+                gr.File(label="Main Track"),
+                gr.File(label="Background Track"),
+                gr.Slider(minimum=-10, maximum=10, value=0, label="Volume Offset (dB)")
+            ],
+            outputs=gr.File(label="Mixed Output"),
+            title="Overlay Two Tracks",
+            description="Mix, blend, or subtract two audio files."
+        )
+
+    # === Voice Style Transfer (Dummy) ===
+    def apply_style_transfer(audio_path, mood="Happy"):
+        return audio_path
+
+    with gr.Tab("Voice Style Transfer"):
+        gr.Interface(
+            fn=apply_style_transfer,
+            inputs=[
+                gr.Audio(label="Upload Voice Clip", type="filepath"),
+                gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
+            ],
+            outputs=gr.Audio(label="Stylized Output", type="filepath"),
+            title="Change Emotional Tone of Voice",
+            description="Shift the emotional style of any voice clip."
+        )
+
+    # --- Voice Cloning (Fallback) ===
+    with gr.Tab("Voice Cloning (Demo)"):
+        gr.Interface(
+            fn=clone_voice,
+            inputs=[
+                gr.File(label="Source Voice Clip"),
+                gr.File(label="Target Voice Clip"),
+                gr.Textbox(label="Text to Clone", lines=5)
+            ],
+            outputs=gr.Audio(label="Cloned Output", type="filepath"),
+            title="Replace One Voice With Another (Demo)",
+            description="Clone voice from source to target speaker using AI"
+        )
+
+demo.launch()
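The Auto-Save & Resume tab routes everything through save_or_resume_session and a gr.State holder. Below is a minimal sketch of that round trip outside the UI, with the function body copied from the diff; the file name, preset, and effect list are placeholder values:

def save_or_resume_session(audio, preset, effects, action="save"):
    # "save": pack the current selections into one dict (held in gr.State);
    # "load": unpack a previously saved dict back into the three UI fields.
    if action == "save":
        return {"audio": audio, "preset": preset, "effects": effects}, None, None, None
    elif action == "load" and isinstance(audio, dict):
        return (
            None,
            audio.get("audio"),
            audio.get("preset"),
            audio.get("effects")
        )
    return None, None, None, None

# Saving: the first return value is the session dict; the other three outputs stay empty.
state, _, _, _ = save_or_resume_session("take1.wav", "Podcast", ["Normalize"], action="save")

# Loading: the saved dict goes back in through the first argument, and the last three
# return values repopulate the loaded audio, preset, and effects components.
_, audio, preset, effects = save_or_resume_session(state, None, None, action="load")
print(audio, preset, effects)  # -> take1.wav Podcast ['Normalize']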