Spaces:

amaai-lab
/

SonicMaster

Running on Zero

App Files Files Community

ambujm22 committed 3 days ago

Commit

5f61a8c

1 Parent(s): 74c76be

Add SonicMaster Gradio app

Browse files

Files changed (4) hide show

.vscode/settings.json +4 -0
README.md +9 -11
app.py +223 -0
requirements.txt +12 -0

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "python-envs.defaultEnvManager": "ms-python.python:system",
+    "python-envs.pythonProjects": []
+}

README.md CHANGED Viewed

@@ -1,18 +1,16 @@
 ---
-title: SonicMaster
-emoji: 👀
 colorFrom: indigo
-colorTo: yellow
 sdk: gradio
-sdk_version: 5.39.0
-app_file: app.py
-pinned: false
 license: apache-2.0
-short_description: 'SonicMaster: Towards Controllable All-in-One Music Restorati'
 ---
-Space will appear soon.
-Read paper: https://huggingface.co/papers/2508.03448

 ---
+title: SonicMaster – Text-Guided Music Restoration & Mastering
+emoji: 🎧
 colorFrom: indigo
+colorTo: blue
 sdk: gradio
+sdk_version: 4.44.0
+python_version: "3.10"
 license: apache-2.0
 ---
+Upload audio or pick an example, write a text prompt (e.g., "reduce reverb", "clean distortion", "enhance clarity"), and SonicMaster improves the audio.
+- Model weights: https://huggingface.co/amaai-lab/SonicMaster
+- Paper: https://arxiv.org/abs/2508.03448
+- Project page: https://amaai-lab.github.io/SonicMaster/

app.py ADDED Viewed

	@@ -0,0 +1,223 @@

+import os
+import sys
+import subprocess
+from pathlib import Path
+from typing import Tuple, Optional
+import gradio as gr
+import numpy as np
+import soundfile as sf
+from huggingface_hub import hf_hub_download
+# Directory containing this file; every other path below is derived from it.
+SPACE_ROOT = Path(__file__).parent.resolve()
+# Local checkout location for the SonicMaster GitHub repository (see ensure_repo).
+REPO_DIR   = SPACE_ROOT / "SonicMasterRepo"
+# Hugging Face Hub model repo and the checkpoint file downloaded from it.
+WEIGHTS_REPO = "amaai-lab/SonicMaster"
+WEIGHTS_FILE = "model.safetensors"   # from the HF model repo
+# Download target for the checkpoint; created at import time if missing.
+CACHE_DIR = SPACE_ROOT / "weights"
+CACHE_DIR.mkdir(parents=True, exist_ok=True)
+# ---------- 1) Pull weights from HF Hub ----------
+def get_weights_path() -> Path:
+    weights_path = hf_hub_download(
+        repo_id=WEIGHTS_REPO,
+        filename=WEIGHTS_FILE,
+        local_dir=CACHE_DIR.as_posix(),
+        local_dir_use_symlinks=False,
+        force_download=False,
+        resume_download=True,
+    )
+    return Path(weights_path)
+# ---------- 2) Clone GitHub repo for code (model.py / inference_*.py ) ----------
+def ensure_repo() -> Path:
+    if not REPO_DIR.exists():
+        subprocess.run(
+            ["git", "clone", "--depth", "1", "https://github.com/AMAAI-Lab/SonicMaster", REPO_DIR.as_posix()],
+            check=True,
+        )
+    if REPO_DIR.as_posix() not in sys.path:
+        sys.path.append(REPO_DIR.as_posix())
+    return REPO_DIR
+# ---------- 3) Examples: use only *.wav from samples/inputs ----------
+def build_examples():
+    """
+    Discover up to 10 .wav files from:
+        SonicMasterRepo/samples/inputs
+    and pair them with prompts for gr.Examples.
+    """
+    repo = ensure_repo()
+    wav_dir = repo / "samples" / "inputs"
+    wav_paths = sorted(p for p in wav_dir.glob("*.wav") if p.is_file())
+    prompts = [
+        "Increase the clarity of this song by emphasizing treble frequencies.",
+        "Make this song sound more boomy by amplifying the low end bass frequencies.",
+        "Can you make this sound louder, please?",
+        "Make the audio smoother and less distorted.",
+        "Improve the balance in this song.",
+        "Disentangle the left and right channels to give this song a stereo feeling.",
+        "Correct the unnatural frequency emphasis. Reduce the roominess or echo.",
+        "Raise the level of the vocals, please.",
+        "Increase the clarity of this song by emphasizing treble frequencies.",
+        "Please, dereverb this audio.",
+    ]
+    examples = []
+    for i, p in enumerate(wav_paths[:10]):
+        prompt = prompts[i] if i < len(prompts) else prompts[-1]
+        examples.append([p.as_posix(), prompt])
+    # Fallback: if no wavs found, provide an empty list (Gradio handles it)
+    return examples
+# ---------- 4) I/O helpers ----------
+def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
+    # Ensure (samples, channels) for soundfile
+    if wav.ndim == 2 and wav.shape[0] < wav.shape[1]:
+        # (channels, samples) -> (samples, channels)
+        data = wav.T
+    else:
+        data = wav
+    sf.write(path.as_posix(), data, sr)
+def read_audio(path: str) -> Tuple[np.ndarray, int]:
+    wav, sr = sf.read(path, always_2d=False)
+    if wav.dtype == np.float64:
+        wav = wav.astype(np.float32)
+    return wav, sr
+def run_sonicmaster_cli(
+    input_wav_path: Path,
+    prompt: str,
+    out_path: Path,
+    _logs: list,  # kept for compatibility, but not shown in UI
+    progress: Optional[gr.Progress] = None
+) -> bool:
+    """
+    Uses the current Python interpreter and tries a few script names/flags.
+    """
+    import sys, shutil
+    if progress: progress(0.15, desc="Loading weights & repo")
+    ckpt = get_weights_path()
+    repo = ensure_repo()
+    # Use the exact Python interpreter running this process
+    py = sys.executable or shutil.which("python3") or shutil.which("python") or "python3"
+    # Prefer the scripts we know accept --ckpt/--input/--prompt/--output
+    script_candidates = [
+        repo / "infer_single.py",         # if you kept your own name
+    ]
+    CANDIDATE_CMDS = []
+    for script in script_candidates:
+        if script.exists():
+            CANDIDATE_CMDS.append([
+                py, script.as_posix(),
+                "--ckpt", ckpt.as_posix(),
+                "--input", input_wav_path.as_posix(),
+                "--prompt", prompt,
+                "--output", out_path.as_posix(),
+            ])
+    # As a last resort, try alternative flag names (if someone changed the CLI)
+    for script in script_candidates:
+        if script.exists():
+            CANDIDATE_CMDS.append([
+                py, script.as_posix(),
+                "--weights", ckpt.as_posix(),
+                "--input", input_wav_path.as_posix(),
+                "--text", prompt,
+                "--out", out_path.as_posix(),
+            ])
+    if not CANDIDATE_CMDS:
+        return False
+    for idx, cmd in enumerate(CANDIDATE_CMDS, start=1):
+        try:
+            if progress: progress(0.35 + 0.05*idx, desc=f"Running inference (try {idx})")
+            res = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            if out_path.exists() and out_path.stat().st_size > 0:
+                if progress: progress(0.9, desc="Post-processing output")
+                return True
+        except subprocess.CalledProcessError:
+            continue
+        except Exception:
+            continue
+    return False
+def enhance_audio_ui(
+    audio_path: str,
+    prompt: str,
+    progress=gr.Progress(track_tqdm=True)
+) -> Tuple[int, np.ndarray]:
+    """
+    Gradio callback: accepts a file path, a prompt, and returns enhanced audio.
+    """
+    if progress: progress(0.0, desc="Validating input")
+    if not audio_path or not prompt:
+        raise gr.Error("Please provide audio and a text prompt.")
+    # Standardize input -> temp wav
+    wav, sr = read_audio(audio_path)
+    if progress: progress(0.15, desc="Preparing audio")
+    tmp_in = SPACE_ROOT / "tmp_in.wav"
+    tmp_out = SPACE_ROOT / "tmp_out.wav"
+    if tmp_out.exists():
+        try:
+            tmp_out.unlink()
+        except Exception:
+            pass
+    save_temp_wav(wav, sr, tmp_in)
+    # Run model
+    if progress: progress(0.3, desc="Starting inference")
+    ok = run_sonicmaster_cli(tmp_in, prompt, tmp_out, _logs=[], progress=progress)
+    # Return output (or echo input)
+    if ok and tmp_out.exists() and tmp_out.stat().st_size > 0:
+        out_wav, out_sr = read_audio(tmp_out.as_posix())
+        if progress: progress(1.0, desc="Done")
+        return (out_sr, out_wav)
+    else:
+        if progress: progress(1.0, desc="No output produced")
+        # Return original audio if model didn't produce output
+        return (sr, wav)
+# ---------- 6) Gradio UI ----------
+# Page layout: a heading, then one row with two equal-width columns
+# (inputs + examples on the left, the result player on the right).
+with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as demo:
+    gr.Markdown("## 🎧 SonicMaster\nUpload or choose an example (from repo: `samples/inputs/*.wav`), write a text prompt (e.g., *reduce reverb*, *clean distortion*), then click **Enhance**.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            # type="filepath" makes the callback receive a path string, which is what enhance_audio_ui expects
+            in_audio = gr.Audio(label="Input Audio (upload or use examples)", type="filepath")
+            prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., reduce reverb and enhance clarity")
+            run_btn = gr.Button("🚀 Enhance", variant="primary")
+            # Use wavs from SonicMasterRepo/samples/inputs
+            # (build_examples() runs here, at import time, and calls ensure_repo())
+            gr.Examples(
+                examples=build_examples(),
+                inputs=[in_audio, prompt],
+                label="Examples (repo: samples/inputs/*.wav)"
+            )
+        with gr.Column(scale=1):
+            out_audio = gr.Audio(label="Enhanced Audio (output)")
+    # Per-event concurrency (use 1 unless you know your VRAM/CPU can handle more)
+    run_btn.click(
+        fn=enhance_audio_ui,
+        inputs=[in_audio, prompt],
+        outputs=[out_audio],
+        concurrency_limit=1,
+    )
+# Warm up cache & repo, then launch
+_ = get_weights_path()
+_ = ensure_repo()
+demo.queue(max_size=16).launch()
+# Or, a global default for all events:
+# demo.queue(max_size=16, default_concurrency_limit=1).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+--extra-index-url https://download.pytorch.org/whl/cu124
+torch==2.6.0+cu124
+torchaudio==2.6.0+cu124
+transformers>=4.43
+accelerate>=0.33
+diffusers>=0.31
+soundfile
+gradio>=4.41
+huggingface_hub
+safetensors
+pyyaml