ambujm22 commited on
Commit
5f61a8c
·
1 Parent(s): 74c76be

Add SonicMaster Gradio app

Browse files
Files changed (4) hide show
  1. .vscode/settings.json +4 -0
  2. README.md +9 -11
  3. app.py +223 -0
  4. requirements.txt +12 -0
.vscode/settings.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "python-envs.defaultEnvManager": "ms-python.python:system",
3
+ "python-envs.pythonProjects": []
4
+ }
README.md CHANGED
@@ -1,18 +1,16 @@
1
  ---
2
- title: SonicMaster
3
- emoji: 👀
4
  colorFrom: indigo
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.39.0
8
- app_file: app.py
9
- pinned: false
10
  license: apache-2.0
11
- short_description: 'SonicMaster: Towards Controllable All-in-One Music Restorati'
12
  ---
13
 
14
- Space will appear soon.
15
-
16
- Read paper: https://huggingface.co/papers/2508.03448
17
-
18
 
 
 
 
 
1
  ---
2
+ title: SonicMaster – Text-Guided Music Restoration & Mastering
3
+ emoji: 🎧
4
  colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
+ python_version: 3.10
 
9
  license: apache-2.0
 
10
  ---
11
 
12
+ Upload audio or pick an example, write a text prompt (e.g., "reduce reverb", "clean distortion", "enhance clarity"), and SonicMaster improves the audio.
 
 
 
13
 
14
+ - Model weights: https://huggingface.co/amaai-lab/SonicMaster
15
+ - Paper: https://arxiv.org/abs/2508.03448
16
+ - Project page: https://amaai-lab.github.io/SonicMaster/
app.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import Tuple, Optional
6
+
7
+ import gradio as gr
8
+ import numpy as np
9
+ import soundfile as sf
10
+ from huggingface_hub import hf_hub_download
11
+
12
+ SPACE_ROOT = Path(__file__).parent.resolve()
13
+ REPO_DIR = SPACE_ROOT / "SonicMasterRepo"
14
+ WEIGHTS_REPO = "amaai-lab/SonicMaster"
15
+ WEIGHTS_FILE = "model.safetensors" # from the HF model repo
16
+ CACHE_DIR = SPACE_ROOT / "weights"
17
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
18
+
19
+ # ---------- 1) Pull weights from HF Hub ----------
20
+ def get_weights_path() -> Path:
21
+ weights_path = hf_hub_download(
22
+ repo_id=WEIGHTS_REPO,
23
+ filename=WEIGHTS_FILE,
24
+ local_dir=CACHE_DIR.as_posix(),
25
+ local_dir_use_symlinks=False,
26
+ force_download=False,
27
+ resume_download=True,
28
+ )
29
+ return Path(weights_path)
30
+
31
+ # ---------- 2) Clone GitHub repo for code (model.py / inference_*.py ) ----------
32
+ def ensure_repo() -> Path:
33
+ if not REPO_DIR.exists():
34
+ subprocess.run(
35
+ ["git", "clone", "--depth", "1", "https://github.com/AMAAI-Lab/SonicMaster", REPO_DIR.as_posix()],
36
+ check=True,
37
+ )
38
+ if REPO_DIR.as_posix() not in sys.path:
39
+ sys.path.append(REPO_DIR.as_posix())
40
+ return REPO_DIR
41
+
42
# ---------- 3) Examples: use only *.wav from samples/inputs ----------
def build_examples():
    """
    Discover up to 10 .wav files from:
        SonicMasterRepo/samples/inputs
    and pair them with prompts for gr.Examples.
    """
    repo = ensure_repo()
    wav_dir = repo / "samples" / "inputs"
    wav_paths = sorted(p for p in wav_dir.glob("*.wav") if p.is_file())

    prompts = [
        "Increase the clarity of this song by emphasizing treble frequencies.",
        "Make this song sound more boomy by amplifying the low end bass frequencies.",
        "Can you make this sound louder, please?",
        "Make the audio smoother and less distorted.",
        "Improve the balance in this song.",
        "Disentangle the left and right channels to give this song a stereo feeling.",
        "Correct the unnatural frequency emphasis. Reduce the roominess or echo.",
        "Raise the level of the vocals, please.",
        "Increase the clarity of this song by emphasizing treble frequencies.",
        "Please, dereverb this audio.",
    ]

    # Pair the first 10 wavs with prompts; if there are more files than
    # prompts, reuse the final prompt. An empty list is fine for gr.Examples.
    return [
        [p.as_posix(), prompts[i] if i < len(prompts) else prompts[-1]]
        for i, p in enumerate(wav_paths[:10])
    ]
73
+
74
# ---------- 4) I/O helpers ----------
def save_temp_wav(wav: np.ndarray, sr: int, path: Path):
    """Write `wav` at sample rate `sr` to `path` via soundfile.

    soundfile expects (samples, channels); a 2-D array with more columns than
    rows is presumably (channels, samples) — TODO confirm for very short,
    many-channel clips — so it is transposed before writing.
    """
    data = wav
    if data.ndim == 2 and data.shape[0] < data.shape[1]:
        data = data.T  # (channels, samples) -> (samples, channels)
    sf.write(path.as_posix(), data, sr)
83
+
84
def read_audio(path: str) -> Tuple[np.ndarray, int]:
    """Load an audio file and return ``(waveform, sample_rate)``.

    soundfile returns float64 by default; that case is downcast to float32
    to halve memory use. Other dtypes are passed through unchanged.
    """
    wav, sr = sf.read(path, always_2d=False)
    if wav.dtype == np.float64:
        wav = wav.astype(np.float32)
    return wav, sr
89
+
90
def run_sonicmaster_cli(
    input_wav_path: Path,
    prompt: str,
    out_path: Path,
    _logs: list,  # kept for compatibility, but not shown in UI
    progress: Optional[gr.Progress] = None
) -> bool:
    """Run SonicMaster inference through its CLI script.

    Tries each known script / flag-name combination in turn until one leaves
    a non-empty file at ``out_path``.

    Args:
        input_wav_path: Path to the prepared input wav.
        prompt: Text instruction passed to the model.
        out_path: Where the enhanced wav should be written by the CLI.
        _logs: Unused; retained so existing callers keep working.
        progress: Optional Gradio progress reporter.

    Returns:
        True if an output file was produced, False otherwise.
    """
    # `sys` is already imported at module level — the original re-imported it
    # locally for no reason. `shutil` is only needed here, so import locally.
    import shutil

    if progress: progress(0.15, desc="Loading weights & repo")
    ckpt = get_weights_path()
    repo = ensure_repo()

    # Use the exact Python interpreter running this process.
    py = sys.executable or shutil.which("python3") or shutil.which("python") or "python3"

    # Scripts we know about (keep any custom name here).
    script_candidates = [s for s in (repo / "infer_single.py",) if s.exists()]

    # Primary flag names first, then the alternative names some forks use.
    flag_sets = [
        ("--ckpt", "--input", "--prompt", "--output"),
        ("--weights", "--input", "--text", "--out"),
    ]
    candidate_cmds = [
        [
            py, script.as_posix(),
            ckpt_flag, ckpt.as_posix(),
            in_flag, input_wav_path.as_posix(),
            prompt_flag, prompt,
            out_flag, out_path.as_posix(),
        ]
        for ckpt_flag, in_flag, prompt_flag, out_flag in flag_sets
        for script in script_candidates
    ]

    if not candidate_cmds:
        return False

    for idx, cmd in enumerate(candidate_cmds, start=1):
        try:
            if progress: progress(0.35 + 0.05*idx, desc=f"Running inference (try {idx})")
            # stdout/stderr are captured but deliberately unused: success is
            # judged solely by the presence of a non-empty output file.
            subprocess.run(cmd, capture_output=True, text=True, check=True)
            if out_path.exists() and out_path.stat().st_size > 0:
                if progress: progress(0.9, desc="Post-processing output")
                return True
        except Exception:
            # Best-effort: on any failure, fall through to the next candidate.
            continue
    return False
151
+
152
+
153
def enhance_audio_ui(
    audio_path: str,
    prompt: str,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[int, np.ndarray]:
    """
    Gradio callback: accepts a file path, a prompt, and returns enhanced audio.
    """
    if progress: progress(0.0, desc="Validating input")
    if not (audio_path and prompt):
        raise gr.Error("Please provide audio and a text prompt.")

    # Standardize the input into a temp wav next to the app.
    wav, sr = read_audio(audio_path)
    if progress: progress(0.15, desc="Preparing audio")
    tmp_in = SPACE_ROOT / "tmp_in.wav"
    tmp_out = SPACE_ROOT / "tmp_out.wav"

    # Best-effort removal of any stale output from a previous run.
    if tmp_out.exists():
        try:
            tmp_out.unlink()
        except Exception:
            pass

    save_temp_wav(wav, sr, tmp_in)

    # Run the model.
    if progress: progress(0.3, desc="Starting inference")
    ok = run_sonicmaster_cli(tmp_in, prompt, tmp_out, _logs=[], progress=progress)

    produced = ok and tmp_out.exists() and tmp_out.stat().st_size > 0
    if produced:
        out_wav, out_sr = read_audio(tmp_out.as_posix())
        if progress: progress(1.0, desc="Done")
        return (out_sr, out_wav)

    # The model produced nothing usable: echo the original audio back.
    if progress: progress(1.0, desc="No output produced")
    return (sr, wav)
191
+
192
# ---------- 6) Gradio UI ----------
with gr.Blocks(title="SonicMaster – Text-Guided Restoration & Mastering", fill_height=True) as demo:
    gr.Markdown("## 🎧 SonicMaster\nUpload or choose an example (from repo: `samples/inputs/*.wav`), write a text prompt (e.g., *reduce reverb*, *clean distortion*), then click **Enhance**.")
    with gr.Row():
        # Left column: input widgets + examples.
        with gr.Column(scale=1):
            input_audio = gr.Audio(label="Input Audio (upload or use examples)", type="filepath")
            prompt_box = gr.Textbox(label="Text Prompt", placeholder="e.g., reduce reverb and enhance clarity")
            enhance_btn = gr.Button("πŸš€ Enhance", variant="primary")

            # Example wavs come from SonicMasterRepo/samples/inputs.
            gr.Examples(
                examples=build_examples(),
                inputs=[input_audio, prompt_box],
                label="Examples (repo: samples/inputs/*.wav)"
            )
        # Right column: result player.
        with gr.Column(scale=1):
            output_audio = gr.Audio(label="Enhanced Audio (output)")

    # Per-event concurrency (keep at 1 unless VRAM/CPU headroom is known).
    enhance_btn.click(
        fn=enhance_audio_ui,
        inputs=[input_audio, prompt_box],
        outputs=[output_audio],
        concurrency_limit=1,
    )

# Warm the weight cache and repo clone before serving requests.
_ = get_weights_path()
_ = ensure_repo()
demo.queue(max_size=16).launch()
# Or, a global default for all events:
# demo.queue(max_size=16, default_concurrency_limit=1).launch()
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu124
2
+ torch==2.6.0+cu124
3
+ torchaudio==2.6.0+cu124
4
+
5
+ transformers>=4.43
6
+ accelerate>=0.33
7
+ diffusers>=0.31
8
+ soundfile
9
+ gradio>=4.41
10
+ huggingface_hub
11
+ safetensors
12
+ pyyaml