test spleeter
- app.py +48 -71
- requirements.txt +2 -2
- stem_separation_spleeter.py +156 -0
app.py
CHANGED
@@ -12,8 +12,6 @@ import warnings
 import soundfile as sf
 import librosa
 import time
-import subprocess
-import shutil
 warnings.filterwarnings("ignore")

 # --- Setup the models ---
@@ -28,29 +26,28 @@ htdemucs_model = htdemucs_model.to(device)
 htdemucs_model.eval()
 print("HT-Demucs model loaded successfully.")

-# Setup Spleeter with
+# Setup Spleeter with Python API approach
 print("Setting up Spleeter...")
+spleeter_separator = None
+spleeter_audio_adapter = None
 spleeter_available = False

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return False
-
-spleeter_available = check_spleeter_installation()
+try:
+    from spleeter.separator import Separator
+    from spleeter.audio.adapter import AudioAdapter
+
+    # Initialize Spleeter separator for 5stems model
+    print("Creating Spleeter 5stems separator...")
+    spleeter_separator = Separator('spleeter:5stems')
+    spleeter_audio_adapter = AudioAdapter.default()
+    spleeter_available = True
+    print("✅ Spleeter 5stems model loaded successfully!")
+
+except Exception as e:
+    print(f"❌ Failed to load Spleeter: {e}")
+    spleeter_separator = None
+    spleeter_audio_adapter = None
+    spleeter_available = False

 # --- HT-Demucs separation function ---
 def separate_with_htdemucs(audio_path):
@@ -102,13 +99,13 @@ def separate_with_htdemucs(audio_path):
 def separate_with_spleeter(audio_path):
     """
     Separates an audio file using Spleeter into vocals, drums, bass, other, and piano.
-    Uses
+    Uses Python API approach from stem_separation_spleeter.py
     Returns FILE PATHS.
     """
     if audio_path is None:
         return None, None, None, None, None, "Please upload an audio file."

-    if not spleeter_available:
+    if not spleeter_available or spleeter_separator is None or spleeter_audio_adapter is None:
         return None, None, None, None, None, "❌ Spleeter not available. Please install Spleeter."

     try:
@@ -119,62 +116,42 @@ def separate_with_spleeter(audio_path):
         output_dir = f"spleeter_stems_{timestamp}"
         os.makedirs(output_dir, exist_ok=True)

-        #
-
-
-
-            '-o', output_dir,
-            '-p', 'spleeter:5stems-16kHz'
-        ]
-
-        print(f"Spleeter: Running command: {' '.join(cmd)}")
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
-
-        if result.returncode != 0:
-            print(f"Spleeter command failed: {result.stderr}")
-            return None, None, None, None, None, f"❌ Spleeter command failed: {result.stderr}"
+        # Load audio using Spleeter's audio adapter (from stem_separation_spleeter.py)
+        print("Spleeter: Loading audio...")
+        waveform, sample_rate = spleeter_audio_adapter.load(audio_path, sample_rate=44100)
+        print(f"Spleeter: Loaded audio - shape: {waveform.shape}, sr: {sample_rate}")

+        # Perform the separation (from stem_separation_spleeter.py)
+        print("Spleeter: Separating audio sources...")
+        prediction = spleeter_separator.separate(waveform)
         print("Spleeter: Separation complete.")
+        print(f"Spleeter: Prediction keys: {list(prediction.keys())}")

-        #
-        # Spleeter creates a subdirectory with the input filename
-        input_filename = os.path.splitext(os.path.basename(audio_path))[0]
-        spleeter_output_dir = os.path.join(output_dir, input_filename)
-
-        if not os.path.exists(spleeter_output_dir):
-            print(f"Expected output directory not found: {spleeter_output_dir}")
-            return None, None, None, None, None, "❌ Spleeter output directory not found"
-
-        # Map Spleeter output files to our expected order
-        stem_mapping = {
-            "vocals": "vocals.wav",
-            "drums": "drums.wav",
-            "bass": "bass.wav",
-            "other": "other.wav",
-            "piano": "piano.wav"
-        }
-
+        # Save stems with timestamp
         output_paths = []
-
-
-
-
-
-
-
-            print(f"
+        stem_names = ["vocals", "drums", "bass", "other", "piano"]
+
+        for stem_name in stem_names:
+            if stem_name in prediction:
+                out_path = os.path.join(output_dir, f"{stem_name}_{timestamp}.wav")
+                stem_audio = prediction[stem_name]
+
+                print(f"Spleeter: {stem_name} audio shape: {stem_audio.shape}, dtype: {stem_audio.dtype}")
+
+                # Save using soundfile for better compatibility
+                sf.write(out_path, stem_audio, sample_rate)
+                output_paths.append(out_path)
+                print(f"✅ Spleeter saved {stem_name} to {out_path}")
             else:
-                print(f"⚠️ Warning: {stem_name}
+                print(f"⚠️ Warning: {stem_name} not found in prediction")
                 output_paths.append(None)

-        #
-
-
-
+        # Ensure we have 5 outputs
+        while len(output_paths) < 5:
+            output_paths.append(None)
+
         return output_paths[0], output_paths[1], output_paths[2], output_paths[3], output_paths[4], "✅ Spleeter separation successful!"

-    except subprocess.TimeoutExpired:
-        return None, None, None, None, None, "❌ Spleeter separation timed out (5 minutes)"
     except Exception as e:
         print(f"Spleeter Error: {e}")
         import traceback
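For quick reference outside the Gradio app, the Python-API path this commit switches to reduces to the sketch below. It is a minimal standalone version assuming spleeter and soundfile are installed; 'input.mp3' and the 'stems/' directory are placeholder names, not files from this repo.

# Minimal standalone sketch of the Spleeter Python-API path used in app.py above.
# Assumes `pip install spleeter soundfile`; 'input.mp3' and 'stems/' are placeholders.
import os
import soundfile as sf
from spleeter.separator import Separator
from spleeter.audio.adapter import AudioAdapter

separator = Separator('spleeter:5stems')   # vocals, drums, bass, other, piano
adapter = AudioAdapter.default()

waveform, sr = adapter.load('input.mp3', sample_rate=44100)
prediction = separator.separate(waveform)  # dict: stem name -> float32 array of shape (n_samples, 2)

os.makedirs('stems', exist_ok=True)
for name, audio in prediction.items():
    sf.write(os.path.join('stems', f'{name}.wav'), audio, sr)
    print(f'wrote stems/{name}.wav')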
requirements.txt
CHANGED
@@ -8,5 +8,5 @@ numpy>=1.21.0,<1.25.0
 soundfile
 librosa==0.8.1
 httpx>=0.19.0,<0.20.0
-numba==0.
-llvmlite==0.
+numba==0.55.2
+llvmlite==0.38.1
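The new pins track the numba/llvmlite combination that works with librosa 0.8.1. A minimal sanity check (an assumption of this note, not part of the repo) after installing the requirements:

# Quick check that the pinned stack imports together (run after `pip install -r requirements.txt`).
import numba, llvmlite, librosa

print("numba", numba.__version__)        # expected 0.55.2
print("llvmlite", llvmlite.__version__)  # expected 0.38.1
print("librosa", librosa.__version__)    # expected 0.8.1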
stem_separation_spleeter.py
ADDED
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+"""Stem_Separation_Spleeter.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1ZanGdGmndmOSa0q2arjO1hT5p72n56rm
+
+# Audio Stem Separation with Spleeter
+Audio stem separation is the task of isolating individual components (stems) from a mixed audio signal, such as vocals, bass, drums, and other instrumental parts. This technology has various applications, including music production, remixing, karaoke, and forensic audio analysis. Spleeter, developed by Deezer Research, is a powerful open-source tool that leverages deep learning to perform this separation efficiently and effectively.
+
+This notebook demonstrates how to use Spleeter for audio stem separation. It provides a step-by-step guide to:
+
+1. Install the Spleeter library
+2. Separate an audio file into different stems (vocals, bass, drums, and other)
+3. Visualize the separated audio sources using waveforms and spectrograms
+
+Additionally, it demonstrates how to use Spleeter from the command line.
+The code is designed to be adaptable for both Google Colab and local Python environments, with clear instructions for running the script in different settings.
+
+View Spleeter on GitHub: https://github.com/deezer/spleeter
+
+## Instructions to run the code
+Let's begin by installing Spleeter.
+
+I had problems running this in my local Python 3.11 environment because my numpy and librosa versions were incompatible. If this happens to you as well, I recommend using Colab instead or using an older Python version (for example 3.8).
+"""
+
+!pip install spleeter
+!pip install librosa
+!pip install "matplotlib>=3.5"
+
+"""Next, we import all the required packages."""
+
+from spleeter.separator import Separator
+from spleeter.audio.adapter import AudioAdapter
+from IPython.display import Audio, display
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+
+"""Before running the next cell, please place a .mp3 / .wav file into this directory and paste its name into the variable *INPUT_FILE* in the code.
+If you are using Colab, you can find this option by selecting Files in the vertical bar on the left.
+
+With the current settings, the model separates vocals, bass, drums, piano and accompaniment from the input file.
+However, you can change this by setting *SPLEETER_MODEL* to one of the following:
+- spleeter:2stems
+  - separates only vocals and accompaniment
+- spleeter:4stems
+  - separates vocals, bass, drums and accompaniment
+- spleeter:5stems
+  - separates vocals, bass, drums, piano and accompaniment
+
+Let's start by separating our first stems.
+Running this cell might take some time, depending on your setup, the audio file and the selected model.
+"""
+
+# insert your file name here
+INPUT_FILE = 'example_for_demo.mp3'
+INPUT_FILENAME = INPUT_FILE.split('.')[0]
+OUTPUT_DIR = 'outputs'
+SUPPORTED_EXTENSIONS = ('.wav', '.mp3')
+SAMPLE_RATE = 44100
+SPLEETER_MODEL = 'spleeter:5stems'  # You might want to change this to 'spleeter:2stems' or 'spleeter:4stems' for different models
+
+# Initialize separator
+separator = Separator(SPLEETER_MODEL)
+
+# Load audio
+audio_loader = AudioAdapter.default()
+waveform, _ = audio_loader.load(INPUT_FILE, sample_rate=SAMPLE_RATE)
+
+# Perform the separation and save the output
+print("Separating audio sources...")
+# prediction = separator.separate(waveform)  # separates the audio in memory, without saving it
+separator.separate_to_file(INPUT_FILE, OUTPUT_DIR)
+
+"""## Displaying Audio, Spectrogram and Waveform
+
+Let's create two helper functions that plot the waveform and the spectrogram of an audio file.
+Using different librosa methods, we can easily create such plots.
+
+Finally, we create a method that calls these two functions and additionally displays the audio, so that it can be played directly in this notebook.
+"""
+
+# Visualization of Waveforms and Spectrograms
+
+# Plot waveform
+def plot_waveform(waveform, sr, title='Waveform'):
+    plt.figure(figsize=(15, 2))
+    librosa.display.waveshow(waveform, sr=sr)
+    plt.title(title)
+    plt.tight_layout()
+    plt.show()
+
+# Plot spectrogram
+def plot_spectrogram(signal, sr, title='Spectrogram'):
+    D = librosa.amplitude_to_db(np.abs(librosa.stft(signal)), ref=np.max)
+    plt.figure(figsize=(15, 2))
+    librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
+    plt.title(title)
+    plt.colorbar(format='%+2.0f dB')
+    plt.tight_layout()
+    plt.show()
+
+def display_audio_and_plots(fileName):
+    display(Audio(fileName))
+    print(fileName)
+    y, sr = librosa.load(fileName)
+    plot_waveform(y, sr, f'Waveform of {fileName}')
+    plot_spectrogram(y, sr, f'Spectrogram of {fileName}')
+    print('---' * 50)
+
+"""Now we can use this method to display all the relevant formats for the original song and each separated stem.
+Here you should be able to notice some characteristics (depending on the song that you picked):
+- The bass should be located at the lower frequencies of the spectrogram.
+- The spectrogram of drums often consists of vertical lines, which means that they occur across many frequencies at the same time and that their hits are very short (which is typical for drums).
+- Vocals often show up as horizontal lines in the spectrogram (depending on the genre). This is because their tones are often held longer, compared to other instruments.
+
+In the following cell, you can insert the path of any wav/mp3 file and display the different plots.
+You might have to comment out some calls of display_audio_and_plots() depending on the model that you've chosen.
+"""
+
+display_audio_and_plots(INPUT_FILE)
+
+# if you are running this in a local environment, you can use the following code to display the audio
+display_audio_and_plots(f'{OUTPUT_DIR}/{INPUT_FILENAME}/other.wav')
+display_audio_and_plots(f'{OUTPUT_DIR}/{INPUT_FILENAME}/vocals.wav')
+display_audio_and_plots(f'{OUTPUT_DIR}/{INPUT_FILENAME}/bass.wav')
+display_audio_and_plots(f'{OUTPUT_DIR}/{INPUT_FILENAME}/drums.wav')
+display_audio_and_plots(f'{OUTPUT_DIR}/{INPUT_FILENAME}/piano.wav')
+
+
+# if you are running this in COLAB, you can use the following code to display the audio
+# display_audio_and_plots(f'/content/{OUTPUT_DIR}/{INPUT_FILENAME}/other.wav')
+# display_audio_and_plots(f'/content/{OUTPUT_DIR}/{INPUT_FILENAME}/vocals.wav')
+# display_audio_and_plots(f'/content/{OUTPUT_DIR}/{INPUT_FILENAME}/bass.wav')
+# display_audio_and_plots(f'/content/{OUTPUT_DIR}/{INPUT_FILENAME}/drums.wav')
+# display_audio_and_plots(f'/content/{OUTPUT_DIR}/{INPUT_FILENAME}/piano.wav')
+
+"""## Running Spleeter on the Command Line
+
+The following cell demonstrates how to use Spleeter on the command line. You can execute the cell here in this notebook, or copy the command into your command line. Note that if you run this directly on the command line, you have to remove the "!" before the command.
+
+Again, you can choose between different models here: either 2, 4 or 5 stems can be separated.
+
+To run Spleeter on the command line, you provide the name of an output directory (here: audio_output), optionally a model, and an audio file (here: example_for_demo.mp3).
+"""
+
+# 2 stems
+!spleeter separate -o audio_output example_for_demo.mp3
+# 4 stems
+# !spleeter separate -o audio_output -p spleeter:4stems example_for_demo.mp3
+
+# 5 stems
+# !spleeter separate -o audio_output -p spleeter:5stems example_for_demo.mp3
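As a side note on the layout produced above: separate_to_file writes each stem as a WAV into a subdirectory named after the input file, which is exactly the layout the display calls above expect. A minimal check of that assumption, reusing the notebook's example names:

# Verify the directory layout written by separate_to_file (uses the notebook's example names).
import os

stem_dir = os.path.join('outputs', 'example_for_demo')  # OUTPUT_DIR / input file name without extension
for name in ('vocals.wav', 'drums.wav', 'bass.wav', 'other.wav', 'piano.wav'):
    path = os.path.join(stem_dir, name)
    print(path, '->', 'found' if os.path.exists(path) else 'missing')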