thecollabagepatch committed
Commit • 3e4e311
1 Parent(s): abfd774
herewego

Files changed:
- .gitmodules +3 -0
- app.py +186 -0
- packages.txt +2 -0
- requirements.txt +5 -0
.gitmodules
ADDED
@@ -0,0 +1,3 @@
+[submodule "audiocraft"]
+	path = audiocraft
+	url = https://github.com/facebookresearch/audiocraft
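Note: a plain clone of the Space does not fetch the audiocraft submodule by itself. A minimal sketch of a runtime guard (a hypothetical helper, not part of this commit) that initializes it before app.py imports audiocraft:

    # Hypothetical guard: fetch the audiocraft submodule if a plain clone left it empty.
    import os
    import subprocess

    if not os.path.isdir(os.path.join("audiocraft", "audiocraft")):
        subprocess.run(["git", "submodule", "update", "--init", "--recursive"], check=True)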
app.py
ADDED
@@ -0,0 +1,186 @@
+import gradio as gr
+from musiclang_predict import MusicLangPredictor
+import random
+import subprocess
+import os
+import torchaudio
+import torch
+import numpy as np
+from audiocraft.models import MusicGen
+from audiocraft.data.audio import audio_write
+from pydub import AudioSegment
+
+# Utility Functions
+def peak_normalize(y, target_peak=0.97):
+    return target_peak * (y / np.max(np.abs(y)))
+
+def rms_normalize(y, target_rms=0.05):
+    return y * (target_rms / np.sqrt(np.mean(y**2)))
+
+def preprocess_audio(waveform):
+    waveform_np = waveform.cpu().squeeze().numpy()  # Move to CPU before converting to NumPy
+    processed_waveform_np = rms_normalize(peak_normalize(waveform_np))
+    return torch.from_numpy(processed_waveform_np).unsqueeze(0).to(device)
+
+def create_slices(song, sr, slice_duration, bpm, num_slices=5):
+    song_length = song.shape[-1] / sr
+    slices = []
+
+    # Ensure the first slice is from the beginning of the song
+    first_slice_waveform = song[..., :int(slice_duration * sr)]
+    slices.append(first_slice_waveform)
+
+    for i in range(1, num_slices):
+        random_start = random.choice(range(int(slice_duration * sr), int(song_length * sr), int(4 * 60 / bpm * sr)))
+        slice_end = random_start + int(slice_duration * sr)
+
+        if slice_end > song_length * sr:
+            # Wrap around to the beginning of the song
+            remaining_samples = int(slice_end - song_length * sr)
+            slice_waveform = torch.cat([song[..., random_start:], song[..., :remaining_samples]], dim=-1)
+        else:
+            slice_waveform = song[..., random_start:slice_end]
+
+        if len(slice_waveform.squeeze()) < int(slice_duration * sr):
+            additional_samples_needed = int(slice_duration * sr) - len(slice_waveform.squeeze())
+            slice_waveform = torch.cat([slice_waveform, song[..., :additional_samples_needed]], dim=-1)
+
+        slices.append(slice_waveform)
+
+    return slices
+
+def calculate_duration(bpm, min_duration=29, max_duration=30):
+    single_bar_duration = 4 * 60 / bpm
+    bars = max(min_duration // single_bar_duration, 1)
+
+    while single_bar_duration * bars < min_duration:
+        bars += 1
+
+    duration = single_bar_duration * bars
+
+    while duration > max_duration and bars > 1:
+        bars -= 1
+        duration = single_bar_duration * bars
+
+    return duration
+
+def generate_music(seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm):
+    if seed == "":
+        seed = random.randint(1, 10000)
+
+    ml = MusicLangPredictor('musiclang/musiclang-v2')
+
+    try:
+        seed = int(seed)
+    except ValueError:
+        seed = random.randint(1, 10000)
+
+    nb_tokens = 4096
+    temperature = 0.9
+    top_p = 1.0
+
+    if use_chords and chord_progression.strip():
+        score = ml.predict_chords(
+            chord_progression,
+            time_signature=(4, 4),
+            temperature=temperature,
+            topp=top_p,
+            rng_seed=seed
+        )
+    else:
+        score = ml.predict(
+            nb_tokens=nb_tokens,
+            temperature=temperature,
+            topp=top_p,
+            rng_seed=seed
+        )
+
+    midi_filename = f"output_{seed}.mid"
+    wav_filename = midi_filename.replace(".mid", ".wav")
+
+    score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))
+
+    subprocess.run(["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"])
+
+    # Load the generated audio
+    song, sr = torchaudio.load(wav_filename)
+    song = song.to(device)
+
+    # Use the user-provided BPM value for duration calculation
+    duration = calculate_duration(bpm)
+
+    # Create slices from the song using the user-provided BPM value
+    slices = create_slices(song, sr, 35, bpm, num_slices=5)
+
+    # Load the model
+    model_continue = MusicGen.get_pretrained(musicgen_model)
+
+    # Setting generation parameters
+    model_continue.set_generation_params(
+        use_sampling=True,
+        top_k=250,
+        top_p=0.0,
+        temperature=1.0,
+        duration=duration,
+        cfg_coef=3
+    )
+
+    all_audio_files = []
+
+    for i in range(num_iterations):
+        slice_idx = i % len(slices)
+
+        print(f"Running iteration {i + 1} using slice {slice_idx}...")
+
+        prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
+        prompt_waveform = preprocess_audio(prompt_waveform)
+
+        output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
+        output = output.cpu()  # Move the output tensor back to CPU
+
+        # Make sure the output tensor has at most 2 dimensions
+        if len(output.size()) > 2:
+            output = output.squeeze()
+
+        filename_without_extension = f'continue_{i}'
+        filename_with_extension = f'{filename_without_extension}.wav'
+
+        audio_write(filename_without_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
+        all_audio_files.append(filename_with_extension)  # audio_write appends the .wav suffix to the stem itself
+
+    # Combine all audio files
+    combined_audio = AudioSegment.empty()
+    for filename in all_audio_files:
+        combined_audio += AudioSegment.from_wav(filename)
+
+    combined_audio_filename = f"combined_audio_{seed}.mp3"
+    combined_audio.export(combined_audio_filename, format="mp3")
+
+    # Clean up temporary files
+    os.remove(midi_filename)
+    os.remove(wav_filename)
+    for filename in all_audio_files:
+        os.remove(filename)
+
+    return combined_audio_filename
+
+# Check if CUDA is available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+iface = gr.Interface(
+    fn=generate_music,
+    inputs=[
+        gr.Textbox(label="Seed (leave blank for random)", value=""),
+        gr.Checkbox(label="Control Chord Progression", value=False),
+        gr.Textbox(label="Chord Progression (e.g., Am CM Dm E7 Am)", visible=True),
+        gr.Dropdown(label="Prompt Duration (seconds)", choices=list(range(1, 11)), value=7),
+        gr.Textbox(label="MusicGen Model", value="thepatch/vanya_ai_dnb_0.1"),
+        gr.Slider(label="Number of Iterations", minimum=1, maximum=10, step=1, value=3),
+        gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=140)
+    ],
+    outputs=gr.Audio(label="Generated Music"),
+    title="Music Generation Slot Machine",
+    description="Enter a seed to generate music or leave blank for a random tune! Optionally, control the chord progression, prompt duration, MusicGen model, number of iterations, and BPM."
+)
+
+iface.launch()
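Note: calculate_duration above quantizes MusicGen's roughly 30-second generation window to whole 4/4 bars at the chosen BPM. A standalone sketch of the same arithmetic (reimplemented here rather than imported, since importing app.py launches the UI):

    # Same bar-quantization logic as app.py's calculate_duration, standalone.
    def bars_within_window(bpm, min_duration=29, max_duration=30):
        bar = 4 * 60 / bpm                        # length of one 4/4 bar in seconds
        bars = max(int(min_duration // bar), 1)
        while bar * bars < min_duration:          # grow to reach the lower bound
            bars += 1
        while bar * bars > max_duration and bars > 1:
            bars -= 1                             # shrink back under the upper bound
        return bars, bar * bars

    for bpm in (60, 100, 140, 200):
        n, seconds = bars_within_window(bpm)
        print(f"{bpm:>3} BPM -> {n} bars = {seconds:.2f}s")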
packages.txt
ADDED
@@ -0,0 +1,2 @@
+ffmpeg
+fluidsynth
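Note: app.py shells out to the fluidsynth binary installed here and expects a font.sf2 soundfont in the repository root, which this commit does not add. A minimal preflight check (hypothetical, not in the commit) that fails fast with a readable message:

    # Hypothetical preflight check for the external tools app.py relies on.
    import os
    import shutil

    if shutil.which("fluidsynth") is None:
        raise RuntimeError("fluidsynth binary not found; check packages.txt")
    if not os.path.exists("font.sf2"):
        raise RuntimeError("font.sf2 soundfont is missing from the repository root")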
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+torch==2.1.0
+musiclang_predict
+gradio
+pyFluidSynth
+midi2audio
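Note: midi2audio (listed above) wraps the same fluidsynth invocation that app.py makes via subprocess. An equivalent rendering step, sketched with hypothetical filenames:

    # Equivalent MIDI-to-WAV rendering via midi2audio instead of a raw subprocess call.
    from midi2audio import FluidSynth

    fs = FluidSynth(sound_font="font.sf2", sample_rate=44100)
    fs.midi_to_audio("output_1234.mid", "output_1234.wav")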