thecollabagepatch committed
Commit 3e4e311
1 parent: abfd774
Files changed (4):
  1. .gitmodules +3 -0
  2. app.py +186 -0
  3. packages.txt +2 -0
  4. requirements.txt +5 -0
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "audiocraft"]
+ 	path = audiocraft
+ 	url = https://github.com/facebookresearch/audiocraft
app.py ADDED
@@ -0,0 +1,186 @@
+ import gradio as gr
+ from musiclang_predict import MusicLangPredictor
+ import random
+ import subprocess
+ import os
+ import torchaudio
+ import torch
+ import numpy as np
+ from audiocraft.models import MusicGen
+ from audiocraft.data.audio import audio_write
+ from pydub import AudioSegment
+
+ # Utility Functions
+ def peak_normalize(y, target_peak=0.97):
+     return target_peak * (y / np.max(np.abs(y)))
+
+ def rms_normalize(y, target_rms=0.05):
+     return y * (target_rms / np.sqrt(np.mean(y**2)))
+
+ def preprocess_audio(waveform):
+     waveform_np = waveform.cpu().squeeze().numpy()  # Move to CPU before converting to NumPy
+     processed_waveform_np = rms_normalize(peak_normalize(waveform_np))
+     return torch.from_numpy(processed_waveform_np).unsqueeze(0).to(device)
+
+ def create_slices(song, sr, slice_duration, bpm, num_slices=5):
+     song_length = song.shape[-1] / sr
+     slices = []
+
+     # Ensure the first slice is from the beginning of the song
+     first_slice_waveform = song[..., :int(slice_duration * sr)]
+     slices.append(first_slice_waveform)
+
+     for i in range(1, num_slices):
+         random_start = random.choice(range(int(slice_duration * sr), int(song_length * sr), int(4 * 60 / bpm * sr)))
+         slice_end = random_start + int(slice_duration * sr)
+
+         if slice_end > song_length * sr:
+             # Wrap around to the beginning of the song
+             remaining_samples = int(slice_end - song_length * sr)
+             slice_waveform = torch.cat([song[..., random_start:], song[..., :remaining_samples]], dim=-1)
+         else:
+             slice_waveform = song[..., random_start:slice_end]
+
+         if len(slice_waveform.squeeze()) < int(slice_duration * sr):
+             additional_samples_needed = int(slice_duration * sr) - len(slice_waveform.squeeze())
+             slice_waveform = torch.cat([slice_waveform, song[..., :additional_samples_needed]], dim=-1)
+
+         slices.append(slice_waveform)
+
+     return slices
+
+ def calculate_duration(bpm, min_duration=29, max_duration=30):
+     single_bar_duration = 4 * 60 / bpm
+     bars = max(min_duration // single_bar_duration, 1)
+
+     while single_bar_duration * bars < min_duration:
+         bars += 1
+
+     duration = single_bar_duration * bars
+
+     while duration > max_duration and bars > 1:
+         bars -= 1
+         duration = single_bar_duration * bars
+
+     return duration
+
+ def generate_music(seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm):
+     if seed == "":
+         seed = random.randint(1, 10000)
+
+     ml = MusicLangPredictor('musiclang/musiclang-v2')
+
+     try:
+         seed = int(seed)
+     except ValueError:
+         seed = random.randint(1, 10000)
+
+     nb_tokens = 4096
+     temperature = 0.9
+     top_p = 1.0
+
+     if use_chords and chord_progression.strip():
+         score = ml.predict_chords(
+             chord_progression,
+             time_signature=(4, 4),
+             temperature=temperature,
+             topp=top_p,
+             rng_seed=seed
+         )
+     else:
+         score = ml.predict(
+             nb_tokens=nb_tokens,
+             temperature=temperature,
+             topp=top_p,
+             rng_seed=seed
+         )
+
+     midi_filename = f"output_{seed}.mid"
+     wav_filename = midi_filename.replace(".mid", ".wav")
+
+     score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))
+
+     subprocess.run(["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"])
+
+     # Load the generated audio
+     song, sr = torchaudio.load(wav_filename)
+     song = song.to(device)
+
+     # Use the user-provided BPM value for duration calculation
+     duration = calculate_duration(bpm)
+
+     # Create slices from the song using the user-provided BPM value
+     slices = create_slices(song, sr, 35, bpm, num_slices=5)
+
+     # Load the model
+     model_continue = MusicGen.get_pretrained(musicgen_model)
+
+     # Setting generation parameters
+     model_continue.set_generation_params(
+         use_sampling=True,
+         top_k=250,
+         top_p=0.0,
+         temperature=1.0,
+         duration=duration,
+         cfg_coef=3
+     )
+
+     all_audio_files = []
+
+     for i in range(num_iterations):
+         slice_idx = i % len(slices)
+
+         print(f"Running iteration {i + 1} using slice {slice_idx}...")
+
+         prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
+         prompt_waveform = preprocess_audio(prompt_waveform)
+
+         output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
+         output = output.cpu()  # Move the output tensor back to CPU
+
+         # Make sure the output tensor has at most 2 dimensions
+         if len(output.size()) > 2:
+             output = output.squeeze()
+
+         filename_without_extension = f'continue_{i}'
+         filename_with_extension = f'{filename_without_extension}.wav'
+
+         audio_write(filename_without_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
+         all_audio_files.append(filename_with_extension)  # audio_write appends .wav to the stem name
+
+     # Combine all audio files
+     combined_audio = AudioSegment.empty()
+     for filename in all_audio_files:
+         combined_audio += AudioSegment.from_wav(filename)
+
+     combined_audio_filename = f"combined_audio_{seed}.mp3"
+     combined_audio.export(combined_audio_filename, format="mp3")
+
+     # Clean up temporary files
+     os.remove(midi_filename)
+     os.remove(wav_filename)
+     for filename in all_audio_files:
+         os.remove(filename)
+
+     return combined_audio_filename
+
+ # Check if CUDA is available
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ iface = gr.Interface(
+     fn=generate_music,
+     inputs=[
+         gr.Textbox(label="Seed (leave blank for random)", value=""),
+         gr.Checkbox(label="Control Chord Progression", value=False),
+         gr.Textbox(label="Chord Progression (e.g., Am CM Dm E7 Am)", visible=True),
+         gr.Dropdown(label="Prompt Duration (seconds)", choices=list(range(1, 11)), value=7),
+         gr.Textbox(label="MusicGen Model", value="thepatch/vanya_ai_dnb_0.1"),
+         gr.Slider(label="Number of Iterations", minimum=1, maximum=10, step=1, value=3),
+         gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=140)
+     ],
+     outputs=gr.Audio(label="Generated Music"),
+     title="Music Generation Slot Machine",
+     description="Enter a seed to generate music or leave blank for a random tune! Optionally, control the chord progression, prompt duration, MusicGen model, number of iterations, and BPM."
+ )
+
+ iface.launch()
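
For reference, the duration passed to set_generation_params is snapped to whole 4/4 bars by calculate_duration. A standalone sketch of the same arithmetic (the BPM values below are illustrative, not from the commit):

def bars_for_window(bpm, min_duration=29, max_duration=30):
    # Same logic as calculate_duration: grow the bar count until the
    # duration reaches min_duration, then shrink while it exceeds max_duration.
    bar = 4 * 60 / bpm  # seconds per 4/4 bar at this tempo
    bars = max(int(min_duration // bar), 1)
    while bar * bars < min_duration:
        bars += 1
    while bar * bars > max_duration and bars > 1:
        bars -= 1
    return bars, bar * bars

for bpm in (60, 140, 200):
    bars, secs = bars_for_window(bpm)
    print(f"{bpm} BPM -> {bars} bars = {secs:.2f} s")

At 140 BPM this yields 17 bars (about 29.14 s); at 60 BPM no whole-bar count fits the 29-30 s window, so the back-off loop settles on 7 bars (28 s), slightly under min_duration.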
packages.txt ADDED
@@ -0,0 +1,2 @@
+ ffmpeg
+ fluidsynth
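
Both lines are apt packages installed when the Space builds: app.py shells out to the fluidsynth binary to render the MIDI, and pydub's mp3 export relies on ffmpeg. A minimal runtime check that both binaries landed on PATH (a sketch, not part of the commit):

import shutil

# fluidsynth is invoked via subprocess in app.py; ffmpeg backs pydub's mp3 export
for binary in ("ffmpeg", "fluidsynth"):
    print(f"{binary}: {shutil.which(binary) or 'NOT FOUND'}")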
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch==2.1.0
+ musiclang_predict
+ gradio
+ pyFluidSynth
+ midi2audio
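
Note that app.py also imports torchaudio, numpy, and pydub, which are not pinned here and would have to arrive transitively, while audiocraft is supplied by the git submodule rather than pip. A quick pre-launch import check (a sketch; the module list is copied from app.py's imports):

import importlib

for mod in ("gradio", "musiclang_predict", "torch", "torchaudio", "numpy", "pydub", "audiocraft"):
    try:
        importlib.import_module(mod)
        print(f"ok: {mod}")
    except ImportError as exc:
        print(f"missing: {mod} ({exc})")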