Add Interrupt Button
- app.py +20 -3
- audiocraft/utils/extend.py +5 -3
app.py
CHANGED
@@ -11,6 +11,8 @@ import argparse
 import torch
 import gradio as gr
 import os
+import time
+import warnings
 from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
 from audiocraft.utils.extend import generate_music_segments, add_settings_to_image
@@ -20,6 +22,19 @@ import random
 MODEL = None
 IS_SHARED_SPACE = "musicgen/MusicGen" in os.environ.get('SPACE_ID', '')
 
+def interrupt():
+    global INTERRUPTING
+    INTERRUPTING = True
+
+
+def make_waveform(*args, **kwargs):
+    # Further remove some warnings.
+    be = time.time()
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        out = gr.make_waveform(*args, **kwargs)
+        print("Make a video took", time.time() - be)
+        return out
 
 def load_model(version):
     print("Loading model", version)
@@ -102,7 +117,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
             output = output_segments[0]
             for i in range(1, len(output_segments)):
                 overlap_samples = overlap * MODEL.sample_rate
-                output = torch.cat([output[:, :, :-overlap_samples], output_segments[i][:, :, overlap_samples:]], dim=2)
+                output = torch.cat([output[:, :, :-overlap_samples], output_segments[i][:, :, overlap_samples:]], dim=dimension)
             output = output.detach().cpu().float()[0]
         except Exception as e:
             print(f"Error combining segments: {e}. Using the first segment only.")
@@ -116,7 +131,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-        waveform_video = gr.make_waveform(file.name,bg_image=background, bar_count=40)
+        waveform_video = make_waveform(file.name,bg_image=background, bar_count=40)
     return waveform_video, seed
 
 
@@ -144,6 +159,8 @@ def ui(**kwargs):
                 melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
             with gr.Row():
                 submit = gr.Button("Submit")
+                # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
+                _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
             with gr.Row():
                 background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
                 include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
@@ -156,7 +173,7 @@ def ui(**kwargs):
             with gr.Row():
                 duration = gr.Slider(minimum=1, maximum=1000, value=10, label="Duration", interactive=True)
                 overlap = gr.Slider(minimum=1, maximum=29, value=5, step=1, label="Overlap", interactive=True)
-                dimension = gr.Slider(minimum=-2, maximum=
+                dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
             with gr.Row():
                 topk = gr.Number(label="Top-k", value=250, interactive=True)
                 topp = gr.Number(label="Top-p", value=0, interactive=True)
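Note: the commit only sets INTERRUPTING; the code that polls the flag during generation is not part of this diff. A minimal sketch of how such a flag is typically consumed, assuming the generation loop calls a per-step progress hook (the _progress name and its wiring are illustrative, not taken from this commit):

import gradio as gr

INTERRUPTING = False  # module-level flag; reset to False before each run

def _progress(generated, to_generate):
    # Hypothetical per-step hook: abort as soon as the Interrupt
    # button has flipped the module-level flag.
    if INTERRUPTING:
        raise gr.Error("Interrupted.")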
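For the dim=dimension change in predict: MusicGen segments are [batch, channels, samples] tensors, so dim=2 appends along the time axis while dim=1 stacks channels, which matches the new Dimension slider's description. A toy check of the time-axis case (the 32000 Hz rate and silent tensors are illustrative stand-ins for MODEL.sample_rate and real segments):

import torch

sample_rate = 32000                    # assumed stand-in for MODEL.sample_rate
overlap_samples = 5 * sample_rate      # Overlap slider at 5 seconds

first = torch.zeros(1, 1, 30 * sample_rate)  # two 30-second segments
nxt = torch.zeros(1, 1, 30 * sample_rate)

# Drop the tail of the running output and the head of the next segment,
# then join on the time axis.
joined = torch.cat([first[:, :, :-overlap_samples], nxt[:, :, overlap_samples:]], dim=2)
print(joined.shape[-1] // sample_rate)  # 50 = (30 - 5) + (30 - 5)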
audiocraft/utils/extend.py
CHANGED
@@ -30,7 +30,7 @@ def separate_audio_segments(audio, segment_duration=30, overlap=1):
     if total_samples > 0:
         segment = audio_data[-segment_samples:]
         segments.append((sr, segment))
-
+    print(f"separate_audio_segments: {len(segments)} segments")
     return segments
 
 def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:int=1, segment_duration:int=30):
@@ -43,9 +43,11 @@ def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:
 
     # Calculate the total number of segments
    total_segments = max(math.ceil(duration / segment_duration),1)
-
+    # account for overlap
+    duration = duration + (max((total_segments - 1),0) * overlap)
+    total_segments = max(math.ceil(duration / segment_duration),1)
     #calc excess duration
-    excess_duration = total_segments * segment_duration - duration
+    excess_duration = segment_duration - (total_segments * segment_duration - duration)
     print(f"total Segments to Generate: {total_segments} for {duration} seconds. Each segment is {segment_duration} seconds. Excess {excess_duration}")
 
     # If melody_segments is shorter than total_segments, repeat the segments until the total_segments is reached
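A worked example of the new overlap accounting, wrapped in a hypothetical plan_segments helper (the helper name and the 45-second request are illustrative; the arithmetic mirrors the diff):

import math

def plan_segments(duration, segment_duration=30, overlap=1):
    # First pass: how many segments the raw request needs.
    total_segments = max(math.ceil(duration / segment_duration), 1)
    # Pad the request so each join can give up `overlap` seconds.
    duration = duration + max(total_segments - 1, 0) * overlap
    # Second pass with the padded duration.
    total_segments = max(math.ceil(duration / segment_duration), 1)
    # How much of the final segment is actually needed.
    excess_duration = segment_duration - (total_segments * segment_duration - duration)
    return total_segments, duration, excess_duration

print(plan_segments(45, overlap=5))  # (2, 50, 20): ceil(45/30)=2, 45+5=50, 30-(60-50)=20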
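One edge worth noting: the padded duration can itself raise the segment count. With plan_segments(58, overlap=5) above, the first pass gives ceil(58/30) = 2, padding yields 63, and the second pass gives 3 segments, but the overlap for that extra boundary is never added back in. The single re-pass in the diff appears to accept this as an approximation rather than iterating to a fixed point.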