Spaces: Running on T4
Progress Bars Update
Files changed:
- app.py +19 -3
- audiocraft/models/musicgen.py +2 -2
- audiocraft/utils/extend.py +7 -2
- modules/gradio.py +2 -0
app.py
CHANGED
@@ -17,6 +17,7 @@ from pathlib import Path
 import time
 import typing as tp
 import warnings
+import gc
 from tqdm import tqdm
 from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
@@ -139,7 +140,7 @@ def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperatur
     symbols = ['_', '.', '-']
     MAX_OVERLAP = int(segment_length // 2) - 1
     if (melody_filepath is None) or (melody_filepath == ""):
-        return title, gr.update(maximum=0, value…
+        return title, gr.update(maximum=0, value=-1) , gr.update(value="medium", interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef), gr.update(maximum=MAX_OVERLAP)
 
     if (title is None) or ("MusicGen" in title) or (title == ""):
         melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
@@ -166,7 +167,7 @@ def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperatur
     print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
     MAX_PROMPT_INDEX = total_melodys
 
-    return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value…
+    return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=-1), gr.update(value=assigned_model, interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef), gr.update(maximum=MAX_OVERLAP)
 
 def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, segment_length = 30, settings_font_size=28, progress=gr.Progress(track_tqdm=True)):
     global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
@@ -331,7 +332,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
-        waveform_video_path = get_waveform(file.name, bg_image=background, bar_count=45, name=title_file_name, animate=False)
+        waveform_video_path = get_waveform(file.name, bg_image=background, bar_count=45, name=title_file_name, animate=False, progress=gr.Progress(track_tqdm=True))
         # Remove the extension from file.name
         file_name_without_extension = os.path.splitext(file.name)[0]
         # Get the directory, filename, name, extension, and new extension of the waveform video path
@@ -345,6 +346,8 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
 
         commit = commit_hash()
         metadata = {
+            "Title": title,
+            "Year": time.strftime("%Y"),
             "prompt": text,
             "negative_prompt": "",
             "Seed": seed,
@@ -407,6 +410,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
             video=waveform_video_path,
             label=title,
             metadata=metadata,
+            progress=gr.Progress(track_tqdm=True)
         )
 
 
@@ -414,6 +418,16 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
         MODEL.to('cpu')
     if UNLOAD_MODEL:
         MODEL = None
+
+    # Explicitly delete large tensors or objects
+    del output_segments, output, melody, melody_name, melody_extension, metadata, mp4
+
+    # Force garbage collection
+    gc.collect()
+
+    # Synchronize CUDA streams
+    torch.cuda.synchronize()
+
     torch.cuda.empty_cache()
     torch.cuda.ipc_collect()
     return waveform_video_path, file.name, seed
@@ -552,7 +566,9 @@ def ui(**kwargs):
         )
 
         with gr.Tab("User History") as history_tab:
+            modules.user_history.setup(display_type="video_path")
             modules.user_history.render()
+
         user_profile = gr.State(None)
 
         with gr.Row("Versions") as versions_row:
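Note on the pattern used throughout app.py: a handler that declares `progress=gr.Progress(track_tqdm=True)` gets any tqdm bar created during its call mirrored into the Gradio UI, so the inner loops need no explicit progress wiring. A minimal, self-contained sketch of that behavior (the handler and its workload are illustrative, not from this repo):

import time

import gradio as gr
from tqdm import tqdm

def generate(steps, progress=gr.Progress(track_tqdm=True)):
    # Any tqdm loop running inside this handler shows up in the UI bar.
    for _ in tqdm(range(int(steps)), desc="Generating"):
        time.sleep(0.1)  # stand-in for real per-segment work
    return f"done after {int(steps)} steps"

demo = gr.Interface(fn=generate, inputs=gr.Slider(1, 50, value=10, step=1), outputs="text")

if __name__ == "__main__":
    demo.launch()

The cleanup hunk follows the usual CUDA hygiene order: drop Python references with `del`, run `gc.collect()` so the tensors are actually freed, call `torch.cuda.synchronize()` to let pending kernels finish, and only then `torch.cuda.empty_cache()` / `torch.cuda.ipc_collect()`, since the allocator can only release blocks that no live tensor still owns.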
audiocraft/models/musicgen.py
CHANGED
@@ -411,8 +411,8 @@ class MusicGen:
 
         def _progress_callback(generated_tokens: int, tokens_to_generate: int):
             generated_tokens += current_gen_offset
-            generated_tokens /= …
-            tokens_to_generate /= …
+            generated_tokens /= ((tokens_to_generate - 3) / self.duration)
+            tokens_to_generate /= ((tokens_to_generate - 3) / self.duration)
             if self._progress_callback is not None:
                 # Note that total_gen_len might be quite wrong depending on the
                 # codebook pattern used, but with delay it is almost accurate.
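Since both counters are divided by the same factor, the completed fraction reported to the callback is unchanged; the division only rescales the unit from tokens to (approximately) seconds of audio, assuming the LM emits a roughly constant number of tokens per second plus a small fixed offset (the `- 3`) from the delay codebook pattern. A quick sanity check with assumed values (the ~50 tokens/second rate is an assumption, not stated in this diff):

duration = 30                                   # requested seconds of audio
frame_rate = 50                                 # assumed tokens per second
tokens_to_generate = duration * frame_rate + 3  # hypothetical total

scale = (tokens_to_generate - 3) / duration     # ~= frame_rate
for generated_tokens in (0, 750, 1500):
    print(f"{generated_tokens / scale:.1f}s of {tokens_to_generate / scale:.1f}s")
# 0.0s of 30.1s, 15.0s of 30.1s, 30.0s of 30.1s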
audiocraft/utils/extend.py
CHANGED
@@ -14,6 +14,7 @@ from huggingface_hub import hf_hub_download
 import librosa
 import gradio as gr
 import re
+from tqdm import tqdm
 
 
 INTERRUPTING = False
@@ -72,6 +73,7 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
     excess_duration = segment_duration - (total_segments * segment_duration - duration)
     print(f"total Segments to Generate: {total_segments} for {duration} seconds. Each segment is {segment_duration} seconds. Excess {excess_duration} Overlap Loss {duration_loss}")
     duration += duration_loss
+    pbar = tqdm(total=total_segments*2, desc="Generating segments", leave=False)
     while excess_duration + duration_loss > segment_duration:
         total_segments += 1
         #calculate duration loss from segment overlap
@@ -82,6 +84,7 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
         if excess_duration + duration_loss > segment_duration:
             duration += duration_loss
             duration_loss = 0
+        pbar.update(1)
     total_segments = min(total_segments, (720 // segment_duration))
 
     # If melody_segments is shorter than total_segments, repeat the segments until the total_segments is reached
@@ -90,6 +93,7 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
         for i in range(total_segments - len(melody_segments)):
             segment = melody_segments[i]
             melody_segments.append(segment)
+            pbar.update(1)
         print(f"melody_segments: {len(melody_segments)} fixed")
 
     # Iterate over the segments to create list of Meldoy tensors
@@ -116,7 +120,8 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
 
         # Append the segment to the melodys list
         melodys.append(verse)
-
+        pbar.update(1)
+    pbar.close()
     torch.manual_seed(seed)
 
     # If user selects a prompt segment, generate a new prompt segment to use on all segments
@@ -147,7 +152,7 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
             prompt=None,
         )
 
-    for idx, verse in enumerate(melodys):
+    for idx, verse in tqdm(enumerate(melodys), total=len(melodys), desc="Generating melody segments"):
         if INTERRUPTING:
             return output_segments, duration
 
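The hunks above manage one bar by hand across several loops: the total budgets two updates per segment (`total=total_segments*2`), updates are spread over the repair and repeat loops, and the bar is closed before generation begins; `leave=False` removes it when finished. Note that `total_segments` can still grow inside the while loop after the total is fixed, so the count is best-effort. A minimal sketch of the manual pattern (the segment list and work steps are placeholders):

from tqdm import tqdm

segments = ["intro", "verse", "chorus", "outro"]

pbar = tqdm(total=len(segments) * 2, desc="Generating segments", leave=False)
prepared = []
for seg in segments:
    pbar.update(1)                 # first unit: bookkeeping for this segment
    prepared.append(seg.upper())   # stand-in for building the melody tensor
    pbar.update(1)                 # second unit: the segment is appended
pbar.close()                       # leave=False erases the bar once closed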
modules/gradio.py
CHANGED
@@ -9,6 +9,7 @@ import shutil
 import subprocess
 from tempfile import NamedTemporaryFile
 from pathlib import Path
+from tqdm import tqdm
 
 
 class MatplotlibBackendMananger:
@@ -42,6 +43,7 @@ def make_waveform(
     bar_width: float = 0.6,
     animate: bool = False,
     name: str = "",
+    progress= gr.Progress(track_tqdm=True)
 ) -> str:
     """
     Generates a waveform video from an audio file. Useful for creating an easy to share audio visualization. The output should be passed into a `gr.Video` component.
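Design note: the body shown never reads the new `progress` argument; accepting it (with a `gr.Progress(track_tqdm=True)` default) most likely just lets `make_waveform` tolerate the `progress=` keyword that app.py now passes through `get_waveform`, while the actual UI mirroring is handled by Gradio's tqdm tracking during the event. A minimal illustration of that calling convention (the stub is hypothetical):

import gradio as gr

def make_waveform_stub(audio_path: str, progress=gr.Progress(track_tqdm=True)) -> str:
    # progress is accepted for signature compatibility only; tqdm bars created
    # while a track_tqdm event is running are mirrored regardless.
    return audio_path.replace(".wav", ".mp4")

video_path = make_waveform_stub("clip.wav", progress=gr.Progress(track_tqdm=True))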