import os

import gradio as gr
import spaces

from qa_mdt.pipeline import MOSDiffusionPipeline

# Instantiate the pipeline once at startup so the checkpoint is loaded a
# single time and reused across requests.
pipe = MOSDiffusionPipeline()
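

# On Hugging Face ZeroGPU Spaces, @spaces.GPU requests a GPU for each call to
# the decorated function; duration=120 raises the per-call limit to 120 s.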
@spaces.GPU(duration=120)
def generate_waveform(description):
    # Prefix the prompt with "high quality": the quality-aware QA-MDT model
    # uses this cue to steer generation toward its high-quality mode.
    high_quality_description = "high quality " + description
    pipe(high_quality_description)

    # The pipeline writes its output to ./awesome.wav.
    generated_file_path = "./awesome.wav"

    if os.path.exists(generated_file_path):
        waveform_video = gr.make_waveform(
            audio=generated_file_path,
            bg_color="#000000",
            bars_color="#00FF00",
            bar_count=100,
            bar_width=1.5,
            animate=True,
        )
        return waveform_video, generated_file_path
    # Raise a visible Gradio error instead of returning a bare string, which
    # would not match the two declared output components.
    raise gr.Error("Failed to generate the waveform.")
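
# Hypothetical quick smoke test (prompt and variable names are illustrative;
# requires a GPU context, e.g. a ZeroGPU allocation):
#   video_path, wav_path = generate_waveform("upbeat jazz piano trio")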


intro = """
# 🎶 OpenMusic: Diffusion That Plays Music 🎧 🎹

Welcome to **OpenMusic**, a next-gen diffusion model designed to generate high-quality music audio from text descriptions!

Simply enter a few words describing the vibe, and the model will generate a unique track for your input.

Powered by the QA-MDT model, introduced in the research paper linked below.

- [GitHub Repo](https://github.com/ivcylc/qa-mdt) by [@changli](https://github.com/ivcylc) 👏
- [Paper](https://arxiv.org/pdf/2405.15863)
- [HuggingFace](https://huggingface.co/jadechoghari/qa_mdt) [@jadechoghari](https://github.com/jadechoghari) 🤗

Note: the music generation process takes 1-2 minutes 🎶

---
"""


iface = gr.Interface(
    fn=generate_waveform,
    inputs=gr.Textbox(lines=2, placeholder="Enter a music description here..."),
    outputs=[
        gr.Video(label="Watch the Waveform 🎼"),
        gr.Audio(label="Download the Music 🎶"),
    ],
    description=intro,
    examples=[
        ["A modern synthesizer creating futuristic soundscapes."],
        ["Acoustic ballad with heartfelt lyrics and soft piano."],
    ],
    cache_examples=True,
)
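
# cache_examples=True makes Gradio run the two examples once and cache the
# results, so clicking an example replays the stored output instantly.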


if __name__ == "__main__":
    iface.launch()