Spaces:
Paused
Paused
Sylvain Filoni
committed on
Commit
•
7fb6157
1
Parent(s):
3d381f7
added duration controls
Browse files- README.md +1 -1
- app.py +87 -41
- constants.py +9 -0
- requirements.txt +5 -0
- utils.py +50 -0
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🌅🎶
|
|
4 |
colorFrom: green
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
4 |
colorFrom: green
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.15.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
CHANGED
@@ -1,4 +1,11 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
import requests
|
4 |
import urllib
|
@@ -7,7 +14,6 @@ from os import path
|
|
7 |
from pydub import AudioSegment
|
8 |
|
9 |
img_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
|
10 |
-
text_to_music = gr.Interface.load("spaces/fffiloni/text-2-music")
|
11 |
|
12 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
13 |
|
@@ -15,22 +21,59 @@ def get_prompts(uploaded_image):
|
|
15 |
|
16 |
prompt = img_to_text(uploaded_image, "ViT-L (best for Stable Diffusion 1.*)", "fast", fn_index=1)[0]
|
17 |
|
18 |
-
music_result =
|
19 |
|
20 |
-
return music_result
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
save_as = "file.mp3"
|
35 |
|
36 |
data = urllib.request.urlopen(url)
|
@@ -44,7 +87,7 @@ def get_music(prompt):
|
|
44 |
sound = AudioSegment.from_mp3(save_as)
|
45 |
sound.export(wave_file, format="wav")
|
46 |
|
47 |
-
return wave_file
|
48 |
|
49 |
css = """
|
50 |
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
@@ -79,39 +122,42 @@ a {text-decoration-line: underline; font-weight: 600;}
|
|
79 |
"""
|
80 |
|
81 |
with gr.Blocks(css=css) as demo:
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
|
103 |
|
104 |
input_img = gr.Image(type="filepath", elem_id="input-img")
|
|
|
|
|
|
|
105 |
generate = gr.Button("Generate Music from Image")
|
106 |
|
107 |
music_output = gr.Audio(label="Result", type="filepath", elem_id="music-output")
|
108 |
|
109 |
with gr.Group(elem_id="share-btn-container"):
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
|
114 |
-
|
115 |
-
|
116 |
|
117 |
demo.queue(max_size=32, concurrency_count=20).launch()
|
|
|
1 |
+
import time
|
2 |
+
import base64
|
3 |
import gradio as gr
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
|
6 |
+
import httpx
|
7 |
+
import json
|
8 |
+
|
9 |
import os
|
10 |
import requests
|
11 |
import urllib
|
|
|
14 |
from pydub import AudioSegment
|
15 |
|
16 |
img_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
|
|
|
17 |
|
18 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
19 |
|
|
|
21 |
|
22 |
prompt = img_to_text(uploaded_image, "ViT-L (best for Stable Diffusion 1.*)", "fast", fn_index=1)[0]
|
23 |
|
24 |
+
music_result = generate_track_by_prompt(prompt, duration, gen_intensity, audio_format)
|
25 |
|
26 |
+
return music_result[0], gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|
27 |
|
28 |
+
from utils import get_tags_for_prompts, get_mubert_tags_embeddings, get_pat
|
29 |
+
|
30 |
+
minilm = SentenceTransformer('all-MiniLM-L6-v2')
|
31 |
+
mubert_tags_embeddings = get_mubert_tags_embeddings(minilm)
|
32 |
+
|
33 |
+
|
34 |
+
def get_track_by_tags(tags, pat, duration, gen_intensity, maxit=20, loop=False):
    """Request a generated track from the Mubert API and wait until it is downloadable.

    Posts a ``RecordTrackTTM`` request, then polls the returned download link
    (one attempt per second) until it answers with HTTP 200.

    Args:
        tags: Value sent as the ``tags`` request parameter.
        pat: Access token sent as the ``pat`` request parameter.
        duration: Value sent as the ``duration`` request parameter.
        gen_intensity: Value sent as the ``intensity`` request parameter.
        maxit: Maximum number of polling attempts on the download link.
        loop: When true the request ``mode`` is ``"loop"``, otherwise ``"track"``.

    Returns:
        The download URL once it responds with status 200, or ``None`` when it
        is still unavailable after ``maxit`` attempts.

    Raises:
        AssertionError: If the API response ``status`` is not 1; the message
            is the error text reported by the API.
    """
    mode = "loop" if loop else "track"
    r = httpx.post(
        'https://api-b2b.mubert.com/v2/RecordTrackTTM',
        json={
            "method": "RecordTrackTTM",
            "params": {
                "pat": pat,
                "duration": duration,
                "format": "wav",
                "intensity": gen_intensity,
                "tags": tags,
                "mode": mode,
            },
        },
    )

    rdata = json.loads(r.text)
    # Explicit raise instead of `assert` so the check still runs under
    # `python -O`; AssertionError is kept so existing handlers are unaffected.
    if rdata['status'] != 1:
        raise AssertionError(rdata['error']['text'])
    trackurl = rdata['data']['tasks'][0]['download_link']

    print('Generating track ', end='')
    for _ in range(maxit):
        r = httpx.get(trackurl)
        if r.status_code == 200:
            return trackurl
        time.sleep(1)

    # The link never became available within `maxit` attempts.
    return None
|
62 |
+
|
63 |
+
|
64 |
+
def generate_track_by_prompt(prompt, duration, gen_intensity):
    """Turn a text prompt into a Mubert track URL.

    Obtains an access token, maps the prompt to Mubert tags, and requests a
    non-looping track for those tags.

    Args:
        prompt: Text prompt to match against the Mubert tags.
        duration: Track duration; converted with ``int()`` before the request.
        gen_intensity: Intensity value forwarded to ``get_track_by_tags``.

    Returns:
        A 3-tuple ``(track_url, tags_csv, status)``. On success ``status`` is
        ``"Success"``; if any step raises, the tuple is
        ``(None, "", <exception text>)``.
    """
    try:
        access_token = get_pat("[email protected]")
        matches = get_tags_for_prompts(minilm, mubert_tags_embeddings, [prompt])
        _, tags = matches[0]
        track_url = get_track_by_tags(
            tags, access_token, int(duration), gen_intensity, loop=False
        )
        print(track_url)
        return track_url, ",".join(tags), "Success"
    except Exception as e:
        # Failures are reported to the caller as text instead of propagating.
        return None, "", str(e)
|
73 |
+
|
74 |
+
def convert_mp3_to_wav(mp3_filepath):
|
75 |
+
|
76 |
+
url = mp3_filepath
|
77 |
save_as = "file.mp3"
|
78 |
|
79 |
data = urllib.request.urlopen(url)
|
|
|
87 |
sound = AudioSegment.from_mp3(save_as)
|
88 |
sound.export(wave_file, format="wav")
|
89 |
|
90 |
+
return wave_file
|
91 |
|
92 |
css = """
|
93 |
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
|
|
122 |
"""
|
123 |
|
124 |
with gr.Blocks(css=css) as demo:
|
125 |
+
with gr.Column(elem_id="col-container"):
|
126 |
+
gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
|
127 |
+
<div
|
128 |
+
style="
|
129 |
+
display: inline-flex;
|
130 |
+
align-items: center;
|
131 |
+
gap: 0.8rem;
|
132 |
+
font-size: 1.75rem;
|
133 |
+
"
|
134 |
+
>
|
135 |
+
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
|
136 |
+
Image to Music
|
137 |
+
</h1>
|
138 |
+
</div>
|
139 |
+
<p style="margin-bottom: 10px; font-size: 94%">
|
140 |
+
Sends an image in to <a href="https://huggingface.co/spaces/pharma/CLIP-Interrogator" target="_blank">CLIP Interrogator</a>
|
141 |
+
to generate a text prompt which is then run through
|
142 |
+
<a href="https://huggingface.co/Mubert" target="_blank">Mubert</a> text-to-music to generate music from the input image!
|
143 |
+
</p>
|
144 |
+
</div>""")
|
145 |
|
146 |
|
147 |
input_img = gr.Image(type="filepath", elem_id="input-img")
|
148 |
+
with gr.Row():
|
149 |
+
track_duration = gr.Slider(minimum=20, maximum=120, value=30, step=5, label="Track duration", elem_id="duration-inp")
|
150 |
+
gen_intensity = gr.Dropdown(choices=["low", "medium", "high"], value="high", label="Complexity")
|
151 |
generate = gr.Button("Generate Music from Image")
|
152 |
|
153 |
music_output = gr.Audio(label="Result", type="filepath", elem_id="music-output")
|
154 |
|
155 |
with gr.Group(elem_id="share-btn-container"):
|
156 |
+
community_icon = gr.HTML(community_icon_html, visible=False)
|
157 |
+
loading_icon = gr.HTML(loading_icon_html, visible=False)
|
158 |
+
share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
|
159 |
|
160 |
+
generate.click(get_prompts, inputs=[input_img,track_duration,gen_intensity], outputs=[music_output, share_button, community_icon, loading_icon], api_name="i2m")
|
161 |
+
share_button.click(None, [], [], _js=share_js)
|
162 |
|
163 |
demo.queue(max_size=32, concurrency_count=20).launch()
|
constants.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import os
|
3 |
+
|
4 |
+
# Mubert API credentials, read from the environment (None when unset).
MUBERT_LICENCE = os.environ.get('MUBERT_LICENCE')
MUBERT_TOKEN = os.environ.get('MUBERT_TOKEN')

MUBERT_MODE = "loop"

# Comma-separated list of tags. Kept on ONE physical line: a single-quoted
# string literal cannot contain a raw line break.
MUBERT_TAGS_STRING = 'tribal,action,kids,neo-classic,run 130,pumped,jazz / funk,ethnic,dubtechno,reggae,acid jazz,liquidfunk,funk,witch house,tech house,underground,artists,mystical,disco,sensorium,r&b,agender,psychedelic trance / psytrance,peaceful,run 140,piano,run 160,setting,meditation,christmas,ambient,horror,cinematic,electro house,idm,bass,minimal,underscore,drums,glitchy,beautiful,technology,tribal house,country pop,jazz & funk,documentary,space,classical,valentines,chillstep,experimental,trap,new jack swing,drama,post-rock,tense,corporate,neutral,happy,analog,funky,spiritual,sberzvuk special,chill hop,dramatic,catchy,holidays,fitness 90,optimistic,orchestra,acid techno,energizing,romantic,minimal house,breaks,hyper pop,warm up,dreamy,dark,urban,microfunk,dub,nu disco,vogue,keys,hardcore,aggressive,indie,electro funk,beauty,relaxing,trance,pop,hiphop,soft,acoustic,chillrave / ethno-house,deep techno,angry,dance,fun,dubstep,tropical,latin pop,heroic,world music,inspirational,uplifting,atmosphere,art,epic,advertising,chillout,scary,spooky,slow ballad,saxophone,summer,erotic,jazzy,energy 100,kara mar,xmas,atmospheric,indie pop,hip-hop,yoga,reggaeton,lounge,travel,running,folk,chillrave & ethno-house,detective,darkambient,chill,fantasy,minimal techno,special,night,tropical house,downtempo,lullaby,meditative,upbeat,glitch hop,fitness,neurofunk,sexual,indie rock,future pop,jazz,cyberpunk,melancholic,happy hardcore,family / kids,synths,electric guitar,comedy,psychedelic trance & psytrance,edm,psychedelic rock,calm,zen,bells,podcast,melodic house,ethnic percussion,nature,heavy,bassline,indie dance,techno,drumnbass,synth pop,vaporwave,sad,8-bit,chillgressive,deep,orchestral,futuristic,hardtechno,nostalgic,big room,sci-fi,tutorial,joyful,pads,minimal 170,drill,ethnic 108,amusing,sleepy ambient,psychill,italo disco,lofi,house,acoustic guitar,bassline house,rock,k-pop,synthwave,deep house,electronica,gabber,nightlife,sport & fitness,road trip,celebration,electro,disco house,electronic'

# Tags as a NumPy array so they can be selected with an index array.
MUBERT_TAGS = np.array(MUBERT_TAGS_STRING.split(','))
|
requirements.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
pydub
|
2 |
ffmpeg
|
3 |
requests
|
|
|
1 |
+
httpx
|
2 |
+
sentence-transformers
|
3 |
+
ffmpeg
|
4 |
+
audio2numpy
|
5 |
+
|
6 |
pydub
|
7 |
ffmpeg
|
8 |
requests
|
utils.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import numpy as np
|
3 |
+
import httpx
|
4 |
+
import os
|
5 |
+
|
6 |
+
from constants import MUBERT_TAGS, MUBERT_MODE, MUBERT_LICENCE, MUBERT_TOKEN
|
7 |
+
|
8 |
+
def get_mubert_tags_embeddings(w2v_model):
    """Encode every Mubert tag with the given model.

    Args:
        w2v_model: Object exposing ``encode``; it is called once with the
            module-level ``MUBERT_TAGS`` array.

    Returns:
        Whatever ``w2v_model.encode`` returns for the tag array.
    """
    tag_embeddings = w2v_model.encode(MUBERT_TAGS)
    return tag_embeddings
|
10 |
+
|
11 |
+
|
12 |
+
def get_pat(email: str):
    """Request a Mubert access token ("pat") for ``email``.

    Posts a ``GetServiceAccess`` request carrying ``email`` together with the
    module-level ``MUBERT_LICENCE``, ``MUBERT_TOKEN`` and ``MUBERT_MODE``.

    Args:
        email: E-mail address sent as the ``email`` request parameter.

    Returns:
        The value found at ``data.pat`` in the JSON response.

    Raises:
        AssertionError: If the response ``status`` is not 1.
    """
    r = httpx.post(
        'https://api-b2b.mubert.com/v2/GetServiceAccess',
        json={
            "method": "GetServiceAccess",
            "params": {
                "email": email,
                "license": MUBERT_LICENCE,
                "token": MUBERT_TOKEN,
                "mode": MUBERT_MODE,
            },
        },
    )

    rdata = json.loads(r.text)
    # Explicit raise instead of `assert` so the check still runs under
    # `python -O`; AssertionError is kept so existing handlers are unaffected.
    if rdata['status'] != 1:
        raise AssertionError("probably incorrect e-mail")
    return rdata['data']['pat']
|
28 |
+
|
29 |
+
|
30 |
+
def find_similar(em, embeddings, method='cosine'):
    """Score each reference embedding against ``em`` and rank the results.

    Args:
        em: Query vector.
        embeddings: Iterable of reference vectors.
        method: ``'cosine'`` scores each reference as ``1 - cosine similarity``;
            ``'norm'`` scores it as ``np.linalg.norm(ref - em)``.

    Returns:
        A tuple ``(scores, order)``: ``scores`` is an array with one score per
        reference in input order, and ``order`` is ``np.argsort(scores)``, so
        lower-scoring references come first.

    Raises:
        ValueError: If ``method`` is neither ``'cosine'`` nor ``'norm'``
            (previously this silently produced empty results).
    """
    if method == 'cosine':
        em_norm = np.linalg.norm(em)  # loop-invariant, computed once
        scores = [
            1 - np.dot(ref, em) / (np.linalg.norm(ref) * em_norm)
            for ref in embeddings
        ]
    elif method == 'norm':
        scores = [np.linalg.norm(ref - em) for ref in embeddings]
    else:
        raise ValueError(
            f"unsupported method {method!r}; expected 'cosine' or 'norm'"
        )
    return np.array(scores), np.argsort(scores)
|
38 |
+
|
39 |
+
|
40 |
+
def get_tags_for_prompts(w2v_model, mubert_tags_embeddings, prompts, top_n=3, debug=False):
    """Pick the best-matching Mubert tags for each prompt.

    Each prompt is encoded with ``w2v_model`` and compared against
    ``mubert_tags_embeddings`` via ``find_similar`` (default method); the
    ``top_n`` lowest-scoring tags are kept.

    Args:
        w2v_model: Object exposing ``encode``; called once with ``prompts``.
        mubert_tags_embeddings: Embeddings of ``MUBERT_TAGS``, in the same order.
        prompts: Sequence of prompts, indexable by position.
        top_n: Number of tags to keep per prompt.
        debug: When true, print each prompt with its tags and scores.

    Returns:
        A list of ``(prompt, tags)`` tuples, ``tags`` being a list of the
        selected entries of ``MUBERT_TAGS``.
    """
    encoded_prompts = w2v_model.encode(prompts)
    matches = []
    for position, prompt_vec in enumerate(encoded_prompts):
        distances, ranking = find_similar(prompt_vec, mubert_tags_embeddings)
        best = ranking[:top_n]
        top_tags = MUBERT_TAGS[best]
        # find_similar returns distance-like scores; flip them for display.
        top_prob = 1 - distances[best]
        if debug:
            print(f"Prompt: {prompts[position]}\nTags: {', '.join(top_tags)}\nScores: {top_prob}\n\n\n")
        matches.append((prompts[position], list(top_tags)))
    return matches
|