Spaces:
Running
Running
File size: 8,013 Bytes
d7ff226 bc54299 d7ff226 bc54299 d7ff226 bc54299 d7ff226 bc54299 d7ff226 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
import gradio as gr
import librosa
from PIL import Image, ImageDraw, ImageFont
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, APIC, TIT2, TPE1
import io
from colorthief import ColorThief
import colorsys
import math
import os
from multiprocessing import Pool, cpu_count
import tempfile
import ffmpeg
import subprocess
import traceback
# Base directory used throughout this script: the font files are loaded from
# it, rendered frames are written to `out/<name>/` beneath it, and the final
# video is written next to them. It is joined by plain string concatenation,
# so it must end with a path separator.
path = 'C:/Users/ziqia/Downloads/oooo/' # Update with your path
def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the oscilloscope polyline for the window of samples around ``idx``.

    Args:
        ta: Sample values; 128 is treated as the zero line.
        idx: Index of the sample at the centre of the window.
        res: Window length in samples (``2 * (res // 2)`` points are produced).
        size: ``(width, height)`` of the target frame in pixels.

    Returns:
        A list of ``(x, y)`` tuples in centre-origin coordinates. ``x`` spans
        90% of the frame width starting at the left edge of that span; ``y``
        is the scaled sample value shifted down by 35% of the frame height.
        An empty list is returned when ``ta`` is empty.
    """
    if not ta:
        return []

    width, height = size[0], size[1]
    last_index = len(ta) - 1
    x = width * .9 / -2
    x_step = (width * .9) / res
    y_scale = height / 2000
    y_offset = height * .7 / -2

    points = []
    for i in range(idx - res // 2, idx + (res // 2)):
        # Clamp instead of indexing directly: a window that starts before the
        # first sample must not wrap around to the end of the list, and one
        # that runs past the last sample must not raise IndexError.
        sample = ta[min(max(i, 0), last_index)]
        points.append((x, (sample - 128) * y_scale + y_offset))
        x += x_step
    return points
def center_to_top_left(coords, width=1280, height=720):
    """Convert every centre-origin ``(x, y)`` pair in ``coords`` via ``totopleft``.

    Returns a new list; ``coords`` itself is not modified.
    """
    return [
        totopleft((cx, cy), width=width, height=height)
        for cx, cy in coords
    ]
def totopleft(coord, width=1280, height=720):
    """Map a centre-origin, y-up point to top-left-origin, y-down coordinates.

    ``coord`` is an ``(x, y)`` pair; the result is an ``(x, y)`` tuple for a
    canvas of the given ``width`` and ``height``.
    """
    half_width = width / 2
    half_height = height / 2
    return coord[0] + half_width, half_height - coord[1]
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Find the next rising crossing of the 128 midline at or after ``ad``.

    A trigger is an index ``i`` with ``a[i] < 128`` and ``a[i + 2] >= 128``.

    Args:
        ad: Index at which the search starts.
        a: Sample values.
        max: Give up once more than this many samples have been skipped
            (the parameter name shadows the builtin but is part of the
            public signature, so it is kept).

    Returns:
        The trigger index; or the first index more than ``max`` past ``ad``;
        or, when the list ends first, the first index for which ``a[i + 2]``
        would be out of range (``ad`` itself if the search cannot start).
    """
    # Scanning must stop before `a[i + 2]` leaves the list; the original
    # raised IndexError when no crossing was found near the end.
    scan_end = len(a) - 2
    i = ad
    while i < scan_end and i - ad <= max:
        if a[i] < 128 and a[i + 2] >= 128:
            break
        i += 1
    return i
def extract_cover_image(mp3_file):
    """Return the first embedded cover picture of an MP3 as a PIL image.

    Args:
        mp3_file: The MP3 to inspect, passed straight to ``mutagen.mp3.MP3``.

    Returns:
        The image decoded from the first ``APIC`` frame, or ``None`` (after
        printing a notice) when the file has no tags or no picture frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    tags = audio.tags
    # `tags` is None when the file carries no ID3 header at all; calling
    # `.values()` on it would raise AttributeError instead of reporting
    # "no cover".
    if tags is not None:
        for tag in tags.values():
            if isinstance(tag, APIC):
                return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None
def getTitleAndArtist(mp3_file):
    """Read the title (``TIT2``) and artist (``TPE1``) text of an MP3.

    Returns a ``(title, artist)`` tuple holding the first text entry of each
    frame; a missing frame falls back to ``'Unknown Title'`` /
    ``'Unknown Artist'``.
    """
    metadata = MP3(mp3_file, ID3=ID3)
    fallback_title = TIT2(encoding=3, text='Unknown Title')
    fallback_artist = TPE1(encoding=3, text='Unknown Artist')
    title_frame = metadata.get('TIT2', fallback_title)
    artist_frame = metadata.get('TPE1', fallback_artist)
    return title_frame.text[0], artist_frame.text[0]
def getColour(img):
    """Return the dominant colour of a PIL image as reported by ColorThief.

    The image is written to a temporary PNG because ``ColorThief`` is given a
    file name here; the file is removed again even if saving or analysis
    fails.

    Args:
        img: Image object providing ``save(path, format=...)``.

    Returns:
        The value of ``ColorThief.get_color(quality=1)`` for the image.
    """
    # Only reserve a unique file name: our own handle is closed immediately so
    # the writer and ColorThief open the path themselves.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        tmp_path = tmpfile.name
    try:
        img.save(tmp_path, format="PNG")
        return ColorThief(tmp_path).get_color(quality=1)
    finally:
        # Runs on success and on failure, so no stray temp files are left.
        os.remove(tmp_path)
def clamp(number):
    """Limit ``number`` to the closed interval [0, 1]."""
    # Cap at the upper bound first, then raise to the lower bound.
    capped = 1 if 1 < number else number
    return capped if capped > 0 else 0
def normalizeColour(C) -> tuple[int, int, int]:
    """Derive the background colour from an 8-bit RGB colour.

    The hue is kept, the saturation is multiplied by 1.3 (limited to 1 by
    ``clamp``) and the HSV value is fixed at 0.8. Returns 8-bit ``(r, g, b)``.
    """
    hue, saturation, _ = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
    red, green, blue = colorsys.hsv_to_rgb(hue, clamp(1.3 * saturation), .8)
    return math.floor(red * 255), math.floor(green * 255), math.floor(blue * 255)
def normalizeColourBar(C) -> tuple[int, int, int]:
    """Derive the progress-bar track colour from an 8-bit RGB colour.

    The hue is kept, the saturation is multiplied by 1.4 (limited to 1 by
    ``clamp``) and the HSV value is fixed at 0.6. Returns 8-bit ``(r, g, b)``.
    """
    hue, saturation, _ = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
    red, green, blue = colorsys.hsv_to_rgb(hue, clamp(1.4 * saturation), .6)
    return math.floor(red * 255), math.floor(green * 255), math.floor(blue * 255)
def stamp_text(draw, text, font, position, align='left'):
    """Draw ``text`` in white, vertically centred on ``position``.

    Args:
        draw: Drawing object providing ``textbbox`` and ``text``.
        text: String to render.
        font: Font passed through to ``draw``.
        position: ``(x, y)`` anchor in canvas coordinates.
        align: ``'left'`` puts the anchor at the text's left edge,
            ``'center'`` at its horizontal middle, ``'right'`` at its right
            edge. Any other value behaves like ``'left'``.
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    x, y = position
    if align == 'center':
        x -= text_width // 2
    elif align == 'right':
        x -= text_width
    y -= text_height // 2

    draw.text((x, y), text, font=font, fill="#fff")
def linear_interpolate(start, stop, progress):
    """Return the point ``progress`` of the way from ``start`` to ``stop``.

    ``progress`` of 0 yields ``start`` and 1 yields ``stop``; values outside
    that range extrapolate.
    """
    span = stop - start
    return start + progress * span
def render_frame(params):
    """Render one video frame and save it as ``<path>out/<name>/<n>.png``.

    Args:
        params: One tuple (so the function can be mapped over a process
            pool) of ``(n, samples_array, cover_img, title, artist,
            dominant_color, width, height, fps, name, oscres)``.

    Returns:
        ``None``. Nothing is drawn or written when frame ``n`` starts at or
        beyond the end of ``samples_array``.
    """
    (n, samples_array, cover_img, title, artist, dominant_color,
     width, height, fps, name, oscres) = params

    total_samples = len(samples_array)
    # 11025 is the sample rate `main` resamples the audio to.
    s = (11025 // fps) * n
    # `>=`, not `>`: an index equal to the length is already past the end.
    if s >= total_samples:
        return

    short_side = min(width, height)

    def font_size(base):
        # Multiply before dividing so the size scales proportionally with the
        # frame; `base * (short_side // 720)` collapsed to 0 for any frame
        # smaller than 720 px, which a TrueType font cannot be loaded at.
        return max(1, base * short_side // 720)

    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)

    # Oscilloscope trace, anchored on a trigger point so it stays steady.
    trigger = getTrigger(s, samples_array, max=oscres)
    wave = getRenderCords(samples_array, trigger, res=oscres, size=(width, height))
    d.line(center_to_top_left(wave, width=width, height=height), fill='#fff', width=2)

    # Square cover art, half the short side, horizontally centred.
    cs = math.floor(short_side / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))

    # Title and artist captions.
    fontT = ImageFont.truetype(path+'Lexend-Bold.ttf', font_size(50))
    fontA = ImageFont.truetype(path+'Lexend-Bold.ttf', font_size(40))
    stamp_text(d, title, fontT, totopleft((0, short_side * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft((0, short_side * .44 // -2), width=width, height=height), 'center')

    # Progress bar: a full-width track with a white fill drawn over it.
    bar_y = height * .95 // -2
    d.line(center_to_top_left([(width * .96 // -2, bar_y), (width * .96 // 2, bar_y)], width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    fill_end = linear_interpolate(width * .95 // -2, width * .95 // 2, s / total_samples)
    d.line(center_to_top_left([(width * .95 // -2, bar_y), (fill_end, bar_y)], width=width, height=height),
           fill='#fff', width=10 * height // 360)

    os.makedirs(path+f'out/{name}/', exist_ok=True)
    img.save(path+f'out/{name}/{str(n)}.png', 'PNG',)
def RenderVid(af, n, fps=30):
    """Mux the rendered frames of job ``n`` with audio ``af`` into ``<n>.mp4``.

    Args:
        af: Path of the audio file to use as the sound track.
        n: Job name; frames are read from ``<path>out/<n>/%d.png`` and the
            video is written to ``n + '.mp4'``.
        fps: Frame rate used both for reading the image sequence and for the
            output video.
    """
    # In ffmpeg-python, `input` is a module-level function rather than a
    # stream method, so a second input cannot be chained with `.input(...)`;
    # both inputs are handed to `ffmpeg.output` instead.
    frames = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
    audio = ffmpeg.input(af)
    (ffmpeg
        .output(frames, audio, n + '.mp4', vcodec='libx264', r=fps,
                pix_fmt='yuv420p', acodec='aac', shortest=None)
        .run()
    )
def main(file, name, fps=30, res: tuple=(1280,720), oscres = 512):
    """Render an oscilloscope video for an MP3 file.

    The audio is resampled to 11025 Hz and converted to unsigned 8-bit
    samples, one PNG per frame is rendered in a process pool, and the frames
    are then muxed with the original audio by the ``ffmpeg`` executable into
    ``<path><name>.mp4``.

    Args:
        file: Path of the MP3 file.
        name: Job name; used for the frame directory and the output file.
        fps: Frames per second of the output video.
        res: ``(width, height)`` of the video in pixels.
        oscres: Number of samples shown in the oscilloscope window.

    Returns:
        ``None``. Returns early, without rendering, when the MP3 has no cover
        image.
    """
    global iii
    iii = 0  # not read anywhere in the visible code; kept in case other code relies on it

    # A float frame rate (e.g. from a UI widget) would turn the frame count
    # below into a float, which `range` rejects.
    fps = int(fps)

    # Load the audio file
    audio_path = file
    y, sr = librosa.load(audio_path, sr=11025)  # Resample to 11025 Hz
    # Clip before the cast: a full-scale sample of 1.0 maps to 256, which
    # would otherwise wrap around to 0 in uint8.
    y_u8 = (y * 128 + 128).clip(0, 255).astype('uint8')
    samples_array = y_u8.tolist()

    # Extract cover image, title, and artist
    cover_img = extract_cover_image(audio_path)
    if cover_img is None:
        return  # Exit if no cover image found
    title, artist = getTitleAndArtist(audio_path)
    dominant_color = getColour(cover_img)

    # Frame rendering parameters
    width, height = res[0], res[1]
    num_frames = len(samples_array) // (11025 // fps)

    # Prepare parameters for each frame
    params = [(n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres) for n in range(num_frames)]
    try:
        with Pool(cpu_count()) as pool:
            pool.map(render_frame, params)
    except Exception:
        # Best effort: report the failure and still try to encode whatever
        # frames were written.
        print('Ended in error: ' + traceback.format_exc())

    print('FFMPEG')
    # The frames were rendered for `fps`, so the encoder must read and write
    # at that same rate; a hard-coded 30 desynchronises audio and video for
    # any other setting.
    ffmpeg_cmd = [
        "ffmpeg",
        '-framerate', str(fps),
        '-i', path+f'out/{name}/%d.png',  # Input PNG images
        '-i', f'{file}',  # Input MP3 audio
        '-c:v', 'libx264',
        '-r', str(fps),
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
        '-shortest',
        path+f'{name}.mp4'  # Output MP4 filename
    ]
    subprocess.run(ffmpeg_cmd)
def gradio_interface(audio_file, output_name, fps=30, vidwidth=1280, vidhight=720, oscres=512):
    """Gradio callback: validate the form values and run ``main``.

    Args:
        audio_file: The uploaded MP3, either as a path string or as an object
            exposing the path in ``.name``.
        output_name: Output video name without extension.
        fps: Frames per second.
        vidwidth: Video width in pixels.
        vidhight: Video height in pixels.
        oscres: Number of samples in the oscilloscope window.

    Returns:
        A status message naming the output file.
    """
    # NOTE(review): depending on the Gradio version the File component may
    # hand over a temp-file wrapper instead of a path string - confirm. Its
    # `.name` is used when present, because the value ends up inside an
    # ffmpeg command-line argument.
    audio_path = getattr(audio_file, "name", audio_file)
    # Convert numerically; the earlier string round-trip through
    # f"{w}x{h}" failed on float values such as 1280.0.
    res = (int(vidwidth), int(vidhight))
    main(audio_path, output_name, fps=int(fps), res=res, oscres=int(oscres))
    return f"Output video '{output_name}.mp4' has been created. Click the link to download."
# Define Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.components.File(label="Upload your MP3 file"),
        gr.components.Textbox(label="Output Video Name (without extension)"),
        gr.components.Slider(label="Frames per Second", value=30, minimum=30, maximum=60, step=1),
        gr.components.Slider(label="Output Video Width", value=1280, minimum=100, maximum=4096),
        gr.components.Slider(label="Output Video Height", value=720, minimum=100, maximum=2160),
        gr.components.Slider(label="Number of Visualization Segments", value=512, minimum=128, maximum=2048, step=2)
    ],
    outputs=[gr.components.Textbox(label="Output")],
    title="MP3 to Video Visualization",
    description="Upload an MP3 file and configure parameters to create a visualization video."
)

# Launch Gradio interface.
# Only when executed as a script: `main` renders frames in a multiprocessing
# pool, and with the "spawn" start method every worker re-imports this module,
# which would otherwise start another server in each worker.
if __name__ == "__main__":
    iface.launch()
|