|
import gradio as gr, glob, os, auditok, zipfile, wave, pytube.exceptions, librosa, time, librosa, librosa.display, matplotlib.pyplot as plt, numpy as np, urllib.error, traceback, yt_dlp |
|
from pytube import YouTube |
|
from moviepy.editor import VideoFileClip |
|
|
|
def download_video(url, download_as, use_ytdlp=False):
    """Download a video's audio track to ``output.<download_as>``.

    Args:
        url: Address of the video to fetch.
        download_as: Target audio format; the UI offers "wav" and "mp3".
        use_ytdlp: When true, fetch with yt_dlp and let its
            FFmpegExtractAudio post-processor produce the audio file.
            Otherwise download with pytube and extract the audio with
            moviepy. Defaults to False so a caller that supplies only the
            first two values still gets the pytube path.

    Returns:
        A status message for the UI.

    Raises:
        gr.Error: If the URL is rejected, the video is age-restricted, an
            HTTP error is received, or yt_dlp fails.
    """
    if use_ytdlp:
        ydl_opts = {
            'format': f"{download_as}/bestaudio/best",
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': download_as,
            }],
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            # Normalise whatever name yt_dlp chose to the fixed name the
            # split tab looks for.
            for extracted in glob.glob(f"*.{download_as}"):
                os.rename(extracted, f"output.{download_as}")
        except Exception as exc:
            # Surface the full traceback in the UI, but let
            # KeyboardInterrupt/SystemExit propagate untouched.
            raise gr.Error(traceback.format_exc()) from exc
        return "Finished downloading! Please proceed to next tab."

    try:
        yt = YouTube(url)
        # NOTE(review): pytube appears to defer network access until
        # `streams` is read, so the lookup and download are guarded by the
        # same handlers as the constructor - confirm against the installed
        # pytube version.
        video = yt.streams.get_highest_resolution()
        video.download()
    except pytube.exceptions.RegexMatchError:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
    except urllib.error.HTTPError as not_ok:
        raise gr.Error(f"Recieved {not_ok}")
    except pytube.exceptions.AgeRestrictedError:
        raise gr.Error("The video you inputted is age-restricted! Please try another link.")

    video_path = f"{video.default_filename}"
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            try:
                if download_as in ("wav", "mp3"):
                    audio_clip.write_audiofile(f"output.{download_as}")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # The downloaded video is only an intermediate artefact; drop it
        # even when the audio export failed.
        for removalmp4 in glob.glob("*.mp4"):
            os.remove(removalmp4)
    return "Finished downloading! Please proceed to next tab."
|
|
|
def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split the previously downloaded ``output.mp3``/``output.wav`` into clips.

    The audio is cut with ``auditok.split``, each region is saved as
    ``<name_for_split_files>-<n>.wav`` in the working directory, every
    ``*.wav`` file there is packed into ``audio_files.zip`` and the wav
    files are then deleted.

    Args:
        mindur: Minimum clip duration passed to auditok as ``min_dur``.
        maxdur: Maximum clip duration passed to auditok as ``max_dur``.
        name_for_split_files: Prefix for the generated clip file names.
        show_amount_of_files_and_file_dur: When true, also report how many
            clips were produced and their combined length in minutes.

    Returns:
        A ``(message, zip_file_name)`` tuple for the two UI outputs.

    Raises:
        gr.Error: If no downloaded audio exists, the durations are invalid,
            the name is empty, or a clip cannot be read back as WAV.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning(f"show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")
    if not os.path.exists("output.mp3") and not os.path.exists("output.wav"):
        raise gr.Error("Neither output.mp3 or output.wav exist! Did the video download correctly?")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An untouched text field arrives as "" rather than None, so test
    # truthiness to catch both.
    if not name_for_split_files:
        raise gr.Error("Split files name cannot be empty!")

    audio_path = "output.mp3" if os.path.exists("output.mp3") else "output.wav"
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45
    )
    for i, r in enumerate(audio_regions):
        r.save(f"{name_for_split_files}-{i+1}.wav")
    # Delete the source only after every region has been written (the
    # regions are consumed lazily), and before globbing so that output.wav
    # is not packed into the archive.
    os.remove(audio_path)

    audio_files = sorted(glob.glob("*.wav"))
    zip_file_name = "audio_files.zip"
    message = "Files split successfully!\nCheck below for zipped files."
    try:
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for audio_file in audio_files:
                zip_file.write(audio_file, os.path.basename(audio_file))

        if show_amount_of_files_and_file_dur:
            total_length = 0.0
            for file_name in audio_files:
                try:
                    with wave.open(file_name, 'r') as wav_file:
                        total_length += wav_file.getnframes() / float(wav_file.getframerate())
                except wave.Error as e:
                    raise gr.Error(f"Error reading file: {e}")
            length_mins = total_length / 60
            message += f"\n\n{len(audio_files)} files created, {length_mins:.2f} minutes total."
    finally:
        # The clips now live in the zip; remove the loose copies even if
        # reading one of them back failed.
        for file2 in audio_files:
            os.remove(file2)
    return message, zip_file_name
|
|
|
def split_wav_or_mp3_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2):
    """Split an uploaded WAV/MP3 file into clips and zip them.

    The upload is cut with ``auditok.split``, each region is saved as
    ``<name_for_split_files2>-<n>.wav`` in the working directory, every
    ``*.wav`` file there is packed into ``audio_files.zip`` and the wav
    files are then deleted. The uploaded file itself is removed as well.

    Args:
        audiofileuploader: The uploaded audio, handed to ``auditok.split``
            and ``os.remove`` as-is.
        mindur2: Minimum clip duration passed to auditok as ``min_dur``.
        maxdur2: Maximum clip duration passed to auditok as ``max_dur``.
        name_for_split_files2: Prefix for the generated clip file names.

    Returns:
        A ``(message, zip_file_name)`` tuple for the two UI outputs.

    Raises:
        gr.Error: If no file was uploaded, the durations are invalid, or
            the name is empty.
    """
    if audiofileuploader is None:
        raise gr.Error("Audio file cannot be empty!")
    # Report this function's own arguments; the bare names `mindur` and
    # `maxdur` refer to module-level UI components, not the numbers.
    if mindur2 == maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, min and max are the same number.")
    if mindur2 > maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, mindur is higher than maxdur.")
    # An untouched text field arrives as "" rather than None.
    if not name_for_split_files2:
        raise gr.Error("Split files name cannot be empty!")

    audio_path = audiofileuploader
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur2,
        max_dur=maxdur2,
        max_silence=0.3,
        energy_threshold=45
    )
    for i, r in enumerate(audio_regions):
        r.save(f"{name_for_split_files2}-{i+1}.wav")
    # Remove the upload only once all lazily produced regions are saved.
    os.remove(audio_path)

    audio_files = sorted(glob.glob("*.wav"))
    zip_file_name2 = "audio_files.zip"
    try:
        with zipfile.ZipFile(zip_file_name2, "w") as zip_file:
            for audio_file in audio_files:
                zip_file.write(audio_file, os.path.basename(audio_file))
    finally:
        for file2 in audio_files:
            os.remove(file2)
    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(audio_files)}", zip_file_name2
|
|
|
def all_in_one_inator(ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files, progress=gr.Progress()):
    """Download a video's audio with pytube and split it in one step.

    Args:
        ytvideo: Address of the video to fetch.
        download_yt_video_as: Intermediate audio format, "wav" or "mp3".
        min_duration: Minimum clip duration passed to auditok as ``min_dur``.
        max_duration: Maximum clip duration passed to auditok as ``max_dur``.
        name_for_outputted_split_files: Prefix for the generated clip names.
        progress: Progress reporter updated at the start, after the
            download and at the end.

    Returns:
        A ``(message, zip_file_name)`` tuple for the two UI outputs.

    Raises:
        gr.Error: If the durations are invalid, the name is empty, the URL
            is rejected, the video is age-restricted, or an HTTP error is
            received.
    """
    # Compare the argument, not the `download_as` UI component of another
    # tab (which never equals a string).
    if download_yt_video_as == "mp3":
        gr.Warning("MP3 is experimental, especially with this, so caution is advised.")
    if min_duration == max_duration:
        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, min and max are the same number.")
    if min_duration > max_duration:
        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, mindur is higher than maxdur.")
    # An untouched text field arrives as "" rather than None.
    if not name_for_outputted_split_files:
        raise gr.Error("Split files name cannot be empty!")

    progress(0, "Downloading video...")
    try:
        yt = YouTube(ytvideo)
        # NOTE(review): pytube appears to defer network access until
        # `streams` is read, so the lookup and download share the handlers
        # below - confirm against the installed pytube version.
        video = yt.streams.get_highest_resolution()
        video.download()
    except pytube.exceptions.RegexMatchError:
        raise gr.Error("URL not valid or was left empty! Please fix the link or enter one.")
    except urllib.error.HTTPError as not_ok:
        raise gr.Error(f"Recieved {not_ok}")
    except pytube.exceptions.AgeRestrictedError:
        raise gr.Error("The video you inputted is age-restricted! Please try another link.")

    video_path = f"{video.default_filename}"
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            try:
                if download_yt_video_as in ("wav", "mp3"):
                    audio_clip.write_audiofile(f"output.{download_yt_video_as}")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # The video file is only an intermediate artefact.
        for removemp4 in glob.glob("*.mp4"):
            os.remove(removemp4)

    progress(0.5, "Video downloaded! Starting split process...")
    # Split exactly the file written above; probing for output.mp3 could
    # pick up a stale file left by an earlier run.
    audio_path = f"output.{download_yt_video_as}"
    audio_regions = auditok.split(
        audio_path,
        min_dur=min_duration,
        max_dur=max_duration,
        max_silence=0.3,
        energy_threshold=45
    )
    for i, r in enumerate(audio_regions):
        r.save(f"{name_for_outputted_split_files}-{i+1}.wav")
    # Remove the source after the lazily produced regions are saved and
    # before globbing, so output.wav is not zipped with the clips.
    os.remove(audio_path)

    audio_files = sorted(glob.glob("*.wav"))
    zip_file_name = "audio_files.zip"
    try:
        with zipfile.ZipFile(zip_file_name, 'w') as zip_file:
            for audio_file in audio_files:
                zip_file.write(audio_file, os.path.basename(audio_file))
    finally:
        for file2 in audio_files:
            os.remove(file2)

    progress(1, "Done! Cleaning up...")
    # Short pause so the final progress message stays visible.
    time.sleep(2)
    return "Process done successfully! Check below for zipped files!", zip_file_name
|
|
|
def download_video_as_audio_only(yt_video, audio_output_format):
    """Download a video with pytube and return its audio inside a zip file.

    The audio is written to ``output.wav`` or ``output.mp3``, every file
    with the chosen extension in the working directory is packed into
    ``only_audio.zip``, and all ``*.wav`` and ``*.mp3`` files are then
    deleted.

    Args:
        yt_video: Address of the video to fetch.
        audio_output_format: "wav" or "mp3".

    Returns:
        A ``(message, zip_file_name)`` tuple for the two UI outputs; the
        message includes the video's title and author.

    Raises:
        gr.Error: If the URL is rejected, the video is age-restricted, or
            an HTTP error is received.
    """
    try:
        yt = YouTube(yt_video)
        # NOTE(review): pytube appears to defer network access until
        # `streams` is read, so the lookup and download share the handlers
        # below - confirm against the installed pytube version.
        video = yt.streams.get_highest_resolution()
        video.download()
    except pytube.exceptions.RegexMatchError:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
    except urllib.error.HTTPError as not_ok:
        raise gr.Error(f"Recieved {not_ok}")
    except pytube.exceptions.AgeRestrictedError:
        raise gr.Error("The video you inputted is age-restricted! Please try another link.")

    video_path = f"{video.default_filename}"
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            try:
                if audio_output_format in ("wav", "mp3"):
                    audio_clip.write_audiofile(f"output.{audio_output_format}")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # The video file is only an intermediate artefact.
        for mp4remove in glob.glob("*.mp4"):
            os.remove(mp4remove)

    single_zip_name = "only_audio.zip"
    audio_files = glob.glob("*.wav") if audio_output_format == "wav" else glob.glob("*.mp3")
    try:
        with zipfile.ZipFile(single_zip_name, 'w') as zip_file:
            for audio_file in audio_files:
                zip_file.write(audio_file, os.path.basename(audio_file))
    finally:
        # Sweep both extensions so nothing is left behind for later runs.
        for pattern in ("*.wav", "*.mp3"):
            for leftover in glob.glob(pattern):
                os.remove(leftover)
    return f"Done! Download the zip file below! This only contains the audio file.\n\nYou have downloaded {yt.title} by {yt.author}.", single_zip_name
|
|
|
def check_for_remaining_wav_or_mp3_files(which_filetype):
    """Report how many files with the given extension sit in the working directory.

    Args:
        which_filetype: Extension to look for, without the leading dot.

    Returns:
        A sentence stating the number of matching files.
    """
    leftover_count = len(glob.glob(f"*.{which_filetype}"))
    return f"There are {leftover_count} leftover files."
|
|
|
def display_audio_spectrogram(audio_file):
    """Render a log-frequency spectrogram of an audio file to ``spectrogram.png``.

    Args:
        audio_file: The uploaded file, passed straight to ``librosa.load``.

    Returns:
        The path of the saved image, ``"spectrogram.png"``.

    Raises:
        gr.Error: If no file was provided.
    """
    if audio_file is None:
        raise gr.Error("Cannot leave WAV field empty! Please insert a WAV file.")

    y, sr = librosa.load(audio_file)
    # Magnitude of the STFT in dB, relative to the loudest bin.
    d = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    output = "spectrogram.png"
    plt.figure(figsize=(12, 8))
    try:
        librosa.display.specshow(d, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar(format="%+2.0f db")
        plt.title("Spectrogram")
        plt.savefig(output)
    finally:
        # Always release the figure, even when drawing or saving fails,
        # so repeated requests do not accumulate open figures.
        plt.close()
    return output
|
|
|
# Gradio UI definition. Each tab wires its widgets to one of the handler
# functions defined above.
# NOTE(review): container nesting was reconstructed from statement order;
# verify the rendered layout against the deployed app.
with gr.Blocks(theme='sudeepshouche/minimalist', title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown("## Duplicate this space if you want to make your own changes!")
    gr.HTML(
        """<p style="margin:5px auto;display: flex;justify-content: left;">
<a href="https://huggingface.co/spaces/Kryptone/GDMGS?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md-dark.svg" alt="Duplicate this Space"></a>
</p>"""
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete.**"
    )
    gr.HTML(
        "<h2> This Space's storage is ephemeral, meaning once you reload this space, all audio files will be lost. </h2>"
    )
    with gr.Tabs():
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        download_as = gr.Radio(["wav", "mp3"], label="Audio format output", value="wav", info="What should the audio format be output as?")
                        use_ytdlp = gr.Checkbox(False, label="Use yt_dlp instead of pytube?", info="Sometimes Pytube refuses to download a video. If that happens, check this box to download using yt_dlp instead.")
                        convertion = gr.Button("Download", variant='primary')
                        convertion.click(
                            fn=download_video,
                            # The checkbox must be passed too, otherwise
                            # the handler never learns that yt_dlp was
                            # requested.
                            inputs=[url, download_as, use_ytdlp],
                            outputs=gr.Text(label="Output")
                        )
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                        splitbtn = gr.Button("Split", variant='primary')
                        splitbtn.click(
                            split_audio_from_yt_video,
                            inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                            outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
                        )
        with gr.TabItem("Misc tools"):
            with gr.Tab("File splitter"):
                gr.Markdown("If you would rather split a single WAV or mp3 audio file, use this method instead.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            audiofileuploader = gr.File(file_count='single', file_types=[".wav", ".mp3"], label="WAV or mp3 file")
                            mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_split_files2 = gr.Textbox(label="Name for split files")
                            audiofileuploadbtn = gr.Button("Split", variant='primary')
                            audiofileuploadbtn.click(
                                split_wav_or_mp3_file,
                                [audiofileuploader, mindur2, maxdur2, name_for_split_files2],
                                [gr.Text(label="Output"), gr.File(label="Zipped files")]
                            )
            with gr.Tab("All-in-one downloader and splitter"):
                gr.Markdown("This is very experimental and may break or change in the future. This essentially combines both the first 2 tabs into an all-in-one script.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            ytvideo = gr.Textbox(label="URL")
                            download_yt_video_as = gr.Radio(["wav", "mp3"], value="wav", label="Audio output format")
                            min_duration = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            max_duration = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_outputted_split_files = gr.Textbox(label="Name for split files")
                            download_and_split_btn = gr.Button("Download and split", variant='primary')
                            download_and_split_btn.click(
                                all_in_one_inator,
                                [ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files],
                                [gr.Text(label="Result"), gr.File(label="Zipped files")]
                            )
            with gr.Tab("Audio only download"):
                gr.Markdown("If you want to download only the audio (to isolate bgm using UVR, etc), use this method, which will only extract audio and not split the audio.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            yt_video = gr.Textbox(label="URL")
                            audio_output_format = gr.Radio(["wav", "mp3"], value="wav", label="Download audio as:")
                            commence_download = gr.Button("Download", variant='primary')
                            commence_download.click(
                                download_video_as_audio_only,
                                [yt_video, audio_output_format],
                                [gr.Text(label="Output"), gr.File(label="Zipped audio file")]
                            )
            with gr.Tab("Check for leftover mp3, wav, or zip files"):
                gr.Markdown("There might be instances where sometimes a few wav, mp3, or zip files are left over after a conversion. This section tells how many of those files are left, if any.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            which_filetype = gr.Radio(["wav", "mp3", "zip"], value="wav", label="Search for what filetype?")
                            checkbtn = gr.Button("Check for files", variant='primary')
                            checkbtn.click(
                                check_for_remaining_wav_or_mp3_files,
                                which_filetype,
                                gr.Text(label="Result")
                            )
            with gr.Tab("Audio spectrogram"):
                gr.Markdown("Insert a wav file here and this will show the spectrogram for it.")
                with gr.Row():
                    with gr.Column():
                        filetoanalyze = gr.File(file_count='single', file_types=[".wav"], label="WAV file")
                        analyzebtn = gr.Button("Display", variant='primary')
                        analyzebtn.click(
                            display_audio_spectrogram,
                            filetoanalyze,
                            gr.Image(label="Spectrogram result", show_download_button=False, scale=2)
                        )
        with gr.TabItem("Changelog"):
            gr.Markdown("v0.98.5 - Added an option to download yt link via yt_dlp if pytube cant download it.")
            gr.Markdown("v0.98.2 - Added new upcoming features tab.")
        with gr.TabItem("Upcoming features"):
            gr.HTML(
                """
<a href="https://imgbb.com/"><img src="https://i.ibb.co/4Vt9K3c/image-2024-01-26-052541742.png" border="0"></a><br />
"""
            )

app.launch()