File size: 5,640 Bytes
635ad89 f088aee 635ad89 f088aee c1a6d8e f088aee 8f2153e f088aee 0a26beb f088aee 0a26beb f088aee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr, glob, os, auditok, random, zipfile, wave, pytube.exceptions
from pytube import YouTube
from moviepy.editor import VideoFileClip
import auditok
def download_video(url):
    """Download a YouTube video and write its audio track to ``output.wav``.

    The highest-resolution stream reported by pytube is downloaded, its audio
    is extracted with moviepy, and the downloaded video file is deleted
    afterwards.

    Args:
        url: The YouTube URL entered by the user.

    Returns:
        A status message for the UI.

    Raises:
        gr.Error: If the URL is rejected by pytube, no stream can be
            resolved, or the downloaded video has no audio track.
    """
    try:
        yt = YouTube(url)
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!") from err

    try:
        video = yt.streams.get_highest_resolution()
    except pytube.exceptions.PytubeError as err:
        # Stream metadata is only fetched here, so availability problems
        # surface at this point rather than in the constructor.
        raise gr.Error(f"Could not fetch the video streams: {err}") from err
    if video is None:
        raise gr.Error("No downloadable stream was found for this video.")

    # download() returns the path it wrote to, so only that file is cleaned
    # up instead of every *.mp4 in the working directory.
    video_path = video.download()
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track.")
            try:
                audio_clip.write_audiofile("output.wav")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # Remove the video even when extraction fails, so failed runs do not
        # leave large files behind.
        if os.path.exists(video_path):
            os.remove(video_path)

    return "Finished downloading! Please proceed to final tab."
def _wav_duration_seconds(path):
    """Return the length in seconds of the WAV file at *path*."""
    with wave.open(path, "rb") as wav_file:
        return wav_file.getnframes() / float(wav_file.getframerate())


def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split ``output.wav`` into regions and bundle them into a zip archive.

    The audio is split with ``auditok.split`` and each region is saved as
    ``<name_for_split_files>-<n>.wav``. The saved files are written into
    ``audio_files.zip`` and then deleted, together with ``output.wav``.

    Args:
        mindur: Minimum region duration passed to auditok as ``min_dur``.
        maxdur: Maximum region duration passed to auditok as ``max_dur``.
        name_for_split_files: Prefix used for the split file names.
        show_amount_of_files_and_file_dur: When truthy, the returned message
            also reports the number of files and their total duration.

    Returns:
        A ``(message, zip_file_name)`` tuple for the text and file outputs.

    Raises:
        gr.Error: If ``output.wav`` is missing, the durations are missing or
            inconsistent, the name is empty or contains a path separator, or
            a split file cannot be read back for the statistics.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This feature may be inaccurate especially for WAV files, so dont rely too much on the count and duration.")

    audio_path = "output.wav"
    if not os.path.exists(audio_path):
        raise gr.Error("Output.wav does not exist! Did you do the first tab correctly or at all?")
    # A cleared Number input arrives as None, which cannot be compared below.
    if mindur is None or maxdur is None:
        raise gr.Error("Min duration and max duration must both be set.")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An empty Textbox yields "" rather than None, so test for emptiness.
    if not name_for_split_files or not name_for_split_files.strip():
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")
    # The name comes straight from the UI and is used as a file name; keep
    # the output inside the working directory.
    if os.path.basename(name_for_split_files) != name_for_split_files:
        raise gr.Error("Split files name cannot contain path separators.")

    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45,
    )

    zip_file_name = "audio_files.zip"
    created_files = []
    try:
        for index, region in enumerate(audio_regions, start=1):
            target = f"{name_for_split_files}-{index}.wav"
            # save() is expected to return the name it wrote; fall back to
            # the requested name if it returns nothing.
            created_files.append(region.save(target) or target)

        # The source is removed only after every region has been written,
        # because the regions may be produced lazily from the file.
        os.remove(audio_path)

        # Zip exactly the files created by this run, not every *.wav that
        # happens to be in the working directory.
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for audio_file in created_files:
                zip_file.write(audio_file, os.path.basename(audio_file))

        message = "Files split successfully!\n\nCheck below for zipped files."
        if show_amount_of_files_and_file_dur:
            try:
                total_length = sum(_wav_duration_seconds(path) for path in created_files)
            except wave.Error as e:
                raise gr.Error(f"Error reading file: {e}")
            total_files = len(created_files)
            length_mins = total_length / 60
            message += f"\n\n{total_files} files created, {length_mins:.2f} minutes total."
    finally:
        # Always delete the split files so they cannot leak into the archive
        # of a later run.
        for leftover in created_files:
            if os.path.exists(leftover):
                os.remove(leftover)

    return message, zip_file_name
# Gradio UI: one tab to download a video and extract its audio, one tab to
# split that audio and offer the pieces as a zip file.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation was lost; confirm the intended layout (in particular
# whether the buttons belong inside or below the innermost rows).
with gr.Blocks(theme=gr.themes.Soft(), title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete, so please be patient.**"
    )
    with gr.Tabs():
        # Tab 1: URL in, status message out (handled by download_video).
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                    convertion = gr.Button("Download", variant='primary')
                    convertion.click(
                        fn=download_video,
                        inputs=[url],
                        outputs=gr.Text(label="Output")
                    )
        # Tab 2: split settings in, status message and zip file out
        # (handled by split_audio).
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=8)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                    splitbtn = gr.Button("Split", variant='primary')
                    splitbtn.click(
                        split_audio,
                        inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                        outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
                    )

# The stray "|" that followed this call was a syntax error and is removed.
app.launch()