"""Gradio app: download a YouTube video's audio and split it into a zipped dataset.

The app has two steps, one per tab:

1. ``download_video`` fetches a video and writes its audio track to
   ``output.wav`` in the working directory.
2. ``split_audio`` cuts ``output.wav`` on silence, names the pieces, and
   bundles them into ``audio_files.zip``.
"""

import glob
import os
import random
import wave
import zipfile

import auditok
import gradio as gr
from moviepy.editor import VideoFileClip
from pytube import YouTube

# Intermediate WAV shared between the download step and the split step.
_SOURCE_AUDIO = "output.wav"
# Archive handed back to the user by the split step.
_ZIP_NAME = "audio_files.zip"


def download_video(url):
    """Download a YouTube video and save its audio track as ``output.wav``.

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        A status message for the UI.

    Raises:
        gr.Error: If the URL is empty, no stream is available, or the video
            has no audio track.
    """
    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube URL.")

    stream = YouTube(url.strip()).streams.get_highest_resolution()
    if stream is None:
        raise gr.Error("No downloadable video stream was found for this URL.")

    # download() returns the path it wrote, so only that file is cleaned up
    # afterwards instead of every *.mp4 in the working directory.
    video_path = stream.download()
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track.")
            try:
                audio_clip.write_audiofile(_SOURCE_AUDIO)
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # The video is only an intermediate artifact; drop it even on failure.
        if os.path.exists(video_path):
            os.remove(video_path)
    return "Finished downloading! Please proceed to final tab."


def _summarize_wavs(paths):
    """Return ``(file_count, total_seconds)`` for the given WAV files."""
    file_count = 0
    total_seconds = 0.0
    for path in paths:
        try:
            with wave.open(path, "r") as wav:
                total_seconds += wav.getnframes() / float(wav.getframerate())
        except wave.Error as e:
            raise gr.Error(f"Error reading file: {e}")
        file_count += 1
    return file_count, total_seconds


def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split ``output.wav`` on silence and zip the resulting segments.

    Only the segments written by this call are zipped, measured and removed,
    so WAV files left in the working directory by anything else are neither
    packed into the archive nor deleted.

    Args:
        mindur: Minimum segment duration passed to ``auditok.split``.
        maxdur: Maximum segment duration passed to ``auditok.split``.
        name_for_split_files: Prefix for segment files, which are named
            ``<prefix>-<n>.wav`` with ``n`` starting at 1.
        show_amount_of_files_and_file_dur: When true, the status message also
            reports the number of segments and their total length.

    Returns:
        A ``(status_message, zip_path)`` tuple.

    Raises:
        gr.Error: On invalid durations, an empty or path-like name, a missing
            source file, no detected segments, or an unreadable segment.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This feature may be inaccurate especially for WAV files, so dont rely too much on the count and duration.")

    if mindur is None or maxdur is None:
        raise gr.Error("Min duration and max duration must both be set.")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")

    # An empty textbox arrives as "" rather than None, so test truthiness.
    prefix = (name_for_split_files or "").strip()
    if not prefix:
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")
    if os.path.basename(prefix) != prefix:
        # The prefix becomes part of a file path; keep it inside the cwd.
        raise gr.Error("Split files name cannot contain path separators.")

    if not os.path.isfile(_SOURCE_AUDIO):
        raise gr.Error("No downloaded audio found. Please download a video first.")

    regions = auditok.split(
        _SOURCE_AUDIO,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45,
    )

    segment_paths = []
    try:
        # Write every region before deleting the source, so the split never
        # depends on the input file still being readable.
        for index, region in enumerate(regions, start=1):
            target = f"{prefix}-{index}.wav"
            segment_paths.append(region.save(target) or target)
        os.remove(_SOURCE_AUDIO)

        if not segment_paths:
            raise gr.Error("No audio segments were detected, so there is nothing to zip.")

        with zipfile.ZipFile(_ZIP_NAME, "w") as archive:
            for path in segment_paths:
                archive.write(path, os.path.basename(path))

        if show_amount_of_files_and_file_dur:
            total_files, total_length = _summarize_wavs(segment_paths)
    finally:
        # The segments now live in the archive; remove the loose copies in
        # every case so they cannot leak into the next run.
        for path in segment_paths:
            if os.path.exists(path):
                os.remove(path)

    if not show_amount_of_files_and_file_dur:
        return "Files split successfully!\n\nCheck below for zipped files.", _ZIP_NAME

    length_mins = total_length / 60
    return f"Files split successfully!\n\nCheck below for zipped files.\n\n{total_files} files created, {length_mins:.2f} minutes total.", _ZIP_NAME


# NOTE(review): the source's indentation was lost, so which components sit
# inside each gr.Row() is an assumption -- confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Global Dataset Maker") as app:
    # NOTE(review): any HTML markup that originally wrapped this text is not
    # present in the source; only the visible text is reproduced here.
    gr.HTML("Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space)")
    gr.Markdown("This Space will create a dataset for you, all automatically.")
    with gr.Tabs():
        with gr.TabItem("Download Video"):
            with gr.Row():
                gr.Markdown("Enter a YT link here, and it will save as a WAV.")
                url = gr.Textbox(label="URL")
                convertion = gr.Button("Download", variant='primary')
                convertion.click(
                    fn=download_video,
                    inputs=[url],
                    outputs=gr.Text(label="Output"),
                )
        with gr.TabItem("Split audio files"):
            with gr.Row():
                gr.Markdown(
                    "Split the WAV file based on silence. You can also set a name for the split files here too."
                )
                mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=8)
                name_for_split_files = gr.Textbox(label="Name for split files")
                show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                splitbtn = gr.Button("Split", variant='primary')
                splitbtn.click(
                    split_audio,
                    inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                    outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")],
                )

app.launch()