| import gradio as gr, glob, os, auditok, random, zipfile, wave | |
| from pytube import YouTube | |
| from moviepy.editor import VideoFileClip | |
| import auditok | |
def download_video(url):
    """Download a YouTube video and extract its audio track to ``output.wav``.

    The video is fetched with pytube (highest-resolution stream), its audio
    is written to ``output.wav`` in the current working directory via
    moviepy, and the downloaded video file is deleted afterwards.

    Args:
        url: URL of the YouTube video to download.

    Returns:
        A status message to show in the UI.

    Raises:
        gr.Error: If no downloadable stream is found or the video has no
            audio track.
    """
    yt = YouTube(url)
    stream = yt.streams.get_highest_resolution()
    if stream is None:
        raise gr.Error("No downloadable video stream was found for this URL.")

    # download() returns the path it wrote to; keep it so that cleanup only
    # touches this file instead of every *.mp4 in the working directory.
    video_path = stream.download()
    try:
        video_clip = VideoFileClip(video_path)
        audio_clip = None
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track to extract.")
            audio_clip.write_audiofile("output.wav")
        finally:
            # Always release the clip handles, even if extraction failed.
            if audio_clip is not None:
                audio_clip.close()
            video_clip.close()
    finally:
        # The video is only an intermediate artifact; drop it on every path.
        if os.path.exists(video_path):
            os.remove(video_path)
    return "Finished downloading! Please proceed to final tab."
def _summarize_wav_files(paths):
    """Return ``(file_count, total_seconds)`` for the WAV files in *paths*."""
    total_files = 0
    total_length = 0.0
    for path in paths:
        try:
            with wave.open(path, "rb") as audio_file:
                frames = audio_file.getnframes()
                rate = audio_file.getframerate()
        except wave.Error as e:
            raise gr.Error(f"Error reading file: {e}") from e
        total_length += frames / float(rate)
        total_files += 1
    return total_files, total_length


def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split ``output.wav`` into segments and bundle them into a zip file.

    ``output.wav`` (in the current working directory) is split with
    ``auditok.split``; each detected region is saved as
    ``<name_for_split_files>-<n>.wav``, the segments are written to
    ``audio_files.zip``, and both ``output.wav`` and the individual
    segment files are removed afterwards.

    Args:
        mindur: Minimum segment duration passed to auditok as ``min_dur``.
        maxdur: Maximum segment duration passed to auditok as ``max_dur``.
        name_for_split_files: Filename prefix for the generated segments.
        show_amount_of_files_and_file_dur: When truthy, the status message
            also reports how many segments were created and their total
            duration in minutes.

    Returns:
        A ``(status_message, zip_file_name)`` tuple.

    Raises:
        gr.Error: If ``output.wav`` is missing, the durations are missing or
            inconsistent, the name is empty or contains a path separator, or
            a generated segment cannot be read back as WAV.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This feature may be inaccurate especially for WAV files, so dont rely too much on the count and duration.")

    audio_path = "output.wav"
    if not os.path.exists(audio_path):
        raise gr.Error("Output.wav does not exist! Did you do the first tab correctly or at all?")
    if mindur is None or maxdur is None:
        # A cleared Number field arrives as None and would break the
        # comparisons below with a TypeError.
        raise gr.Error("Min duration and max duration must both be set.")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An empty Textbox yields "" rather than None, so test for falsiness.
    if not name_for_split_files or not name_for_split_files.strip():
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")
    if os.path.basename(name_for_split_files) != name_for_split_files:
        # The name comes from user input and is used as a filename prefix;
        # keep the generated files inside the working directory.
        raise gr.Error("Split files name cannot contain path separators.")

    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45,
    )

    # Track exactly the files created here so that zipping, statistics and
    # cleanup never touch unrelated or stale *.wav files in the directory.
    created_files = []
    for index, region in enumerate(audio_regions, start=1):
        target = f"{name_for_split_files}-{index}.wav"
        created_files.append(region.save(target) or target)

    # Remove the source only after every region has been consumed and saved.
    os.remove(audio_path)

    zip_file_name = "audio_files.zip"
    message = "Files split successfully!\n\nCheck below for zipped files."
    try:
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for audio_file in created_files:
                zip_file.write(audio_file, os.path.basename(audio_file))

        if show_amount_of_files_and_file_dur:
            total_files, total_length = _summarize_wav_files(created_files)
            length_mins = total_length / 60
            message += f"\n\n{total_files} files created, {length_mins:.2f} minutes total."
    finally:
        # The segments now live in the zip; remove the loose copies on every
        # path so they cannot leak into the next run.
        for audio_file in created_files:
            if os.path.exists(audio_file):
                os.remove(audio_file)

    return message, zip_file_name
# Gradio UI: two tabs, one to download a video and extract its audio, one to
# split the extracted audio and offer the result as a zip download.
# NOTE(review): the original indentation of this layout was lost; the nesting
# below is a reconstruction - confirm it against the intended page layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically."
    )
    with gr.Tabs():
        # Tab 1: take a URL and pass it to download_video; the returned
        # status string is displayed in a Text output.
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                    convertion = gr.Button("Download", variant='primary')
                    convertion.click(
                        fn=download_video,
                        inputs=[url],
                        outputs=gr.Text(label="Output")
                    )
        # Tab 2: collect the split parameters and pass them to split_audio,
        # which returns a status string and the zip file name.
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=8)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                    splitbtn = gr.Button("Split", variant='primary')
                    splitbtn.click(
                        split_audio,
                        inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                        outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
                    )
# Start the Gradio server when the module is executed.
app.launch()