"""Gradio app that downloads audio, splits it into clips and queries MVSEP.

All intermediate files (``output.wav``/``output.mp3``, split clips, zip
archives) are written to the current working directory.
"""

import glob
import json
import os
import traceback
import urllib.error
import wave
import zipfile

import auditok
import gradio as gr
import pytube.exceptions
import requests
import yt_dlp
from moviepy.editor import *  # supplies VideoFileClip and AudioFileClip
from pytube import YouTube

_SUPPORTED_AUDIO_FORMATS = ("wav", "mp3")
_DOWNLOAD_DONE_MESSAGE = "Finished downloading! Please proceed to next tab."
_MVSEP_CREATE_URL = "https://mvsep.com/api/separation/create"
_MVSEP_GET_URL = "https://mvsep.com/api/separation/get"
# Seconds to wait on MVSEP before giving up instead of hanging the request.
_REQUEST_TIMEOUT = 120


def _check_audio_format(audio_format):
    """Raise ``gr.Error`` unless *audio_format* is ``"wav"`` or ``"mp3"``."""
    if audio_format not in _SUPPORTED_AUDIO_FORMATS:
        raise gr.Error(
            f"Unsupported audio format: {audio_format!r}. Choose wav or mp3."
        )


def _remove_files(paths):
    """Delete each path in *paths* that still exists."""
    for path in paths:
        if os.path.exists(path):
            os.remove(path)


def _zip_files(zip_name, paths):
    """Write *paths* into a new archive *zip_name* (flat layout); return its name."""
    with zipfile.ZipFile(zip_name, "w") as archive:
        for path in paths:
            archive.write(path, os.path.basename(path))
    return zip_name


def _fetch_audio_with_pytube(url, audio_format):
    """Download *url* with pytube and write its audio to ``output.<audio_format>``.

    The downloaded video file is removed afterwards, whether or not the audio
    extraction succeeded.

    Returns:
        The ``YouTube`` object, so callers can read ``title`` / ``author``.

    Raises:
        gr.Error: for an unsupported format, an invalid/empty URL, an HTTP
            failure, an age-restricted video, or a video without a usable
            stream or audio track.
    """
    _check_audio_format(audio_format)
    try:
        yt = YouTube(url)
        # Stream lookup stays inside the ``try`` so that errors raised while
        # resolving streams are reported with the same friendly messages.
        video = yt.streams.get_highest_resolution()
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error(
            "URL not valid or is empty! Please fix the link or enter one!"
        ) from err
    except urllib.error.HTTPError as not_ok:
        raise gr.Error(f"Recieved {not_ok}") from not_ok
    except pytube.exceptions.AgeRestrictedError as err:
        raise gr.Error(
            "The video you inputted is age-restricted! Please try another link."
        ) from err
    if video is None:
        raise gr.Error("No downloadable stream was found for this video.")

    video.download()
    video_path = f"{video.default_filename}"
    video_clip = None
    audio_clip = None
    try:
        video_clip = VideoFileClip(video_path)
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise gr.Error("The downloaded video has no audio track.")
        audio_clip.write_audiofile(f"output.{audio_format}")
    finally:
        # Close the clips before deleting the file they read from.
        if audio_clip is not None:
            audio_clip.close()
        if video_clip is not None:
            video_clip.close()
        _remove_files([video_path])
    return yt


def download_video(url, download_as, use_ytdlp):
    """Download the audio of *url* into ``output.<download_as>``.

    Args:
        url: Video URL entered by the user.
        download_as: ``"wav"`` or ``"mp3"``.
        use_ytdlp: When truthy, download with yt_dlp instead of pytube.

    Returns:
        A status message for the UI.

    Raises:
        gr.Error: on an empty URL or any download/conversion failure.
    """
    if not url:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
    if not use_ytdlp:
        _fetch_audio_with_pytube(url, download_as)
        return _DOWNLOAD_DONE_MESSAGE

    ydl_opts = {
        "format": f"{download_as}/bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": download_as,
            }
        ],
    }
    target = f"output.{download_as}"
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # ``download`` takes a list of URLs.
            ydl.download([url])
        for path in glob.glob(f"*.{download_as}"):
            if path != target and os.path.exists(path):
                os.rename(path, target)
    except Exception as err:
        # UI boundary: surface the full traceback to the user.
        raise gr.Error(traceback.format_exc()) from err
    return _DOWNLOAD_DONE_MESSAGE


def _validate_split_request(min_dur, max_dur, base_name):
    """Raise ``gr.Error`` if the durations or the clip base name are unusable."""
    if min_dur is None or max_dur is None:
        raise gr.Error("Min and max duration cannot be empty!")
    if min_dur == max_dur:
        raise gr.Error(
            f"Cannot split mindur={min_dur} and maxdur={max_dur}, "
            "min and max are the same number."
        )
    if min_dur > max_dur:
        raise gr.Error(
            f"Cannot split mindur={min_dur} and maxdur={max_dur}, "
            "mindur is higher than maxdur."
        )
    # An untouched Textbox yields "" rather than None, so test truthiness.
    if not base_name or not str(base_name).strip():
        raise gr.Error("Split files name cannot be empty!")
    # The name becomes part of a file path; keep it inside the working dir.
    if "/" in base_name or "\\" in base_name:
        raise gr.Error("Split files name cannot contain path separators!")


def _split_into_wav_files(audio_path, base_name, **split_options):
    """Split *audio_path* with auditok and save ``<base_name>-<n>.wav`` clips.

    The source file is deleted once the clips have been written.

    Returns:
        The list of clip file names, in order.

    Raises:
        gr.Error: if auditok produced no regions.
    """
    regions = auditok.split(
        audio_path,
        max_silence=0.3,
        energy_threshold=45,
        **split_options,
    )
    saved_files = []
    for index, region in enumerate(regions, start=1):
        saved_files.append(region.save(f"{base_name}-{index}.wav"))
    os.remove(audio_path)
    if not saved_files:
        raise gr.Error("No audio segments were found with the given durations.")
    return saved_files


def _total_wav_seconds(paths):
    """Return the summed duration, in seconds, of the WAV files in *paths*."""
    total = 0.0
    for path in paths:
        try:
            with wave.open(path, "r") as wav_file:
                total += wav_file.getnframes() / float(wav_file.getframerate())
        except wave.Error as e:
            raise gr.Error(f"Error reading file: {e}") from e
    return total


def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split the previously downloaded ``output.mp3``/``output.wav`` into clips.

    ``output.mp3`` is preferred when both exist. The clips are zipped into
    ``audio_files.zip`` and then deleted.

    Args:
        mindur: Minimum clip duration passed to auditok.
        maxdur: Maximum clip duration passed to auditok.
        name_for_split_files: Base name for the clip files.
        show_amount_of_files_and_file_dur: When truthy, append the clip count
            and total duration to the status message.

    Returns:
        ``(status message, zip file name)``.

    Raises:
        gr.Error: if no downloaded audio exists, the parameters are invalid,
            or no segments were produced.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")
    if os.path.exists("output.mp3"):
        audio_path = "output.mp3"
    elif os.path.exists("output.wav"):
        audio_path = "output.wav"
    else:
        raise gr.Error("Neither output.mp3 or output.wav exist! Did the video download correctly?")
    _validate_split_request(mindur, maxdur, name_for_split_files)

    saved_files = _split_into_wav_files(
        audio_path,
        name_for_split_files,
        min_dur=mindur,
        max_dur=maxdur,
    )
    try:
        zip_file_name = _zip_files("audio_files.zip", saved_files)
        message = "Files split successfully!\nCheck below for zipped files."
        if show_amount_of_files_and_file_dur:
            length_mins = _total_wav_seconds(saved_files) / 60
            message += f"\n\n{len(saved_files)} files created, {length_mins:.2f} minutes total."
    finally:
        # The clips are only needed until they are inside the archive.
        _remove_files(saved_files)
    return message, zip_file_name


def split_wav_or_mp3_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2, strict):
    """Split an uploaded WAV/mp3 file into clips and zip them.

    Args:
        audiofileuploader: Uploaded file, passed straight to ``auditok.split``.
        mindur2: Minimum clip duration passed to auditok.
        maxdur2: Maximum clip duration passed to auditok.
        name_for_split_files2: Base name for the clip files.
        strict: When truthy, enable auditok's ``strict_min_dur``.

    Returns:
        ``(status message, zip file name)``.

    Raises:
        gr.Error: if no file was uploaded, the parameters are invalid, or no
            segments were produced.
    """
    if audiofileuploader is None:
        raise gr.Error("Audio file cannot be empty!")
    _validate_split_request(mindur2, maxdur2, name_for_split_files2)

    saved_files = _split_into_wav_files(
        audiofileuploader,
        name_for_split_files2,
        min_dur=mindur2,
        max_dur=maxdur2,
        strict_min_dur=bool(strict),
    )
    try:
        zip_file_name2 = _zip_files("audio_files.zip", saved_files)
    finally:
        _remove_files(saved_files)
    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(saved_files)}", zip_file_name2


def download_video_as_audio_only(yt_video, audio_output_format):
    """Download only the audio of *yt_video* and return it zipped.

    Args:
        yt_video: Video URL entered by the user.
        audio_output_format: ``"wav"`` or ``"mp3"``.

    Returns:
        ``(status message, zip file name)``; the archive is ``only_audio.zip``.

    Raises:
        gr.Error: on any failure reported by the pytube download helper.
    """
    yt = _fetch_audio_with_pytube(yt_video, audio_output_format)
    audio_file = f"output.{audio_output_format}"
    try:
        single_zip_name = _zip_files("only_audio.zip", [audio_file])
    finally:
        _remove_files([audio_file])
    return f"Done! Download the zip file below! This only contains the audio file.\n\nYou have downloaded {yt.title} by {yt.author}.", single_zip_name


def mp4_to_wav_or_mp3(mp4fileuploader, file_format):
    """Convert an uploaded mp4 to ``convertedaudio.<file_format>``.

    The uploaded file is deleted after a successful conversion.

    Returns:
        ``(status message, converted file name)``.

    Raises:
        gr.Error: if nothing was uploaded, the format is unsupported, or the
            conversion fails (the traceback is shown to the user).
    """
    if mp4fileuploader is None:
        raise gr.Error("Input cannot be empty!")
    _check_audio_format(file_format)
    what_to_return = f"convertedaudio.{file_format}"
    try:
        videoinput = AudioFileClip(mp4fileuploader)
        try:
            videoinput.write_audiofile(what_to_return)
        finally:
            videoinput.close()
        os.remove(mp4fileuploader)
    except Exception as err:
        # UI boundary: surface the full traceback to the user.
        raise gr.Error(traceback.format_exc()) from err
    return "Converted mp4 file successfully!", what_to_return


def remove_audio_file_from_directory():
    """Delete every ``*.wav`` and ``*.mp3`` in the working directory."""
    _remove_files(glob.glob("*.wav"))
    _remove_files(glob.glob("*.mp3"))
    gr.Info("File removed.")
    return None


def mvsep_api_request(mvsep_key, audio_file, sep_int):
    """Upload *audio_file* to MVSEP and return a message with the request hash.

    Args:
        mvsep_key: The user's MVSEP API token.
        audio_file: Path of the uploaded audio file.
        sep_int: Separation type number.

    Raises:
        gr.Error: on missing input, a network failure, or a response that
            does not contain ``data.hash``.
    """
    if not mvsep_key:
        raise gr.Error("API key cannot be empty!")
    if audio_file is None:
        raise gr.Error("Audio file cannot be empty!")
    if sep_int is None:
        raise gr.Error("Separation type cannot be empty!")

    data = {
        "api_token": mvsep_key,
        # Send a whole number even if the Number component supplied a float.
        "sep_type": int(sep_int),
    }
    try:
        # ``with`` closes the upload handle once the request has been sent.
        with open(audio_file, "rb") as upload:
            r = requests.post(
                _MVSEP_CREATE_URL,
                files={"audiofile": upload},
                data=data,
                timeout=_REQUEST_TIMEOUT,
            )
    except requests.exceptions.RequestException as err:
        raise gr.Error(f"Could not reach MVSEP: {err}") from err
    try:
        hash_val = r.json()["data"]["hash"]
    except (ValueError, KeyError, TypeError) as err:
        raise gr.Error(
            "MVSEP did not return a request hash. "
            "Check your API key and separation type."
        ) from err
    return f"Request sent successfully! Your hash is: {hash_val}.\n\nUse the next tab to check the status of your request."


def _mvsep_get(hash_value):
    """GET the MVSEP separation record for *hash_value*; return the response."""
    try:
        return requests.get(
            _MVSEP_GET_URL,
            params={"hash": hash_value},
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.exceptions.RequestException as err:
        raise gr.Error(f"Could not reach MVSEP: {err}") from err


def mvsep_check_request(hash_textbox):
    """Return a status line for the MVSEP request identified by *hash_textbox*.

    Shows an info toast and returns ``None`` when the response is not JSON or
    lacks the ``success`` / ``status`` fields.
    """
    r = _mvsep_get(hash_textbox)
    try:
        rjson = r.json()
        success = rjson["success"]
        status = rjson["status"]
    except (requests.exceptions.JSONDecodeError, KeyError, TypeError):
        gr.Info("Status not available or request not sent.")
        return None
    return f"Was successful? {str(success)}.\n Status: {status}."


def mvsep_download_separated_audio(hash_textbox):
    """Return the separated-file URLs for *hash_textbox* as a JSON string.

    Shows an info toast and returns ``None`` when the response is not JSON or
    lists no files yet.
    """
    r = _mvsep_get(hash_textbox)
    try:
        rjson = r.json()
        data = rjson.get("data") if isinstance(rjson, dict) else None
        files = data.get("files", []) if isinstance(data, dict) else []
        urls = [file["url"] for file in files]
    except (requests.exceptions.JSONDecodeError, KeyError, TypeError):
        urls = []
    if not urls:
        gr.Info("Nothing to download yet. Check back later.")
        return None
    return json.dumps(urls, indent=4)


# NOTE(review): this copy of the file had lost its indentation, so the layout
# nesting below is inferred from statement order -- confirm against the
# rendered UI. The gr.HTML payloads contain only plain text and blank lines
# here; any markup they may once have had is not visible, so the text is
# reproduced as-is.
with gr.Blocks(theme="sudeepshouche/minimalist", title="Global Dataset Maker") as app:
    gr.HTML("\n\nWelcome to the GDMGS! (GlobalDatasetMaker Gradio Space)\n\n")
    gr.Markdown("## Duplicate this space if you want to make your own changes!")
    gr.HTML("\n\nDuplicate this Space\n\n")
    gr.Markdown(
        "This Space will create a dataset for you and use MVSEP to isolate vocals (EXPERIMENTAL), all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete.**"
    )
    gr.HTML(
        "\n\nThis Space's storage is ephemeral, meaning once you reload this space, all audio files will be lost.\n\n"
    )
    with gr.Tabs():
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        download_as = gr.Radio(["wav", "mp3"], label="Audio format output", value="wav", info="What should the audio format be output as?")
                        use_ytdlp = gr.Checkbox(False, label="Use yt_dlp instead of pytube?", info="Sometimes Pytube refuses to download a video. If that happens, check this box to download using yt_dlp instead.")
                        convertion = gr.Button("Download", variant="primary")
                        convertion.click(
                            fn=download_video,
                            inputs=[url, download_as, use_ytdlp],
                            outputs=gr.Text(label="Output"),
                        )
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                        splitbtn = gr.Button("Split", variant="primary")
                        splitbtn.click(
                            split_audio_from_yt_video,
                            inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                            outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")],
                        )
        with gr.TabItem("Misc tools"):
            with gr.Tab("File splitter"):
                gr.Markdown("If you would rather split a single WAV or mp3 audio file, use this method instead.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            audiofileuploader = gr.File(file_count="single", file_types=[".wav", ".mp3"], label="WAV or mp3 file")
                            mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_split_files2 = gr.Textbox(label="Name for split files")
                            strict = gr.Checkbox(True, label="Enable strict duration?", info="Use this option if you want to minimize the '(audio_file) is less than 0.76 seconds' warning on Colab. Keep in mind that this only applies for min duration, max is ignored.")
                            audiofileuploadbtn = gr.Button("Split", variant="primary")
                            audiofileuploadbtn.click(
                                split_wav_or_mp3_file,
                                [audiofileuploader, mindur2, maxdur2, name_for_split_files2, strict],
                                [gr.Text(label="Output"), gr.File(label="Zipped files")],
                            )
            with gr.Tab("Audio only download"):
                gr.Markdown("If you want to download only the audio (to isolate bgm using UVR, etc), use this method, which will only extract audio and not split the audio.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            yt_video = gr.Textbox(label="URL")
                            audio_output_format = gr.Radio(["wav", "mp3"], value="wav", label="Download audio as:")
                            commence_download = gr.Button("Download", variant="primary")
                            commence_download.click(
                                download_video_as_audio_only,
                                [yt_video, audio_output_format],
                                [gr.Text(label="Output"), gr.File(label="Zipped audio file")],
                            )
            with gr.Tab("MP4 to mp3/wav converter"):
                gr.Markdown("If you have an mp4 file, you can convert it to mp3 or wav here. Only click the 'Remove file' button when done.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            mp4fileuploader = gr.File(file_count="single", file_types=[".mp4"], label="mp4 file")
                            file_format = gr.Radio(["wav", "mp3"], value="mp3", label="Convert mp4 file to:")
                            convert_btn = gr.Button("Convert", variant="primary")
                            remove_file_btn = gr.Button("Remove file from directory", variant="secondary")
                            convert_btn.click(
                                mp4_to_wav_or_mp3,
                                [mp4fileuploader, file_format],
                                [gr.Text(label="Output"), gr.File(label="Converted audio file")],
                            )
                            remove_file_btn.click(
                                remove_audio_file_from_directory,
                                None,
                                None,
                            )
            with gr.Tab("MVSEP"):
                gr.Markdown("**VERY EXPERIMENTAL!** Use MVSEP to isolate audio.\n\n**You will be required to input your API key, but it will not be saved ever, I don't use anything saved here for bad intentions, nor would I have access to it regardless.**")
                with gr.Tab("Send Request"):
                    with gr.Row():
                        with gr.Column():
                            with gr.Row():
                                mvsep_key = gr.Textbox(placeholder="Enter your MVSEP API key.", label="API key")
                                audio_file = gr.File(file_count="single", file_types=[".mp3"], label="Audio file")
                                sep_int = gr.Number(11, label="Separation type (default is 11).", minimum=0, maximum=40, interactive=True)
                                send_req = gr.Button("Send request", variant="primary")
                                send_req.click(
                                    mvsep_api_request,
                                    [mvsep_key, audio_file, sep_int],
                                    [gr.Text(label="Output")],
                                )
                with gr.Tab("Get status of request"):
                    with gr.Row():
                        with gr.Column():
                            with gr.Row():
                                hash_textbox = gr.Textbox(label="Hash")
                                check_status = gr.Button("Check status", variant="primary")
                                download = gr.Button("Download separated audio", variant="secondary")
                                check_status.click(
                                    mvsep_check_request,
                                    [hash_textbox],
                                    [gr.Text(label="Status")],
                                )
                                download.click(
                                    mvsep_download_separated_audio,
                                    [hash_textbox],
                                    [gr.Text(label="Link(s)")],
                                )
        with gr.TabItem("Changelog"):
            gr.Markdown("v0.99.3 - Added MVSEP in Misc Tools. This is VERY EXPERIMENTAL and there will be bugs present.")
            gr.Markdown("v0.99.2 - Added an mp4 file converter.")
            gr.Markdown("v0.99.1 - Removed very old tools (including the 'Upcoming Features' tab) that did not fit with the nature of the program.")
            gr.Markdown("v0.99 - Added 'Strict Duration' mode for the file splitter.")
            gr.Markdown("v0.98.2 - Added new upcoming features tab.")

app.launch()