import gradio as gr, glob, os, auditok, zipfile, wave, pytube.exceptions, urllib.error, requests, json
from pytube import YouTube
from moviepy.editor import *
import traceback, yt_dlp


def download_video(url, download_as, use_ytdlp):
    """Download a video and store its audio track as ``output.<download_as>``.

    Args:
        url: Address of the video to fetch.
        download_as: Target audio extension. The pytube path only writes a
            file for ``"wav"`` or ``"mp3"``.
        use_ytdlp: When truthy, download and extract the audio with yt_dlp;
            otherwise download the video with pytube and extract the audio
            track with moviepy.

    Returns:
        A status message telling the user to continue in the next tab.

    Raises:
        gr.Error: If the URL is rejected, an HTTP error occurs, the video is
            age-restricted, no stream is available, or yt_dlp fails (the
            formatted traceback is used as the message in that case).
    """
    if use_ytdlp:
        ydl_opts = {
            'format': f"{download_as}/bestaudio/best",
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': download_as,
            }]
        }
        target = f"output.{download_as}"
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                # The API takes a list of URLs, not a bare string.
                ydl.download([url])
            for candidate in glob.glob(f"*.{download_as}"):
                if candidate != target and os.path.exists(candidate):
                    # os.replace overwrites an existing target on every
                    # platform, whereas os.rename fails on Windows.
                    os.replace(candidate, target)
        except Exception as exc:
            # Surface the full traceback in the UI, but let KeyboardInterrupt
            # and SystemExit propagate untouched.
            raise gr.Error(traceback.format_exc()) from exc
        return "Finished downloading! Please proceed to next tab."

    try:
        try:
            yt = YouTube(url)
        except pytube.exceptions.RegexMatchError:
            raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
        # Stream lookup and the download itself can raise the HTTP and
        # age-restriction errors too, so they share the handlers below.
        video = yt.streams.get_highest_resolution()
        if video is None:
            raise gr.Error("No downloadable stream was found for this video.")
        video.download()
    except urllib.error.HTTPError as not_ok:
        raise gr.Error(f"Recieved {not_ok}")
    except pytube.exceptions.AgeRestrictedError:
        raise gr.Error("The video you inputted is age-restricted! Please try another link.")

    video_path = video.default_filename
    video_clip = VideoFileClip(video_path)
    audio_clip = video_clip.audio
    try:
        if download_as in ("wav", "mp3"):
            audio_clip.write_audiofile(f"output.{download_as}")
    finally:
        # Release the media handles and drop the downloaded video even when
        # the audio extraction fails.
        if audio_clip is not None:
            audio_clip.close()
        video_clip.close()
        for leftover_video in glob.glob("*.mp4"):
            os.remove(leftover_video)
    return "Finished downloading! Please proceed to next tab."

def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split the downloaded ``output.mp3``/``output.wav`` into zipped segments.

    ``output.mp3`` is preferred when both files exist. Every detected region
    is saved as ``<name_for_split_files>-<n>.wav`` and the segments produced
    by this call are bundled into ``audio_files.zip``. The source file is
    removed once the segments are saved, and all ``*.wav`` files in the
    working directory are removed before returning.

    Args:
        mindur: Minimum segment duration passed to ``auditok.split``.
        maxdur: Maximum segment duration passed to ``auditok.split``.
        name_for_split_files: Prefix used for the segment file names.
        show_amount_of_files_and_file_dur: When truthy, append the number of
            segments and their total length in minutes to the message.

    Returns:
        A ``(message, zip_path)`` tuple.

    Raises:
        gr.Error: If no downloaded audio exists, the durations or the name
            are invalid, no segment is detected, or a segment cannot be read
            back for the statistics.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")
    if not os.path.exists("output.mp3") and not os.path.exists("output.wav"):
        raise gr.Error("Neither output.mp3 or output.wav exist! Did the video download correctly?")
    if mindur is None or maxdur is None:
        raise gr.Error("Min and max duration cannot be empty!")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An untouched textbox yields an empty string rather than None.
    if not name_for_split_files or not name_for_split_files.strip():
        raise gr.Error("Split files name cannot be empty!")

    audio_path = "output.mp3" if os.path.exists("output.mp3") else "output.wav"
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45
    )
    # Consume the regions before deleting the source; save() returns the
    # name of the file it wrote.
    saved_files = [
        region.save(f"{name_for_split_files}-{index}.wav")
        for index, region in enumerate(audio_regions, start=1)
    ]
    if not saved_files:
        # The source is kept so the user can retry with other durations.
        raise gr.Error("No audio segments were detected! Try different min/max durations.")
    os.remove(audio_path)

    zip_file_name = "audio_files.zip"
    try:
        # Write the archive once, containing only this run's segments.
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for segment in saved_files:
                zip_file.write(segment, os.path.basename(segment))

        message = "Files split successfully!\nCheck below for zipped files."
        if show_amount_of_files_and_file_dur:
            total_length = 0.0
            for segment in saved_files:
                try:
                    with wave.open(segment, 'r') as audio_file:
                        total_length += audio_file.getnframes() / float(audio_file.getframerate())
                except wave.Error as e:
                    raise gr.Error(f"Error reading file: {e}")
            length_mins = total_length / 60
            message += f"\n\n{len(saved_files)} files created, {length_mins:.2f} minutes total."
    finally:
        # Leave no wav files behind, whether or not the steps above succeeded.
        for leftover in glob.glob("*.wav"):
            os.remove(leftover)
    return message, zip_file_name

def split_wav_or_mp3_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2, strict):
    """Split an uploaded WAV/mp3 file into zipped segments.

    Every detected region is saved as ``<name_for_split_files2>-<n>.wav`` and
    the segments produced by this call are bundled into ``audio_files.zip``.
    The uploaded file is removed once the segments are saved, and all
    ``*.wav`` files in the working directory are removed before returning.

    Args:
        audiofileuploader: Uploaded audio file; it is handed to
            ``auditok.split`` and ``os.remove`` as-is, so it is presumably a
            file path (verify against the Gradio version in use).
        mindur2: Minimum segment duration passed to ``auditok.split``.
        maxdur2: Maximum segment duration passed to ``auditok.split``.
        name_for_split_files2: Prefix used for the segment file names.
        strict: Forwarded to ``auditok.split`` as ``strict_min_dur``.

    Returns:
        A ``(message, zip_path)`` tuple; the message reports how many
        segments were created.

    Raises:
        gr.Error: If the file, the durations or the name are missing or
            invalid, or if no segment is detected.
    """
    if audiofileuploader is None:
        raise gr.Error("Audio file cannot be empty!")
    if mindur2 is None or maxdur2 is None:
        raise gr.Error("Min and max duration cannot be empty!")
    # Report this function's own arguments; the bare names ``mindur`` and
    # ``maxdur`` are module-level UI components, not the submitted values.
    if mindur2 == maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, min and max are the same number.")
    if mindur2 > maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, mindur is higher than maxdur.")
    # An untouched textbox yields an empty string rather than None.
    if not name_for_split_files2 or not name_for_split_files2.strip():
        raise gr.Error("Split files name cannot be empty!")

    audio_path = audiofileuploader
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur2,
        max_dur=maxdur2,
        max_silence=0.3,
        energy_threshold=45,
        strict_min_dur=bool(strict)
    )
    # Consume the regions before deleting the source; save() returns the
    # name of the file it wrote.
    saved_files = [
        region.save(f"{name_for_split_files2}-{index}.wav")
        for index, region in enumerate(audio_regions, start=1)
    ]
    if not saved_files:
        raise gr.Error("No audio segments were detected! Try different min/max durations.")
    os.remove(audio_path)

    zip_file_name2 = "audio_files.zip"
    try:
        # Write the archive once, containing only this run's segments.
        with zipfile.ZipFile(zip_file_name2, "w") as zip_file:
            for segment in saved_files:
                zip_file.write(segment, os.path.basename(segment))
    finally:
        # Leave no wav files behind, whether or not zipping succeeded.
        for leftover in glob.glob("*.wav"):
            os.remove(leftover)
    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(saved_files)}", zip_file_name2

def download_video_as_audio_only(yt_video, audio_output_format):
    """Download a video with pytube and return its audio track in a zip.

    The audio is written to ``output.<audio_output_format>`` and packed into
    ``only_audio.zip``. Afterwards all ``*.mp4``, ``*.wav`` and ``*.mp3``
    files in the working directory are removed.

    Args:
        yt_video: Address of the video to fetch.
        audio_output_format: ``"wav"`` or ``"mp3"``; no audio file is
            written for any other value.

    Returns:
        A ``(message, zip_path)`` tuple; the message names the video title
        and author.

    Raises:
        gr.Error: If the URL is rejected, an HTTP error occurs, the video is
            age-restricted, or no stream is available.
    """
    try:
        try:
            yt = YouTube(yt_video)
        except pytube.exceptions.RegexMatchError:
            raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
        # Stream lookup and the download itself can raise the HTTP and
        # age-restriction errors too, so they share the handlers below.
        video = yt.streams.get_highest_resolution()
        if video is None:
            raise gr.Error("No downloadable stream was found for this video.")
        video.download()
    except urllib.error.HTTPError as not_ok:
        raise gr.Error(f"Recieved {not_ok}")
    except pytube.exceptions.AgeRestrictedError:
        raise gr.Error("The video you inputted is age-restricted! Please try another link.")

    output_name = f"output.{audio_output_format}"
    video_path = video.default_filename
    video_clip = VideoFileClip(video_path)
    audio_clip = video_clip.audio
    try:
        if audio_output_format in ("wav", "mp3"):
            audio_clip.write_audiofile(output_name)
    finally:
        # Release the media handles and drop the downloaded video even when
        # the audio extraction fails.
        if audio_clip is not None:
            audio_clip.close()
        video_clip.close()
        for leftover_video in glob.glob("*.mp4"):
            os.remove(leftover_video)

    single_zip_name = "only_audio.zip"
    with zipfile.ZipFile(single_zip_name, 'w') as zip_file:
        # Pack only the file produced above, not every audio file that
        # happens to be lying in the working directory.
        if os.path.exists(output_name):
            zip_file.write(output_name, os.path.basename(output_name))

    for pattern in ("*.wav", "*.mp3"):
        for leftover_audio in glob.glob(pattern):
            os.remove(leftover_audio)
    return f"Done! Download the zip file below! This only contains the audio file.\n\nYou have downloaded {yt.title} by {yt.author}.", single_zip_name

def mp4_to_wav_or_mp3(mp4fileuploader, file_format):
    """Convert an uploaded mp4 file to ``convertedaudio.wav`` or ``.mp3``.

    The uploaded file is removed after the conversion.

    Args:
        mp4fileuploader: Uploaded video file; it is handed to
            ``AudioFileClip`` and ``os.remove`` as-is, so it is presumably a
            file path (verify against the Gradio version in use).
        file_format: ``"wav"`` or ``"mp3"``. Any other value skips the
            conversion and still reports the mp3 name, as before.

    Returns:
        A ``(message, audio_path)`` tuple.

    Raises:
        gr.Error: If no file was supplied, or if the conversion fails (the
            formatted traceback is used as the message).
    """
    if mp4fileuploader is None:
        raise gr.Error("Input cannot be empty!")

    what_to_return = "convertedaudio.wav" if file_format == "wav" else "convertedaudio.mp3"
    try:
        if file_format in ("wav", "mp3"):
            videoinput = AudioFileClip(mp4fileuploader)
            try:
                videoinput.write_audiofile(what_to_return)
            finally:
                # Release the reader even when writing fails.
                videoinput.close()
        os.remove(mp4fileuploader)
        return "Converted mp4 file successfully!", what_to_return
    except Exception as exc:
        # Surface the full traceback in the UI, but let KeyboardInterrupt
        # and SystemExit propagate untouched.
        raise gr.Error(traceback.format_exc()) from exc
        
def remove_audio_file_from_directory():
    """Delete every ``*.wav`` and ``*.mp3`` file in the working directory.

    Returns:
        The result of ``gr.Info("File removed.")``, which notifies the user.
    """
    # wav files are cleared first, then mp3 files.
    for pattern in ("*.wav", "*.mp3"):
        for audio_path in glob.glob(pattern):
            os.remove(audio_path)
    return gr.Info("File removed.")

def mvsep_api_request(mvsep_key, audio_file, sep_int):
    """Upload an audio file to MVSEP and start a separation job.

    Args:
        mvsep_key: API token sent as ``api_token``.
        audio_file: Path of the audio file to upload.
        sep_int: Separation type sent as ``sep_type``.

    Returns:
        A message containing the hash assigned to the request.

    Raises:
        gr.Error: If no file was supplied or the response carries no hash.
    """
    if audio_file is None:
        raise gr.Error("Audio file cannot be empty!")

    url = "https://mvsep.com/api/separation/create"
    data = {
        "api_token": mvsep_key,
        # The numeric input delivers floats such as 11.0; send a plain
        # integer instead. NOTE(review): assumes the API expects an integer
        # separation type - confirm against the MVSEP API docs.
        "sep_type": int(sep_int) if sep_int is not None else sep_int
    }
    # Close the upload handle as soon as the request has been sent.
    with open(audio_file, 'rb') as audio_handle:
        r = requests.post(url, files={"audiofile": audio_handle}, data=data)

    try:
        hash_val = r.json()['data']['hash']
    except (ValueError, KeyError, TypeError):
        # Non-JSON body or a JSON body without data.hash (e.g. an API error).
        raise gr.Error(f"MVSEP did not return a request hash (HTTP {r.status_code}): {r.text[:200]}")
    return f"Request sent successfully! Your hash is: {hash_val}.\n\nUse the next tab to check the status of your request."

def mvsep_check_request(hash_textbox):
    """Query MVSEP for the state of a separation request.

    Args:
        hash_textbox: Hash identifying the request.

    Returns:
        A text summarising the ``success`` and ``status`` fields of the
        response, or the result of a ``gr.Info`` notification when the
        response body is not valid JSON.
    """
    endpoint = "https://mvsep.com/api/separation/get"
    try:
        payload = requests.get(endpoint, params={"hash": hash_textbox}).json()
        was_successful = payload['success']
        current_status = payload['status']
    except requests.exceptions.JSONDecodeError:
        return gr.Info("Status not available or request not sent.")
    return f"Was successful? {was_successful!s}.\n Status: {current_status}."

def mvsep_download_separated_audio(hash_textbox):
    """Collect the download links of a finished MVSEP separation.

    Args:
        hash_textbox: Hash identifying the request.

    Returns:
        A JSON-formatted list of the ``url`` of every entry under
        ``data.files`` in the response, or the result of a ``gr.Info``
        notification when the response body is not valid JSON.
    """
    endpoint = "https://mvsep.com/api/separation/get"
    try:
        response = requests.get(endpoint, params={"hash": hash_textbox})
        payload = response.json()
        links = []
        for entry in payload.get('data', {}).get('files', []):
            links.append(entry['url'])
        return json.dumps(links, indent=4)
    except requests.exceptions.JSONDecodeError:
        return gr.Info("Nothing to download yet. Check back later.")
    
def mvsep_list_available_models():
    """Return the selectable MVSEP models as ``"<id> - <description>"`` strings.

    The ids are not contiguous: 21 and 32 are absent from the list.
    """
    catalogue = (
        (0, "spleeter (vocals, music)"),
        (1, "spleeter (vocals, drums, bass, other)"),
        (2, "spleeter (vocals, drums, bass, piano, other)"),
        (3, "unmix XL (vocals, drums, bass, other)"),
        (4, "unmix HQ (vocals, drums, bass, other)"),
        (5, "unmix SD (vocals, drums, bass, other)"),
        (6, "unmix SE (vocals, music)"),
        (7, "MDX A (vocals, drums, bass, other)"),
        (8, "MDX B (vocals, drums, bass, other)"),
        (9, "UVR HQ (vocals, music)"),
        (10, "Demucs3 Model A (vocals, drums, bass, other)"),
        (11, "Demucs3 Model B (vocals, drums, bass, other)"),
        (12, "MDX-B Karaoke (lead/back vocals)"),
        (13, "Demucs2 (vocals, drums, bass, other)"),
        (14, "Zero Shot (Query Based) (LQ)"),
        (15, "Danna sep (vocals, drums, bass, other)"),
        (16, "Byte Dance (vocals, drums, bass, other)"),
        (17, "UVRv5 Demucs (vocals, music)"),
        (18, "MVSep DNR (music, sfx, speech)"),
        (19, "MVSep Vocal Model (vocals, music)"),
        (20, "Demucs4 HT (vocals, drums, bass, other)"),
        (22, "FoxJoy Reverb Removal (other)"),
        (23, "MDX B (vocals, instrumental)"),
        (24, "MVSep Demucs4HT DNR (dialog, sfx, music)"),
        (25, "MDX23C (vocals, instrumental)"),
        (26, "Ensemble (vocals, instrumental) [PREMIUM ONLY]"),
        (27, "Demucs4 Vocals 2023 (vocals, instrumental)"),
        (28, "Ensemble (vocals, instrumental, bass, drums, other) [PREMIUM ONLY]"),
        (29, "MVSep Piano (piano, other)"),
        (30, "Ensemble All-In (vocals, bass, drums, piano, guitar, lead/back vocals, other) [PREMIUM ONLY]"),
        (31, "MVSep Guitar (guitar, other)"),
        (33, "Vit Large 23 (vocals, instrum)"),
        (34, "MVSep Crowd removal (crowd, other)"),
        (35, "MVSep MelBand Roformer (vocals, instrum)"),
        (36, "BandIt Plus (speech, music, effects)"),
        (37, "DrumSep (kick, snare, cymbals, toms)"),
        (38, "LarsNet (kick, snare, cymbals, toms, hihat)"),
        (39, "Whisper (extract text from audio)"),
        (40, "BS Roformer (vocals, instrumental)"),
        (41, "MVSep Bass (bass, other)"),
    )
    return [f"{model_id} - {description}" for model_id, description in catalogue]


# Build the Gradio interface. Every tab wires its input widgets to one of the
# handler functions defined above; the output components are created inline
# in each click() call.
with gr.Blocks(theme='sudeepshouche/minimalist', title="Global Dataset Maker") as app:
    # Page header: title, duplication badge and usage notes.
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown("## Duplicate this space if you want to make your own changes!")
    gr.HTML(
        """<p style="margin:5px auto;display: flex;justify-content: left;">
            <a href="https://huggingface.co/spaces/Kryptone/GDMGS?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md-dark.svg" alt="Duplicate this Space"></a>
        </p>"""
    )
    gr.Markdown(
        "This Space will create a dataset for you and use MVSEP to isolate vocals (EXPERIMENTAL), all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete.**"
    )
    gr.HTML(
        "<h2> This Space's storage is ephemeral, meaning once you reload this space, all audio files will be lost. </h2>"
    )
    with gr.Tabs():
        # Tab 1: download a video's audio via download_video.
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        download_as = gr.Radio(["wav", "mp3"], label="Audio format output", value="wav", info="What should the audio format be output as?")
                        use_ytdlp = gr.Checkbox(False, label="Use yt_dlp instead of pytube?", info="Sometimes Pytube refuses to download a video. If that happens, check this box to download using yt_dlp instead.")
                        convertion = gr.Button("Download", variant='primary')
            convertion.click(
                fn=download_video,
                inputs=[url, download_as, use_ytdlp],
                outputs=gr.Text(label="Output")
            )
        # Tab 2: split the downloaded audio via split_audio_from_yt_video.
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                        splitbtn = gr.Button("Split", variant='primary')
            splitbtn.click(
                split_audio_from_yt_video,
                inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
            )
        # Tab 3: standalone tools, each in its own nested tab.
        with gr.TabItem("Misc tools"):
            # Split an uploaded file via split_wav_or_mp3_file.
            with gr.Tab("File splitter"):
                gr.Markdown("If you would rather split a single WAV or mp3 audio file, use this method instead.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            audiofileuploader = gr.File(file_count='single', file_types=[".wav", ".mp3"], label="WAV or mp3 file")
                            mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_split_files2 = gr.Textbox(label="Name for split files")
                            strict = gr.Checkbox(True, label="Enable strict duration?", info="Use this option if you want to minimize the '(audio_file) is less than 0.76 seconds' warning on Colab. Keep in mind that this only applies for min duration, max is ignored.")
                            audiofileuploadbtn = gr.Button("Split", variant='primary')
                        audiofileuploadbtn.click(
                            split_wav_or_mp3_file,
                            [audiofileuploader, mindur2, maxdur2, name_for_split_files2, strict],
                            [gr.Text(label="Output"), gr.File(label="Zipped files")]
                        )

            # Download a video's audio without splitting via download_video_as_audio_only.
            with gr.Tab("Audio only download"):
                gr.Markdown("If you want to download only the audio (to isolate bgm using UVR, etc), use this method, which will only extract audio and not split the audio.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            yt_video = gr.Textbox(label="URL")
                            audio_output_format = gr.Radio(["wav", "mp3"], value="wav", label="Download audio as:")
                            commence_download = gr.Button("Download", variant='primary')
                        commence_download.click(
                            download_video_as_audio_only,
                            [yt_video, audio_output_format],
                            [gr.Text(label="Output"), gr.File(label="Zipped audio file")]
                        )
            # Convert an uploaded mp4 via mp4_to_wav_or_mp3; the second button
            # calls remove_audio_file_from_directory and has no inputs or outputs.
            with gr.Tab("MP4 to mp3/wav converter"):
                gr.Markdown("If you have an mp4 file, you can convert it to mp3 or wav here. Only click the 'Remove file' button when done.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            mp4fileuploader = gr.File(file_count='single', file_types=[".mp4"], label="mp4 file")
                            file_format = gr.Radio(["wav", "mp3"], value="mp3", label="Convert mp4 file to:")
                            convert_btn = gr.Button("Convert", variant='primary')
                            remove_file_btn = gr.Button("Remove file from directory", variant='secondary')
                        convert_btn.click(
                            mp4_to_wav_or_mp3,
                            [mp4fileuploader, file_format],
                            [gr.Text(label="Output"), gr.File(label="Converted audio file")]
                        )
                        remove_file_btn.click(
                            remove_audio_file_from_directory,
                            None,
                            None
                        )
            # MVSEP integration: one nested tab submits a request, the other
            # checks its status and lists the result links.
            with gr.Tab("MVSEP"):
                gr.Markdown("**VERY EXPERIMENTAL!** Use MVSEP to isolate audio.\n\n**You will be required to input your API key, but it will not be saved ever, I don't use anything saved here for bad intentions, nor would I have access to it regardless.**")
                with gr.Tab("Send Request"):
                    with gr.Row():
                        with gr.Column():
                            with gr.Row():
                                mvsep_key = gr.Textbox(placeholder="Enter your MVSEP API key.", label="API key")
                                audio_file = gr.File(file_count='single', file_types=[".mp3"], label="Audio file")
                                sep_int = gr.Number(11, label="Separation type (default is 11).", minimum=0, maximum=41, interactive=True)
                                send_req = gr.Button("Send request", variant='primary')
                                get_available_models = gr.Button("Get available models", variant='secondary')
                            send_req.click(
                                mvsep_api_request,
                                [mvsep_key, audio_file, sep_int],
                                [gr.Text(label="Output")]
                            )
                            get_available_models.click(
                                mvsep_list_available_models,
                                None,
                                [gr.Text(label="Models")]
                            )
                with gr.Tab("Get status of request"):
                    with gr.Row():
                        with gr.Column():
                            with gr.Row():
                                hash_textbox = gr.Textbox(label="Hash")
                                check_status = gr.Button("Check status", variant='primary')
                                download = gr.Button("Download separated audio", variant='secondary')
                            check_status.click(
                                mvsep_check_request,
                                [hash_textbox],
                                [gr.Text(label="Status")]
                            )
                            download.click(
                                mvsep_download_separated_audio,
                                [hash_textbox],
                                [gr.Text(label="Link(s)")]
                            )

        # Tab 4: static release notes, newest first.
        with gr.TabItem("Changelog"):
            gr.Markdown("v0.99.4 - Added a button to display the available models for MVSEP.")
            gr.Markdown("v0.99.3 - Added MVSEP in Misc Tools. This is VERY EXPERIMENTAL and there will be bugs present.")
            gr.Markdown("v0.99.2 - Added an mp4 file converter.")
            gr.Markdown("v0.99.1 - Removed very old tools (including the 'Upcoming Features' tab) that did not fit with the nature of the program.")
            gr.Markdown("v0.99 - Added 'Strict Duration' mode for the file splitter.")
            gr.Markdown("v0.98.2 - Added new upcoming features tab.")

# Launch the app at module level; there is no __main__ guard.
app.launch()