import gradio as gr, glob, os, auditok, zipfile, wave, pytube.exceptions, librosa, time
from pytube import YouTube
from moviepy.editor import VideoFileClip

def download_video(url, download_as):
    """Download a YouTube video and save its audio track as ``output.<format>``.

    The highest-resolution stream is downloaded into the working directory,
    its audio is written to ``output.wav`` or ``output.mp3`` and the
    downloaded video file is deleted again.

    Args:
        url: YouTube video URL.
        download_as: Audio format to write, either ``"wav"`` or ``"mp3"``.

    Returns:
        A status message for the UI.

    Raises:
        gr.Error: If the URL is rejected by pytube, the format is not
            supported, or the video has no audio track.
    """
    if download_as not in ("wav", "mp3"):
        raise gr.Error(f"Unsupported audio format: {download_as!r}. Choose wav or mp3.")
    try:
        yt = YouTube(url)
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!") from err

    video = yt.streams.get_highest_resolution()
    video.download()
    video_path = f"{video.default_filename}"
    output_path = f"output.{download_as}"

    video_clip = VideoFileClip(video_path)
    audio_clip = None
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise gr.Error("The downloaded video has no audio track to extract.")
        # Drop the output of an earlier run in the *other* format: the split
        # step prefers output.mp3 whenever it exists, so a stale file would
        # otherwise be split instead of the one written here.
        for stale_output in ("output.wav", "output.mp3"):
            if stale_output != output_path and os.path.exists(stale_output):
                os.remove(stale_output)
        audio_clip.write_audiofile(output_path)
    finally:
        # Always release the clips and delete the downloaded video, even when
        # the extraction fails. Only the file downloaded by this call is
        # removed, not every .mp4 in the working directory.
        if audio_clip is not None:
            audio_clip.close()
        video_clip.close()
        if os.path.exists(video_path):
            os.remove(video_path)
    return "Finished downloading! Please proceed to next tab."

def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split the downloaded ``output.mp3``/``output.wav`` into short clips.

    The audio is cut with ``auditok.split`` (``max_silence=0.3``,
    ``energy_threshold=45``). Each region is saved as
    ``<name_for_split_files>-<n>.wav`` and the clips are packed into
    ``audio_files.zip``. Only the clips created by this call are zipped and
    deleted afterwards; other ``.wav`` files in the working directory are
    left alone.

    Args:
        mindur: Minimum clip duration passed to auditok as ``min_dur``.
        maxdur: Maximum clip duration passed to auditok as ``max_dur``.
        name_for_split_files: Prefix for the generated clip file names.
        show_amount_of_files_and_file_dur: When truthy, the status message
            also reports the number of clips and their total length.

    Returns:
        A ``(status message, zip file name)`` tuple.

    Raises:
        gr.Error: If no source audio exists, the durations are missing or
            inconsistent, the name is empty, no clip could be produced, or a
            clip cannot be read back for the statistics.
    """
    if not os.path.exists("output.mp3") and not os.path.exists("output.wav"):
        raise gr.Error("Neither output.mp3 or output.wav exist! Did the video download correctly?")
    if mindur is None or maxdur is None:
        raise gr.Error("Min and max duration must both be set!")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # Truthiness rejects both None and an empty string.
    if not name_for_split_files:
        raise gr.Error("Split files name cannot be empty!")
    if show_amount_of_files_and_file_dur:
        gr.Warning(f"show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")

    # output.mp3 wins when both files exist (same preference as before).
    audio_path = "output.mp3" if os.path.exists("output.mp3") else "output.wav"
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45
    )
    # Save every region before touching the source file, in case auditok
    # reads it lazily. save() returns the name of the file it wrote.
    split_files = [
        region.save(f"{name_for_split_files}-{index}.wav")
        for index, region in enumerate(audio_regions, start=1)
    ]
    if not split_files:
        # Keep the source so the user can retry with other durations.
        raise gr.Error("No audio segments were detected, so nothing was split. Try different durations.")
    os.remove(audio_path)

    zip_file_name = "audio_files.zip"
    message = "Files split successfully!\nCheck below for zipped files."
    try:
        # Build the archive once from exactly the clips created above.
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for split_file in split_files:
                zip_file.write(split_file, os.path.basename(split_file))

        if show_amount_of_files_and_file_dur:
            total_length = 0.0
            for split_file in split_files:
                try:
                    with wave.open(split_file, 'r') as audio_file:
                        total_length += audio_file.getnframes() / float(audio_file.getframerate())
                except wave.Error as e:
                    raise gr.Error(f"Error reading file: {e}") from e
            length_mins = total_length / 60
            message += f"\n\n{len(split_files)} files created, {length_mins:.2f} minutes total."
    finally:
        # The clips only need to live inside the zip; remove them even if
        # zipping or the statistics step failed.
        for split_file in split_files:
            if os.path.exists(split_file):
                os.remove(split_file)
    return message, zip_file_name

def analyze_audio(zip_file_path):
    """Report the average sample rate of the WAV files inside a zip archive.

    Every archive member whose name ends in ``.wav`` (case-insensitive,
    including members in sub-folders) is opened directly from the archive.
    Nothing is extracted to disk, so files from earlier uploads can no longer
    leak into the result. Members that cannot be parsed as WAV are reported
    on stdout and skipped.

    Args:
        zip_file_path: Path to (or file object of) the uploaded zip archive.

    Returns:
        ``"Average sample rate: <value>"`` or, when no readable WAV member
        exists, ``"No average sample rate could be found."``.

    Raises:
        gr.Error: If no archive was supplied or it is not a valid zip file.
    """
    if zip_file_path is None:
        raise gr.Error("ZIP file cannot be empty!")

    total_sample_rate = 0
    total_files = 0
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            for member_name in zip_ref.namelist():
                if not member_name.lower().endswith('.wav'):
                    continue
                try:
                    with zip_ref.open(member_name) as member_file, \
                            wave.open(member_file, 'rb') as audio_file:
                        total_sample_rate += audio_file.getframerate()
                        total_files += 1
                except (wave.Error, EOFError) as e:
                    # Best effort: an unreadable or truncated member must not
                    # abort the whole analysis.
                    print(f"Error reading file: {e}")
    except zipfile.BadZipFile as e:
        raise gr.Error(f"Could not read ZIP file: {e}") from e

    if total_files > 0:
        average_sample_rate = total_sample_rate / total_files
        return f"Average sample rate: {average_sample_rate}"
    return "No average sample rate could be found."

def split_wav_or_mp3_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2):
    """Split a single uploaded WAV/MP3 file into short clips and zip them.

    The audio is cut with ``auditok.split`` (``max_silence=0.3``,
    ``energy_threshold=45``). Each region is saved as
    ``<name_for_split_files2>-<n>.wav``, the clips are packed into
    ``audio_files.zip`` and then deleted, together with the uploaded file.
    Only the clips created by this call are zipped, counted and removed.

    Args:
        audiofileuploader: Uploaded audio file, as a path or an object
            exposing the path through a ``name`` attribute.
        mindur2: Minimum clip duration passed to auditok as ``min_dur``.
        maxdur2: Maximum clip duration passed to auditok as ``max_dur``.
        name_for_split_files2: Prefix for the generated clip file names.

    Returns:
        A ``(status message, zip file name)`` tuple.

    Raises:
        gr.Error: If no file was uploaded, the durations are missing or
            inconsistent, the name is empty, or no clip could be produced.
    """
    if audiofileuploader is None:
        raise gr.Error("Audio file cannot be empty!")
    if mindur2 is None or maxdur2 is None:
        raise gr.Error("Min and max duration must both be set!")
    # The messages previously interpolated the module-level `mindur`/`maxdur`
    # UI components instead of the values passed to this function.
    if mindur2 == maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, min and max are the same number.")
    if mindur2 > maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, mindur is higher than maxdur.")
    # Truthiness rejects both None and an empty string.
    if not name_for_split_files2:
        raise gr.Error("Split files name cannot be empty!")

    # Accept either a plain path or a file wrapper carrying the path in .name.
    audio_path = getattr(audiofileuploader, "name", audiofileuploader)
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur2,
        max_dur=maxdur2,
        max_silence=0.3,
        energy_threshold=45
    )
    # Save every region before deleting the upload, in case auditok reads it
    # lazily. save() returns the name of the file it wrote.
    split_files = [
        region.save(f"{name_for_split_files2}-{index}.wav")
        for index, region in enumerate(audio_regions, start=1)
    ]
    if not split_files:
        raise gr.Error("No audio segments were detected, so nothing was split. Try different durations.")
    os.remove(audio_path)

    zip_file_name2 = "audio_files.zip"
    try:
        # Build the archive once from exactly the clips created above.
        with zipfile.ZipFile(zip_file_name2, "w") as zip_file:
            for split_file in split_files:
                zip_file.write(split_file, os.path.basename(split_file))
    finally:
        for split_file in split_files:
            if os.path.exists(split_file):
                os.remove(split_file)
    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(split_files)}", zip_file_name2

def get_average_pitch(audio_file):
    """Estimate the average pitch of an audio file in Hz.

    ``librosa.piptrack`` returns a (frequency bin x frame) matrix that is
    zero everywhere except at detected spectral peaks, so averaging the whole
    matrix is dominated by zeros. Instead, the pitch of the strongest peak is
    taken for every frame and only frames with a detected pitch are averaged.

    Args:
        audio_file: Uploaded audio file, as a path or an object exposing the
            path through a ``name`` attribute.

    Returns:
        ``"Average pitch: <value> Hz"``, or ``"No pitch could be detected."``
        when no frame contains a pitch estimate.

    Raises:
        gr.Error: If no file was supplied.
    """
    if audio_file is None:
        raise gr.Error("Audio file cannot be empty!")

    audio_path = getattr(audio_file, "name", audio_file)
    # librosa.load down-mixes to mono by default, so the piptrack outputs
    # below are 2-D (bins, frames).
    y, sr = librosa.load(audio_path, sr=None)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

    # For each frame pick the bin with the largest magnitude and read the
    # pitch estimated for that bin.
    strongest_bins = magnitudes.argmax(axis=0)
    frame_pitches = pitches[strongest_bins, range(pitches.shape[1])]
    voiced_pitches = frame_pitches[frame_pitches > 0]
    if voiced_pitches.size == 0:
        return "No pitch could be detected."
    mean_pitch = voiced_pitches.mean()
    return f"Average pitch: {mean_pitch:.2f} Hz"

def all_in_one_inator(ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files, progress=gr.Progress()):
    """Download a YouTube video, extract its audio, split it and zip the clips.

    Combines the download and split steps: the audio track is written to
    ``output.<format>``, cut with ``auditok.split`` (``max_silence=0.3``,
    ``energy_threshold=45``) into ``<name>-<n>.wav`` clips, and the clips are
    packed into ``audio_files.zip``. The downloaded video, the extracted
    audio and the individual clips are deleted before returning.

    No ``output.*`` file has to exist beforehand; the audio split here is
    always the one extracted by this call.

    Args:
        ytvideo: YouTube video URL.
        download_yt_video_as: Intermediate audio format, ``"wav"`` or ``"mp3"``.
        min_duration: Minimum clip duration passed to auditok as ``min_dur``.
        max_duration: Maximum clip duration passed to auditok as ``max_dur``.
        name_for_outputted_split_files: Prefix for the generated clip names.
        progress: Gradio progress tracker used to report the current step.

    Returns:
        A ``(status message, zip file name)`` tuple.

    Raises:
        gr.Error: If the inputs are invalid, the URL is rejected by pytube,
            the video has no audio track, or no clip could be produced.
    """
    if download_yt_video_as not in ("wav", "mp3"):
        raise gr.Error(f"Unsupported audio format: {download_yt_video_as!r}. Choose wav or mp3.")
    # This used to test the module-level `download_as` UI component, so the
    # warning could never be shown.
    if download_yt_video_as == "mp3":
        gr.Warning("MP3 is experimental, especially with this, so caution is advised.")
    if min_duration is None or max_duration is None:
        raise gr.Error("Min and max duration must both be set!")
    if min_duration == max_duration:
        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, min and max are the same number.")
    if min_duration > max_duration:
        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, mindur is higher than maxdur.")
    # Truthiness rejects both None and an empty string.
    if not name_for_outputted_split_files:
        raise gr.Error("Split files name cannot be empty!")

    progress(0, "Downloading video...")
    try:
        yt = YouTube(ytvideo)
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error("URL not valid or was left empty! Please fix the link or enter one.") from err
    video = yt.streams.get_highest_resolution()
    video.download()
    video_path = f"{video.default_filename}"
    # Always split the file written by this call, never a stale output.* file.
    audio_path = f"output.{download_yt_video_as}"

    video_clip = VideoFileClip(video_path)
    audio_clip = None
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise gr.Error("The downloaded video has no audio track to extract.")
        audio_clip.write_audiofile(audio_path)
    finally:
        # Release the clips and delete only the video downloaded here.
        if audio_clip is not None:
            audio_clip.close()
        video_clip.close()
        if os.path.exists(video_path):
            os.remove(video_path)

    progress(0.5, "Video downloaded! Starting split process...")
    try:
        audio_regions = auditok.split(
            audio_path,
            min_dur=min_duration,
            max_dur=max_duration,
            max_silence=0.3,
            energy_threshold=45
        )
        # Save every region before deleting the source, in case auditok reads
        # it lazily. save() returns the name of the file it wrote.
        split_files = [
            region.save(f"{name_for_outputted_split_files}-{index}.wav")
            for index, region in enumerate(audio_regions, start=1)
        ]
    finally:
        # The intermediate audio is re-created on every run, so never keep it.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    if not split_files:
        raise gr.Error("No audio segments were detected, so nothing was split. Try different durations.")

    zip_file_name = "audio_files.zip"
    try:
        # Build the archive once from exactly the clips created above.
        with zipfile.ZipFile(zip_file_name, 'w') as zip_file:
            for split_file in split_files:
                zip_file.write(split_file, os.path.basename(split_file))
    finally:
        for split_file in split_files:
            if os.path.exists(split_file):
                os.remove(split_file)
    progress(1, "Done! Cleaning up...")
    # Short pause so the final progress message stays visible in the UI.
    time.sleep(2)
    return "Process done successfully! Check below for zipped files!", zip_file_name

# UI definition: one tab per workflow step, a "Misc tools" tab with nested
# tool tabs, and a changelog. Each button's click handler is wired to one of
# the functions defined above; output components are created inline in the
# click() call.
with gr.Blocks(theme='NoCrypt/miku', title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete.**"
    )
    gr.HTML("<h1> MP3 file support works for downloading a video and splitting a single file. </h1>")
    with gr.Tabs():
        # Step 1: download a video and write output.wav / output.mp3.
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        download_as = gr.Radio(["wav", "mp3"], label="Audio format output", value="wav", info="What should the audio format be output as?")
                        convertion = gr.Button("Download", variant='primary')
            convertion.click(
                fn=download_video,
                inputs=[url, download_as],
                outputs=gr.Text(label="Output")
            )
        # Step 2: split the downloaded audio into clips and zip them.
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                        splitbtn = gr.Button("Split", variant='primary')
            splitbtn.click(
                split_audio_from_yt_video,
                inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
            )
        with gr.TabItem("Misc tools"):
            # Average sample rate of the wavs inside an uploaded zip.
            with gr.Tab("SR analyzer"):
                gr.Markdown("Upload a zip file of your wavs here and this will determine the average sample rate.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            zipuploader = gr.File(file_count='single', file_types=[".zip"], label="ZIP file")
                        uploadbtn = gr.Button("Analyze", variant='primary')
                    uploadbtn.click(
                        analyze_audio,
                        [zipuploader],
                        [gr.Text(label="Result")]
                    )
            # Split a single uploaded audio file instead of a downloaded video.
            with gr.Tab("File splitter"):
                gr.Markdown("If you would rather split a single WAV or mp3 audio file, use this method instead.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            audiofileuploader = gr.File(file_count='single', file_types=[".wav", ".mp3"], label="WAV or mp3 file")
                            mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_split_files2 = gr.Textbox(label="Name for split files")
                            audiofileuploadbtn = gr.Button("Split", variant='primary')
                        audiofileuploadbtn.click(
                            split_wav_or_mp3_file,
                            [audiofileuploader, mindur2, maxdur2, name_for_split_files2],
                            [gr.Text(label="Output"), gr.File(label="Zipped files")]
                        )
            # Average pitch of a single uploaded wav.
            with gr.Tab("Pitch analyzer"):
                gr.Markdown("Upload a wav file here, and this will determine the average pitch.")
                # The heading tag was previously left unclosed.
                gr.HTML("<h1> Zip files and MP3 files are not supported as of now. </h1>")
                with gr.Row():
                    with gr.Column(variant='compact'):
                        with gr.Row():
                            upload = gr.File(file_count='single', file_types=[".wav"], label="WAV file")
                        analyze = gr.Button("Analyze", variant='primary')
                    analyze.click(
                        get_average_pitch,
                        [upload],
                        [gr.Text(label="Result")]
                    )
            # Download + split in a single step.
            with gr.Tab("All-in-one downloader and splitter"):
                gr.Markdown("This is very experimental and may break or change in the future. This essentially combines both the first 2 tabs into an all-in-one script.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            ytvideo = gr.Textbox(label="URL")
                            download_yt_video_as = gr.Radio(["wav", "mp3"], value="wav", label="Audio output format")
                            min_duration = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            max_duration = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_outputted_split_files = gr.Textbox(label="Name for split files")
                            download_and_split_btn = gr.Button("Download and split", variant='primary')
                        download_and_split_btn.click(
                            all_in_one_inator,
                            [ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files],
                            [gr.Text(label="Result"), gr.File(label="Zipped files")]
                        )
        with gr.TabItem("Changelog"):
            gr.Markdown("v0.93 - Removed obsolete warnings and fixed issue with all-in-one if output.mp3 or output.wav doesnt exist.")
            gr.Markdown("v0.92 - Added all-in-one tab under Misc Tools.")
            gr.Markdown("v0.91 - Added mp3 file support for single file splitting, and also fixed bug if neither output.wav or output.mp3 exists.")
            gr.Markdown("v0.90a - Fixed bug that if 'show_amount_of_files_and_file_dur' was False, split wav files would not be deleted.")
            gr.Markdown("v0.90 - Added mp3 support for downloading a Youtube video.")
            gr.Markdown("v0.85 - Fixed bug in pitch analyzer if no audio file was given.")
            gr.Markdown("v0.80 - Added new tool: Pitch Analyzer.")
            gr.Markdown("v0.75 - Fixed bug that would cause split wav files to be packaged with the previously split wav files.")
            gr.Markdown("v0.74 - Added new tool: WAV file splitter.")
            gr.Markdown("v0.73 - Added Misc Tools tab and new Sample Rate analyzer tool.")
            gr.Markdown("v0.70 - Fixed bug if no URL was passed or if the URL was invalid.")
            gr.Markdown("v0.65 - Fixed bug if user tried to split an audio file when 'output.wav' didnt exist.")
            gr.Markdown("v0.60 - Initial push to Huggingface Space.")

# Start the Gradio server for the interface defined above.
app.launch()