File size: 5,640 Bytes
635ad89
f088aee
 
 
 
 
635ad89
 
 
 
f088aee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1a6d8e
 
f088aee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f2153e
f088aee
 
 
 
0a26beb
 
 
 
f088aee
 
 
 
 
 
 
0a26beb
 
 
 
 
 
 
f088aee
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr, glob, os, auditok, random, zipfile, wave, pytube.exceptions
from pytube import YouTube
from moviepy.editor import VideoFileClip
import auditok

def download_video(url):
    """Download a YouTube video and extract its audio track to ``output.wav``.

    The video is fetched with pytube into the current working directory,
    its audio is written to ``output.wav`` with moviepy, and the downloaded
    video file is deleted afterwards (also when extraction fails).

    Args:
        url: Address of the YouTube video, as typed into the UI.

    Returns:
        A status message for display in the UI.

    Raises:
        gr.Error: If the URL is empty or not recognised by pytube, if no
            downloadable stream is found, or if the video has no audio track.
    """
    if not url:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
    try:
        yt = YouTube(url)
    except pytube.exceptions.RegexMatchError:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!") from None

    video = yt.streams.get_highest_resolution()
    if video is None:
        raise gr.Error("No downloadable stream was found for this video.")

    # download() returns the path it wrote to; fall back to the default name
    # in case a pytube version returns nothing.
    video_path = video.download() or video.default_filename
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track to extract.")
            try:
                audio_clip.write_audiofile("output.wav")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # Remove only the file this call downloaded, not every *.mp4 that
        # happens to live in the working directory.
        if os.path.exists(video_path):
            os.remove(video_path)
    return "Finished downloading! Please proceed to final tab."

def _wav_stats(paths):
    """Return ``(file_count, total_seconds)`` for the WAV files in *paths*."""
    count = 0
    seconds = 0.0
    for path in paths:
        with wave.open(path, "rb") as wav_file:
            seconds += wav_file.getnframes() / float(wav_file.getframerate())
        count += 1
    return count, seconds


def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split ``output.wav`` into segments and bundle them into a zip archive.

    ``output.wav`` is split with ``auditok.split``; every region is saved as
    ``<name_for_split_files>-<n>.wav`` and the saved files are written to
    ``audio_files.zip``. The individual segment files are removed once the
    archive exists, and ``output.wav`` is removed after a successful split.
    If no segment is detected, ``output.wav`` is kept so the split can be
    retried with other settings.

    Args:
        mindur: Forwarded to ``auditok.split`` as ``min_dur`` (seconds,
            per auditok's documentation).
        maxdur: Forwarded to ``auditok.split`` as ``max_dur``.
        name_for_split_files: Prefix used for the segment file names.
        show_amount_of_files_and_file_dur: When true, the status message also
            reports the number of segments and their total length in minutes.

    Returns:
        A ``(status_message, zip_file_name)`` tuple.

    Raises:
        gr.Error: If ``output.wav`` is missing, the durations are unset,
            equal or inverted, the name is empty or contains a path
            separator, no segment is detected, or a segment cannot be read
            back as a WAV file.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning(f"show_amount_of_files_and_file_dur set to True. This feature may be inaccurate especially for WAV files, so dont rely too much on the count and duration.")

    audio_path = "output.wav"
    if not os.path.exists(audio_path):
        raise gr.Error("Output.wav does not exist! Did you do the first tab correctly or at all?")
    # A cleared number field arrives as None; comparing it would raise TypeError.
    if mindur is None or maxdur is None:
        raise gr.Error("Min duration and max duration must both be set.")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An empty text box yields "" rather than None, so test truthiness.
    if not name_for_split_files or not name_for_split_files.strip():
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")
    # The name is used to build file paths; keep the output in this directory.
    if os.path.basename(name_for_split_files) != name_for_split_files:
        raise gr.Error("Split files name cannot contain path separators.")

    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45
    )

    zip_file_name = "audio_files.zip"
    split_files = []
    total_files = 0
    total_length = 0.0
    try:
        for index, region in enumerate(audio_regions, start=1):
            target = f"{name_for_split_files}-{index}.wav"
            # save() reports the name it wrote; fall back to the requested one.
            split_files.append(region.save(target) or target)

        if not split_files:
            raise gr.Error("No audio segments were detected. Try different min/max durations.")

        # Build the archive once, from exactly the files created above, so
        # stale WAV files from earlier runs are never swept in.
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for split_file in split_files:
                zip_file.write(split_file, os.path.basename(split_file))

        if show_amount_of_files_and_file_dur:
            try:
                total_files, total_length = _wav_stats(split_files)
            except wave.Error as e:
                raise gr.Error(f"Error reading file: {e}")
    finally:
        # The segments live on inside the archive; do not leave loose copies
        # behind regardless of which options were chosen.
        for split_file in split_files:
            if os.path.exists(split_file):
                os.remove(split_file)

    os.remove(audio_path)

    if not show_amount_of_files_and_file_dur:
        return "Files split successfully!\n\nCheck below for zipped files.", zip_file_name
    length_mins = total_length / 60
    return f"Files split successfully!\n\nCheck below for zipped files.\n\n{total_files} files created, {length_mins:.2f} minutes total.", zip_file_name

# Gradio UI: a two-tab layout, one tab per stage of the pipeline
# (download a video's audio, then split it into zipped segments).
with gr.Blocks(theme=gr.themes.Soft(), title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete, so please be patient.**"
    )
    with gr.Tabs():
        # Tab 1: URL text box plus a button that runs download_video.
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        convertion = gr.Button("Download", variant='primary')
            # The string returned by download_video is shown in an "Output"
            # text component that is created inline in this call.
            convertion.click(
                fn=download_video,
                inputs=[url],
                outputs=gr.Text(label="Output")
            )
        # Tab 2: split settings plus a button that runs split_audio.
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        # Both duration fields are bounded to 1-10 by the
                        # widgets; the defaults are 1 (min) and 8 (max).
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=8)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                    splitbtn = gr.Button("Split", variant='primary')
            # The inputs are passed positionally in the order of split_audio's
            # parameters; its two return values fill the status text and the
            # downloadable file component, in that order.
            splitbtn.click(
                split_audio,
                inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
            )
        
# Launched unconditionally at module level (there is no __main__ guard).
app.launch()