Kryptone committed on
Commit
e8f356e
·
1 Parent(s): 9433e9a

add file splitter in misc tools

Browse files
Files changed (1) hide show
  1. app.py +50 -9
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import gradio as gr, glob, os, auditok, random, zipfile, wave, pytube.exceptions
2
  from pytube import YouTube
3
  from moviepy.editor import VideoFileClip
4
 
@@ -19,9 +19,9 @@ def download_video(url):
19
  os.remove(removalmp4)
20
  return "Finished downloading! Please proceed to final tab."
21
 
22
- def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
23
  if show_amount_of_files_and_file_dur == True:
24
- gr.Warning(f"show_amount_of_files_and_file_dur set to True. This feature may be inaccurate especially for WAV files, so dont rely too much on the count and duration.")
25
  if not os.path.exists("output.wav"):
26
  raise gr.Error("Output.wav does not exist! Did you do the first tab correctly or at all?")
27
  if mindur == maxdur:
@@ -43,14 +43,13 @@ def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_f
43
  for i, r in enumerate(audio_regions):
44
  filename = r.save(f"{name_for_split_files}-{i+1}.wav")
45
  for f in sorted(glob.glob("*.wav")):
46
- set_name = name_for_split_files + "-" + str(random.randint(1, 91071988)) + ".wav"
47
  audio_files = glob.glob("*.wav")
48
  zip_file_name = "audio_files.zip"
49
  with zipfile.ZipFile(zip_file_name, "w") as zip_file:
50
  for audio_file in audio_files:
51
  zip_file.write(audio_file, os.path.basename(audio_file))
52
  if show_amount_of_files_and_file_dur == False:
53
- return "Files split successfully!\n\nCheck below for zipped files.", zip_file_name
54
  elif show_amount_of_files_and_file_dur == True:
55
  largest_file = ("", 0)
56
  total_files = 0
@@ -73,7 +72,7 @@ def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_f
73
  length_mins = total_length / 60
74
  for file2 in glob.glob("*.wav"):
75
  os.remove(file2)
76
- return f"Files split successfully!\n\nCheck below for zipped files.\n\n{total_files} files created, {length_mins:.2f} minutes total.", zip_file_name
77
 
78
  def analyze_audio(zip_file_path):
79
  with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
@@ -96,13 +95,41 @@ def analyze_audio(zip_file_path):
96
  else:
97
  return "No average sample rate could be found."
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  with gr.Blocks(theme=gr.themes.Monochrome(), title="Global Dataset Maker") as app:
101
  gr.HTML(
102
  "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
103
  )
104
  gr.Markdown(
105
- "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete, so please be patient.**"
106
  )
107
  with gr.Tabs():
108
  with gr.TabItem("Download Video"):
@@ -126,7 +153,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), title="Global Dataset Maker") as ap
126
  show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
127
  splitbtn = gr.Button("Split", variant='primary')
128
  splitbtn.click(
129
- split_audio,
130
  inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
131
  outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
132
  )
@@ -143,5 +170,19 @@ with gr.Blocks(theme=gr.themes.Monochrome(), title="Global Dataset Maker") as ap
143
  [zipuploader],
144
  [gr.Text(label="Result")]
145
  )
146
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  app.launch()
 
1
+ import gradio as gr, glob, os, auditok, zipfile, wave, pytube.exceptions
2
  from pytube import YouTube
3
  from moviepy.editor import VideoFileClip
4
 
 
19
  os.remove(removalmp4)
20
  return "Finished downloading! Please proceed to final tab."
21
 
22
+ def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
23
  if show_amount_of_files_and_file_dur == True:
24
+ gr.Warning(f"show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")
25
  if not os.path.exists("output.wav"):
26
  raise gr.Error("Output.wav does not exist! Did you do the first tab correctly or at all?")
27
  if mindur == maxdur:
 
43
  for i, r in enumerate(audio_regions):
44
  filename = r.save(f"{name_for_split_files}-{i+1}.wav")
45
  for f in sorted(glob.glob("*.wav")):
 
46
  audio_files = glob.glob("*.wav")
47
  zip_file_name = "audio_files.zip"
48
  with zipfile.ZipFile(zip_file_name, "w") as zip_file:
49
  for audio_file in audio_files:
50
  zip_file.write(audio_file, os.path.basename(audio_file))
51
  if show_amount_of_files_and_file_dur == False:
52
+ return "Files split successfully!\nCheck below for zipped files.", zip_file_name
53
  elif show_amount_of_files_and_file_dur == True:
54
  largest_file = ("", 0)
55
  total_files = 0
 
72
  length_mins = total_length / 60
73
  for file2 in glob.glob("*.wav"):
74
  os.remove(file2)
75
+ return f"Files split successfully!\nCheck below for zipped files.\n\n{total_files} files created, {length_mins:.2f} minutes total.", zip_file_name
76
 
77
  def analyze_audio(zip_file_path):
78
  with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
 
95
  else:
96
  return "No average sample rate could be found."
97
 
98
def split_wav_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2):
    """Split an uploaded WAV file into regions and zip the resulting files.

    Args:
        audiofileuploader: Value of the "WAV file" upload component. Either a
            filesystem path or a file-like object exposing the path as
            ``.name``; ``None`` when nothing was uploaded.
        mindur2: Minimum duration passed to ``auditok.split`` as ``min_dur``.
        maxdur2: Maximum duration passed to ``auditok.split`` as ``max_dur``.
        name_for_split_files2: Prefix for the generated files, which are
            written as ``<prefix>-<index>.wav`` in the working directory.

    Returns:
        A ``(message, zip_path)`` tuple: a status string including the number
        of WAV files that were zipped, and the name of the zip archive.

    Raises:
        gr.Error: If no file was uploaded, if ``mindur2`` is equal to or
            greater than ``maxdur2``, or if the file-name prefix is empty.
    """
    if audiofileuploader is None:
        raise gr.Error("Audio file cannot be empty!")
    if mindur2 == maxdur2:
        # The messages must use this function's own parameters; the bare
        # names mindur/maxdur do not exist here and raised NameError.
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, min and max are the same number.")
    if mindur2 > maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, mindur is higher than maxdur.")
    if not name_for_split_files2:
        # Covers both None and an empty string from the textbox.
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")

    # NOTE(review): depending on the Gradio version the upload arrives either
    # as a path string or as a temp-file wrapper with `.name` -- accept both.
    audio_path = getattr(audiofileuploader, "name", audiofileuploader)
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur2,
        max_dur=maxdur2,
        max_silence=0.3,
        energy_threshold=45,
    )
    for index, region in enumerate(audio_regions, start=1):
        region.save(f"{name_for_split_files2}-{index}.wav")
    # Delete the upload only after every region has been written, so a
    # lazily evaluated splitter never reads from a file that is already gone.
    os.remove(audio_path)

    # Everything matching *.wav in the working directory goes into the zip
    # (same selection as before, without the redundant outer loop that left
    # `audio_files` unbound when nothing matched).
    audio_files = glob.glob("*.wav")
    zip_file_name2 = "audio_files.zip"
    with zipfile.ZipFile(zip_file_name2, "w") as zip_file:
        for audio_file in audio_files:
            zip_file.write(audio_file, os.path.basename(audio_file))
    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(audio_files)}", zip_file_name2
126
 
127
  with gr.Blocks(theme=gr.themes.Monochrome(), title="Global Dataset Maker") as app:
128
  gr.HTML(
129
  "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
130
  )
131
  gr.Markdown(
132
+ "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete.**"
133
  )
134
  with gr.Tabs():
135
  with gr.TabItem("Download Video"):
 
153
  show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
154
  splitbtn = gr.Button("Split", variant='primary')
155
  splitbtn.click(
156
+ split_audio_from_yt_video,
157
  inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
158
  outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
159
  )
 
170
  [zipuploader],
171
  [gr.Text(label="Result")]
172
  )
173
+ with gr.Tab("File splitter"):
174
+ gr.Markdown("If you would rather split a single WAV (mp3 support soon) audio file, use this method instead.")
175
+ with gr.Row():
176
+ with gr.Column():
177
+ with gr.Row():
178
+ audiofileuploader = gr.File(file_count='single', file_types=[".wav"], label="WAV file")
179
+ mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
180
+ maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
181
+ name_for_split_files2 = gr.Textbox(label="Name for split files")
182
+ audiofileuploadbtn = gr.Button("Split", variant='primary')
183
+ audiofileuploadbtn.click(
184
+ split_wav_file,
185
+ [audiofileuploader, mindur2, maxdur2, name_for_split_files2],
186
+ [gr.Text(label="Output"), gr.File(label="Zipped files")]
187
+ )
188
  app.launch()