Update app.py
Browse files
app.py
CHANGED
@@ -187,6 +187,21 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
187 |
|
188 |
return output_path
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
def detect_voice_activity(audio_file, threshold=0.02):
|
191 |
"""
|
192 |
Detect voice activity in the audio file and trim the audio to include only voice segments.
|
@@ -198,8 +213,11 @@ def detect_voice_activity(audio_file, threshold=0.02):
|
|
198 |
Returns:
|
199 |
str: Path to the output audio file with only voice segments.
|
200 |
"""
|
201 |
-
#
|
202 |
-
|
|
|
|
|
|
|
203 |
|
204 |
# Normalize the audio data
|
205 |
if data.dtype != np.float32:
|
@@ -230,6 +248,9 @@ def detect_voice_activity(audio_file, threshold=0.02):
|
|
230 |
output_path = "voice_trimmed_audio.wav"
|
231 |
wavfile.write(output_path, sample_rate, trimmed_audio)
|
232 |
|
|
|
|
|
|
|
233 |
return output_path
|
234 |
|
235 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
|
|
187 |
|
188 |
return output_path
|
189 |
|
190 |
+
def convert_to_wav(audio_file):
    """
    Convert the input audio file to WAV format.

    Args:
        audio_file (str): Path to the input audio file (any container/codec
            that pydub/ffmpeg can decode).

    Returns:
        str: Path to the converted WAV file. The file is created fresh on
        every call; the caller is expected to delete it when finished
        (detect_voice_activity does so via os.remove).
    """
    import os
    import tempfile

    # Decode with pydub so arbitrary input formats are accepted, then
    # re-encode as WAV for downstream scipy.io.wavfile consumption.
    audio = AudioSegment.from_file(audio_file)

    # Use a unique temporary file rather than the original hard-coded
    # "converted_audio.wav" in the working directory: with a fixed name,
    # two concurrent requests clobber each other's intermediate file, and
    # the caller's os.remove() then deletes it out from under the other
    # request. mkstemp guarantees a per-call unique path.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pydub opens the path itself; we only need the name
    audio.export(wav_path, format="wav")
    return wav_path
|
204 |
+
|
205 |
def detect_voice_activity(audio_file, threshold=0.02):
|
206 |
"""
|
207 |
Detect voice activity in the audio file and trim the audio to include only voice segments.
|
|
|
213 |
Returns:
|
214 |
str: Path to the output audio file with only voice segments.
|
215 |
"""
|
216 |
+
# Convert the input audio to WAV format
|
217 |
+
wav_path = convert_to_wav(audio_file)
|
218 |
+
|
219 |
+
# Load the WAV file
|
220 |
+
sample_rate, data = wavfile.read(wav_path)
|
221 |
|
222 |
# Normalize the audio data
|
223 |
if data.dtype != np.float32:
|
|
|
248 |
output_path = "voice_trimmed_audio.wav"
|
249 |
wavfile.write(output_path, sample_rate, trimmed_audio)
|
250 |
|
251 |
+
# Clean up the converted WAV file
|
252 |
+
os.remove(wav_path)
|
253 |
+
|
254 |
return output_path
|
255 |
|
256 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|