Update app.py
app.py CHANGED
@@ -149,31 +149,28 @@ def detect_language(audio_file):
         return "Error: No audio file uploaded."

     try:
-        # Convert audio to WAV format
-        wav_path = convert_to_wav(audio_file)
-        logger.info(f"Audio file converted to WAV: {wav_path}")
-
         # Define device and compute type for faster-whisper
         device = "cuda" if torch.cuda.is_available() else "cpu"
         compute_type = "float32" if device == "cuda" else "int8"
-        logger.info(f"Using device: {device}, compute_type: {compute_type}")

         # Load the faster-whisper model for language detection
         model = WhisperModel(MODELS["Faster Whisper Large v3"], device=device, compute_type=compute_type)
-
+
+        # Convert audio to 16kHz mono for better compatibility
+        audio = AudioSegment.from_file(audio_file)
+        audio = audio.set_frame_rate(16000).set_channels(1)
+        processed_audio_path = "processed_audio.wav"
+        audio.export(processed_audio_path, format="wav")

         # Detect the language using faster-whisper
-        segments, info = model.transcribe(
+        segments, info = model.transcribe(processed_audio_path, task="translate", language=None)
         detected_language_code = info.language
-        logger.info(f"Detected language code: {detected_language_code}")

         # Get the full language name from the code
         detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
-        logger.info(f"Detected language: {detected_language}")

-        # Clean up
-        os.remove(wav_path)
-        logger.info("Temporary WAV file removed.")
+        # Clean up processed audio file
+        os.remove(processed_audio_path)

         return f"Detected Language: {detected_language}"
     except Exception as e:
@@ -317,15 +314,11 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
         return "Error: No audio file uploaded."

     try:
-        # Convert audio to WAV format
-        wav_path = convert_to_wav(audio_file)
-
         # Convert audio to 16kHz mono for better compatibility
-        audio = AudioSegment.from_file(
+        audio = AudioSegment.from_file(audio_file)
         audio = audio.set_frame_rate(16000).set_channels(1)
-
-
-        audio.export(processed_audio_path, format="wav")
+        processed_audio_path = "processed_audio.wav"
+        audio.export(processed_audio_path, format="wav")

         # Load the appropriate model
         if model_size == "Faster Whisper Large v3":
@@ -363,7 +356,6 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis

         # Clean up processed audio file
         os.remove(processed_audio_path)
-        os.remove(wav_path)

         # Return transcription and detected language
         return f"Detected Language: {detected_language}\n\nTranscription:\n{transcription}"
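
For reference, the revised detection path reduces to the steps below. This is a minimal standalone sketch, not the app itself: the function name detect_language_sketch and the literal model id "large-v3" are stand-ins for illustration, whereas app.py resolves the checkpoint via MODELS["Faster Whisper Large v3"] and maps the detected code through CODE_TO_LANGUAGE_NAME.

import os
import torch
from pydub import AudioSegment
from faster_whisper import WhisperModel

def detect_language_sketch(audio_file):
    # Pick GPU when available; int8 keeps CPU inference memory modest.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float32" if device == "cuda" else "int8"
    model = WhisperModel("large-v3", device=device, compute_type=compute_type)

    # Resample to 16 kHz mono, matching what the commit adds before transcribe().
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_frame_rate(16000).set_channels(1)
    processed_audio_path = "processed_audio.wav"
    audio.export(processed_audio_path, format="wav")

    # language=None lets faster-whisper detect the language itself;
    # the detected code is exposed on the returned info object.
    segments, info = model.transcribe(processed_audio_path, language=None)
    detected_code = info.language  # e.g. "en"

    # Remove the temporary resampled file, as the new code does.
    os.remove(processed_audio_path)
    return detected_code

The real call in app.py also passes task="translate"; detection is driven by language=None, while the task argument only affects the decoded text.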