Athspi committed on
Commit
c885037
·
verified ·
1 Parent(s): 8d2b72a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -149,31 +149,28 @@ def detect_language(audio_file):
149
  return "Error: No audio file uploaded."
150
 
151
  try:
152
- # Convert audio to WAV format
153
- wav_path = convert_to_wav(audio_file)
154
- logger.info(f"Audio file converted to WAV: {wav_path}")
155
-
156
  # Define device and compute type for faster-whisper
157
  device = "cuda" if torch.cuda.is_available() else "cpu"
158
  compute_type = "float32" if device == "cuda" else "int8"
159
- logger.info(f"Using device: {device}, compute_type: {compute_type}")
160
 
161
  # Load the faster-whisper model for language detection
162
  model = WhisperModel(MODELS["Faster Whisper Large v3"], device=device, compute_type=compute_type)
163
- logger.info("Faster-Whisper model loaded successfully.")
 
 
 
 
 
164
 
165
  # Detect the language using faster-whisper
166
- segments, info = model.transcribe(wav_path, task="translate", language=None)
167
  detected_language_code = info.language
168
- logger.info(f"Detected language code: {detected_language_code}")
169
 
170
  # Get the full language name from the code
171
  detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
172
- logger.info(f"Detected language: {detected_language}")
173
 
174
- # Clean up temporary WAV file
175
- os.remove(wav_path)
176
- logger.info("Temporary WAV file removed.")
177
 
178
  return f"Detected Language: {detected_language}"
179
  except Exception as e:
@@ -317,15 +314,11 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
317
  return "Error: No audio file uploaded."
318
 
319
  try:
320
- # Convert audio to WAV format
321
- wav_path = convert_to_wav(audio_file)
322
-
323
  # Convert audio to 16kHz mono for better compatibility
324
- audio = AudioSegment.from_file(wav_path)
325
  audio = audio.set_frame_rate(16000).set_channels(1)
326
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_processed:
327
- processed_audio_path = temp_processed.name
328
- audio.export(processed_audio_path, format="wav")
329
 
330
  # Load the appropriate model
331
  if model_size == "Faster Whisper Large v3":
@@ -363,7 +356,6 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
363
 
364
  # Clean up processed audio file
365
  os.remove(processed_audio_path)
366
- os.remove(wav_path)
367
 
368
  # Return transcription and detected language
369
  return f"Detected Language: {detected_language}\n\nTranscription:\n{transcription}"
 
149
  return "Error: No audio file uploaded."
150
 
151
  try:
 
 
 
 
152
  # Define device and compute type for faster-whisper
153
  device = "cuda" if torch.cuda.is_available() else "cpu"
154
  compute_type = "float32" if device == "cuda" else "int8"
 
155
 
156
  # Load the faster-whisper model for language detection
157
  model = WhisperModel(MODELS["Faster Whisper Large v3"], device=device, compute_type=compute_type)
158
+
159
+ # Convert audio to 16kHz mono for better compatibility
160
+ audio = AudioSegment.from_file(audio_file)
161
+ audio = audio.set_frame_rate(16000).set_channels(1)
162
+ processed_audio_path = "processed_audio.wav"
163
+ audio.export(processed_audio_path, format="wav")
164
 
165
  # Detect the language using faster-whisper
166
+ segments, info = model.transcribe(processed_audio_path, task="translate", language=None)
167
  detected_language_code = info.language
 
168
 
169
  # Get the full language name from the code
170
  detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
 
171
 
172
+ # Clean up processed audio file
173
+ os.remove(processed_audio_path)
 
174
 
175
  return f"Detected Language: {detected_language}"
176
  except Exception as e:
 
314
  return "Error: No audio file uploaded."
315
 
316
  try:
 
 
 
317
  # Convert audio to 16kHz mono for better compatibility
318
+ audio = AudioSegment.from_file(audio_file)
319
  audio = audio.set_frame_rate(16000).set_channels(1)
320
+ processed_audio_path = "processed_audio.wav"
321
+ audio.export(processed_audio_path, format="wav")
 
322
 
323
  # Load the appropriate model
324
  if model_size == "Faster Whisper Large v3":
 
356
 
357
  # Clean up processed audio file
358
  os.remove(processed_audio_path)
 
359
 
360
  # Return transcription and detected language
361
  return f"Detected Language: {detected_language}\n\nTranscription:\n{transcription}"