Athspi committed on
Commit
ba6b40b
·
verified ·
1 Parent(s): cfd9ff1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -12
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
4
  import os
5
  from pydub import AudioSegment
6
  from transformers import pipeline
 
7
 
8
  # Mapping of model names to Whisper model sizes
9
  MODELS = {
@@ -11,7 +12,8 @@ MODELS = {
11
  "Base (Faster)": "base",
12
  "Small (Balanced)": "small",
13
  "Medium (Accurate)": "medium",
14
- "Large (Most Accurate)": "large"
 
15
  }
16
 
17
  # Fine-tuned models for specific languages
@@ -180,19 +182,28 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
180
  detected_language = language
181
  else:
182
  # Use the selected Whisper model
183
- model = whisper.load_model(MODELS[model_size])
184
-
185
- # Transcribe the audio
186
- if language == "Auto Detect":
187
- result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
188
- detected_language_code = result.get("language", "unknown")
189
  detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
190
  else:
191
- language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
192
- result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
193
- detected_language = language
194
-
195
- transcription = result["text"]
 
 
 
 
 
 
 
 
 
196
 
197
  # Clean up processed audio file
198
  os.remove(processed_audio_path)
 
4
  import os
5
  from pydub import AudioSegment
6
  from transformers import pipeline
7
+ from faster_whisper import WhisperModel # Import faster-whisper
8
 
9
  # Mapping of model names to Whisper model sizes
10
  MODELS = {
 
12
  "Base (Faster)": "base",
13
  "Small (Balanced)": "small",
14
  "Medium (Accurate)": "medium",
15
+ "Large (Most Accurate)": "large",
16
+ "Systran Faster Whisper Large v3": "Systran/faster-whisper-large-v3" # Add the new model
17
  }
18
 
19
  # Fine-tuned models for specific languages
 
182
  detected_language = language
183
  else:
184
  # Use the selected Whisper model
185
+ if model_size == "Systran/faster-whisper-large-v3":
186
+ # Use faster-whisper for the Systran model
187
+ model = WhisperModel(model_size, device="cuda" if torch.cuda.is_available() else "cpu")
188
+ segments, info = model.transcribe(processed_audio_path, beam_size=5)
189
+ transcription = " ".join([segment.text for segment in segments])
190
+ detected_language_code = info.language
191
  detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
192
  else:
193
+ # Use the standard Whisper model
194
+ model = whisper.load_model(MODELS[model_size])
195
+
196
+ # Transcribe the audio
197
+ if language == "Auto Detect":
198
+ result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
199
+ detected_language_code = result.get("language", "unknown")
200
+ detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
201
+ else:
202
+ language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
203
+ result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
204
+ detected_language = language
205
+
206
+ transcription = result["text"]
207
 
208
  # Clean up processed audio file
209
  os.remove(processed_audio_path)