Update app.py
app.py CHANGED

@@ -4,6 +4,7 @@ import torch
 import os
 from pydub import AudioSegment
 from transformers import pipeline
+from faster_whisper import WhisperModel  # Import faster-whisper

 # Mapping of model names to Whisper model sizes
 MODELS = {
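
Only app.py changes in this commit, so wherever the app is deployed, the faster-whisper package must also be installed (for example, declared in requirements.txt) for the new import to resolve.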
@@ -11,7 +12,8 @@ MODELS = {
     "Base (Faster)": "base",
     "Small (Balanced)": "small",
     "Medium (Accurate)": "medium",
-    "Large (Most Accurate)": "large"
+    "Large (Most Accurate)": "large",
+    "Systran Faster Whisper Large v3": "Systran/faster-whisper-large-v3" # Add the new model
 }

 # Fine-tuned models for specific languages
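
The keys of MODELS are the labels shown in the UI, while the values are what actually gets loaded; the new entry's value is a full Hugging Face repo id rather than a bare Whisper size name. A minimal sketch of the lookup, assuming only the mapping above:

# The UI label resolves to a model identifier via MODELS.
label = "Systran Faster Whisper Large v3"
model_id = MODELS[label]        # -> "Systran/faster-whisper-large-v3"

# Illustrative heuristic only: repo ids contain a slash, built-in size names do not.
uses_faster_whisper = "/" in model_id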
@@ -180,19 +182,28 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
         detected_language = language
     else:
         # Use the selected Whisper model
-        …
-        detected_language_code = …
+        if model_size == "Systran Faster Whisper Large v3":
+            # Use faster-whisper for the Systran model
+            model = WhisperModel(MODELS[model_size], device="cuda" if torch.cuda.is_available() else "cpu")
+            segments, info = model.transcribe(processed_audio_path, beam_size=5)
+            transcription = " ".join([segment.text for segment in segments])
+            detected_language_code = info.language
             detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
         else:
-            …
+            # Use the standard Whisper model
+            model = whisper.load_model(MODELS[model_size])
+
+            # Transcribe the audio
+            if language == "Auto Detect":
+                result = model.transcribe(processed_audio_path, fp16=False)  # Auto-detect language
+                detected_language_code = result.get("language", "unknown")
+                detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
+            else:
+                language_code = LANGUAGE_NAME_TO_CODE.get(language, "en")  # Default to English if not found
+                result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
+                detected_language = language
+
+            transcription = result["text"]

     # Clean up processed audio file
     os.remove(processed_audio_path)
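
For reference, here is a minimal standalone sketch of the faster-whisper call pattern the new branch relies on; the audio file name and compute_type are illustrative, not taken from the app:

from faster_whisper import WhisperModel

# Device choice mirrors the diff; compute_type="int8" is a common CPU-friendly setting.
model = WhisperModel("Systran/faster-whisper-large-v3", device="cpu", compute_type="int8")

# transcribe() returns a lazy generator of segments plus a TranscriptionInfo object.
segments, info = model.transcribe("sample.wav", beam_size=5)
print(info.language, info.language_probability)  # detected language and its confidence

# Iterating the generator drives the actual decoding, as the join() in the diff does.
text = " ".join(segment.text for segment in segments)
print(text.strip())

Because decoding is lazy, failures tend to surface while the segments generator is consumed rather than at the transcribe() call itself, which is worth keeping in mind when debugging this branch.
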
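On the standard whisper branch, fp16=False forces FP32 decoding: on CPU this avoids the "FP16 is not supported on CPU; using FP32 instead" warning (half precision is unsupported there anyway), while on GPU it trades some speed for full precision.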