Update app.py
app.py CHANGED
@@ -11,6 +11,7 @@ import evaluate
 from datasets import load_dataset, Audio, disable_caching, set_caching_enabled
 import gradio as gr
 import torch
+import re
 
 set_caching_enabled(False)
 disable_caching()
@@ -28,7 +29,16 @@ model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc
 # Evaluate the model
 # model.eval()
 #print("model.eval ", model.eval())
+
+
+# Remove brackets and extra spaces
+
+
 def map_to_pred(batch):
+    cleaned_transcription = re.sub(r'\[[^\]]+\]', '', batch).strip()
+    cleaned_transcription = preprocess_transcription(batch['sentence'])
+    normalized_transcription = processor.tokenizer._normalize(cleaned_transcription)
+
     audio = batch["audio"]
     input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
     batch["reference"] = processor.tokenizer._normalize(batch['sentence'])
|