roman
committed on
Commit
·
87bebbb
1
Parent(s):
e9f45f4
choose from list, increase sampling rate
Browse files
app.py
CHANGED
|
@@ -2,20 +2,26 @@ import streamlit as st
|
|
| 2 |
import librosa
|
| 3 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
| 4 |
|
|
|
|
|
|
|
| 5 |
|
|
|
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
def map_to_pred(file_path):
|
| 14 |
# load audio file
|
| 15 |
audio, _ = librosa.load(file_path)
|
| 16 |
|
| 17 |
# preprocess audio and generate standard
|
| 18 |
-
input_features = processor([audio], return_tensors="pt", sampling_rate=
|
| 19 |
generated_ids = model.generate(inputs=input_features)
|
| 20 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
| 21 |
text = processor.tokenizer._normalize(transcription[0])
|
|
|
|
| 2 |
import librosa
|
| 3 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
| 4 |
|
| 5 |
+
# Define available models
|
| 6 |
+
available_models = ["Yehor/whisper-small-ukrainian"]
|
| 7 |
|
| 8 |
+
st.title("Voice Recognition App")
|
| 9 |
|
| 10 |
+
# Model selection dropdown
|
| 11 |
+
model_choice = st.selectbox("Choose a model", available_models)
|
| 12 |
+
|
| 13 |
+
processor = AutoProcessor.from_pretrained(model_choice)
|
| 14 |
|
| 15 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_choice)
|
| 16 |
|
| 17 |
+
uploaded_file = st.file_uploader("Choose file", type=["wav", "mp3"])
|
| 18 |
|
| 19 |
def map_to_pred(file_path):
|
| 20 |
# load audio file
|
| 21 |
audio, _ = librosa.load(file_path)
|
| 22 |
|
| 23 |
# preprocess audio and generate standard
|
| 24 |
+
input_features = processor([audio], return_tensors="pt", sampling_rate=32_000).input_features
|
| 25 |
generated_ids = model.generate(inputs=input_features)
|
| 26 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
| 27 |
text = processor.tokenizer._normalize(transcription[0])
|