roman
committed on
Commit
·
19a40bb
1
Parent(s):
cbb91a2
16000
Browse files
app.py
CHANGED
|
@@ -21,7 +21,7 @@ def map_to_pred(file_path):
|
|
| 21 |
audio, _ = librosa.load(file_path)
|
| 22 |
|
| 23 |
# preprocess audio and generate standard
|
| 24 |
-
input_features = processor([audio], return_tensors="pt", sampling_rate=
|
| 25 |
generated_ids = model.generate(inputs=input_features)
|
| 26 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
| 27 |
text = processor.tokenizer._normalize(transcription[0])
|
|
@@ -33,6 +33,13 @@ if uploaded_file is not None:
|
|
| 33 |
with open(file_path, 'wb') as f:
|
| 34 |
f.write(uploaded_file.getbuffer())
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
text = map_to_pred(file_path)
|
| 37 |
|
| 38 |
# display results
|
|
|
|
| 21 |
audio, _ = librosa.load(file_path)
|
| 22 |
|
| 23 |
# preprocess audio and generate standard
|
| 24 |
+
input_features = processor([audio], return_tensors="pt", sampling_rate=16000).input_features
|
| 25 |
generated_ids = model.generate(inputs=input_features)
|
| 26 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
| 27 |
text = processor.tokenizer._normalize(transcription[0])
|
|
|
|
| 33 |
with open(file_path, 'wb') as f:
|
| 34 |
f.write(uploaded_file.getbuffer())
|
| 35 |
|
| 36 |
+
# Convert audio file to a format supported by Whisper (if necessary)
|
| 37 |
+
audio = AudioSegment.from_file(temp_file_path)
|
| 38 |
+
temp_wav_path = tempfile.mktemp(suffix=".wav")
|
| 39 |
+
audio.export(temp_wav_path, format="wav")
|
| 40 |
+
|
| 41 |
+
st.audio(uploaded_file, format="audio/wav")
|
| 42 |
+
|
| 43 |
text = map_to_pred(file_path)
|
| 44 |
|
| 45 |
# display results
|