pratikshahp committed on
Commit
9b3e60b
·
verified ·
1 Parent(s): d970fdc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -1,11 +1,10 @@
1
- import librosa
2
  import torch
3
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
4
  import streamlit as st
5
-
6
  from audio_recorder_streamlit import audio_recorder
7
 
8
- audio_bytes = audio_recorder()
9
  if audio_bytes:
10
  st.audio(audio_bytes, format="audio/wav")
11
 
@@ -14,12 +13,14 @@ tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
14
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
15
 
16
  #load audio file
17
- speech, rate = librosa.load("/hip-voice.m4a",sr=16000)
 
 
 
18
 
19
- import IPython.display as display
20
- display.Audio("batman1.wav", autoplay=True)
21
 
22
- input_values = tokenizer(speech, return_tensors = 'pt').input_values
23
  logits = model(input_values).logits
24
 
25
  predicted_ids = torch.argmax(logits, dim =-1)
@@ -28,5 +29,3 @@ predicted_ids = torch.argmax(logits, dim =-1)
28
  transcriptions = tokenizer.decode(predicted_ids[0])
29
 
30
  print(transcriptions)
31
-
32
- st.write("hi")
 
"""Streamlit app: record speech in the browser and transcribe it with Wav2Vec2.

Flow: the `audio_recorder` widget returns WAV-encoded bytes; we decode them to
a float waveform, run facebook/wav2vec2-base-960h CTC inference, and show the
transcription in the page.
"""

import io
import wave

import numpy as np
import torch
import streamlit as st
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from audio_recorder_streamlit import audio_recorder


def _wav_bytes_to_float_array(wav_bytes: bytes) -> np.ndarray:
    """Decode 16-bit PCM WAV bytes into a 1-D float32 array scaled to [-1, 1].

    The recorder is configured for sample_rate=16_000, which matches the
    model's expected rate, so no resampling is done here.
    Multi-channel audio is downmixed to mono by averaging channels.
    """
    with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
        n_channels = wav_file.getnchannels()
        frames = wav_file.readframes(wav_file.getnframes())
    # assumes 16-bit PCM frames (the widget's default encoding) — TODO confirm
    samples = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32768.0
    if n_channels > 1:
        samples = samples.reshape(-1, n_channels).mean(axis=1)
    return samples


# NOTE(review): these loads run on every Streamlit rerun; consider caching
# them once the deployment target's Streamlit version is confirmed.
tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

# Record at 16 kHz so the waveform matches the model's training sample rate.
audio_bytes = audio_recorder(pause_threshold=3.0, sample_rate=16_000)

# Guard: audio_bytes is None until the user has actually recorded something;
# running inference unconditionally would crash on first page load.
if audio_bytes:
    st.audio(audio_bytes, format="audio/wav")

    # The tokenizer expects a float sample array, not raw WAV bytes —
    # decode first (this was the original bug).
    speech = _wav_bytes_to_float_array(audio_bytes)
    input_values = tokenizer(speech, return_tensors="pt").input_values

    logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = tokenizer.decode(predicted_ids[0])

    # Show the result in the UI; keep the console print for server-side logs.
    st.write(transcription)
    print(transcription)