OthmaneJ committed on
Commit
c768111
·
1 Parent(s): 936698b
Files changed (4) hide show
  1. app.py +40 -0
  2. packages.txt +1 -0
  3. poem.wav +0 -0
  4. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
2
+ import gradio as gr
3
+ import torch
4
+ import soundfile as sf
5
+ import librosa
6
+
7
+
8
+ # Load the processor and CTC model for the checkpoint once at import time; inference() reuses them on every call.
9
+ model_name = "OthmaneJ/distil-wav2vec2"
10
+ processor = Wav2Vec2Processor.from_pretrained(model_name)
11
+ model = Wav2Vec2ForCTC.from_pretrained(model_name)
12
+
13
+ # Unused soundfile-based reader, kept for reference only (inference() loads audio with librosa instead):
14
+ # def map_to_array(file):
15
+ #     speech, sample_rate = sf.read(file)
16
+ #     return speech, sample_rate
17
+
18
+ # tokenize
19
+ def inference(audio):
20
+ # read in sound file
21
+ speech, _ = librosa.load(audio.name,sr=16_000)
22
+ input_values = processor(speech, sampling_rate=16_000, return_tensors="pt", padding="longest").input_values # Batch size 1
23
+ # retrieve logits
24
+ logits = model(input_values).logits
25
+ # take argmax and decode
26
+ predicted_ids = torch.argmax(logits, dim=-1)
27
+ transcription = processor.batch_decode(predicted_ids)
28
+ return transcription[0]
29
+
30
+ inputs = gr.inputs.Audio(label="Input Audio", type="file")
31
+ outputs = gr.outputs.Textbox(label="Output Text")
32
+
33
+
34
+ inputs = gr.inputs.Audio(label="Input Audio", type="file")
35
+ outputs = [gr.outputs.Textbox(label="Output Text"),gr.outputs.Textbox(label="Output Text")]
36
+ title = "Distilled wav2vec 2.0"
37
+ description = "Gradio demo for Robust wav2vec 2.0. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below. Currently supports .wav and .flac files"
38
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.01027' target='_blank'>Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training</a> | <a href='https://github.com/pytorch/fairseq' target='_blank'>Github Repo</a></p>"
39
+ examples=[['poem.wav']]
40
+ gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ libsndfile1
poem.wav ADDED
Binary file (499 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ requests
3
+ #python-dotenv
4
+ #streamlit
5
+ SoundFile
6
+ librosa
7
+ transformers
8
+ #pyannote.audio==1.1