Spaces:

OthmaneJ
/

transcribe-distil-wav2vec2

Runtime error

App Files Files Community

OthmaneJ committed on Oct 12, 2021

Commit

c768111

1 Parent(s): 936698b

all files

Browse files

Files changed (4) hide show

app.py +40 -0
packages.txt +1 -0
poem.wav +0 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+import gradio as gr
+import torch
+import soundfile as sf
+import librosa
+# load model and processor
+model_name = "OthmaneJ/distil-wav2vec2"
+processor = Wav2Vec2Processor.from_pretrained(model_name)
+model = Wav2Vec2ForCTC.from_pretrained(model_name)
+# define function to read in sound file
+# def map_to_array(file):
+#    speech, sample_rate = sf.read(file)
+#    return speech, sample_rate
+# tokenize
+def inference(audio):
+   # read in sound file
+   speech, _ = librosa.load(audio.name,sr=16_000)
+   input_values = processor(speech, sampling_rate=16_000, return_tensors="pt", padding="longest").input_values  # Batch size 1
+   # retrieve logits
+   logits = model(input_values).logits
+   # take argmax and decode
+   predicted_ids = torch.argmax(logits, dim=-1)
+   transcription = processor.batch_decode(predicted_ids)
+   return transcription[0]
+inputs = gr.inputs.Audio(label="Input Audio", type="file")
+outputs =  gr.outputs.Textbox(label="Output Text")
+inputs = gr.inputs.Audio(label="Input Audio", type="file")
+outputs =  [gr.outputs.Textbox(label="Output Text"),gr.outputs.Textbox(label="Output Text")]
+title = "Distilled wav2vec 2.0"
+description = "Gradio demo for Robust wav2vec 2.0. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below. Currently supports .wav and .flac files"
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.01027' target='_blank'>Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training</a> | <a href='https://github.com/pytorch/fairseq' target='_blank'>Github Repo</a></p>"
+examples=[['poem.wav']]
+gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ libsndfile1

poem.wav ADDED Viewed

Binary file (499 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch
+requests
+#python-dotenv
+#streamlit
+SoundFile
+librosa
+transformers
+#pyannote.audio==1.1