aikitty committed on
Commit
27c7c02
1 Parent(s): 21ec319

Add front end

Browse files
Files changed (1) hide show
  1. app.py +31 -1
app.py CHANGED
@@ -1,3 +1,33 @@
1
  import gradio as gr
 
 
2
 
3
- gr.load("models/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
+ import torch
4
 
5
# Hugging Face Hub checkpoint used for both the processor and the CTC model.
_MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn"

# Loaded once at import time so every request reuses the same objects.
model = Wav2Vec2ForCTC.from_pretrained(_MODEL_ID)
processor = Wav2Vec2Processor.from_pretrained(_MODEL_ID)
8
+
9
+ # Function to transcribe the audio
10
def transcribe_audio(audio):
    """Transcribe a single audio clip with the module-level wav2vec2 model.

    Args:
        audio: Mapping with an ``"array"`` entry (the waveform) and a
            ``"sampling_rate"`` entry; both are forwarded to the processor.

    Returns:
        The decoded transcription of the clip.
    """
    features = processor(
        audio["array"],
        sampling_rate=audio["sampling_rate"],
        return_tensors="pt",
    )

    # Forward pass only; gradient tracking is switched off.
    with torch.no_grad():
        scores = model(features.input_values).logits

    # Pick the highest-scoring token id along the last axis, then let the
    # processor turn the ids back into text.
    token_ids = scores.argmax(dim=-1)
    decoded = processor.batch_decode(token_ids)

    # Only one clip is processed per call, so the batch has a single entry.
    return decoded[0]
22
+
23
+ # Set up the Gradio interface
24
def _transcribe_recording(recording):
    """Adapt Gradio's audio payload to ``transcribe_audio`` and run it.

    With ``gr.Audio(type="numpy")`` Gradio delivers ``(sample_rate, samples)``
    or ``None`` when nothing was recorded. ``transcribe_audio`` expects a
    mapping with ``"array"`` and ``"sampling_rate"``, so this wrapper
    normalises the samples, down-mixes to mono, resamples to the rate the
    processor's feature extractor declares, and forwards the result.

    Args:
        recording: ``(sample_rate, samples)`` tuple from Gradio, or ``None``.

    Returns:
        The transcription, or an empty string when there is no audio.
    """
    if recording is None:
        return ""

    sample_rate, samples = recording
    waveform = torch.as_tensor(samples)

    if waveform.dtype.is_floating_point:
        waveform = waveform.to(torch.float32)
    else:
        # Integer PCM: scale into [-1.0, 1.0).
        full_scale = float(torch.iinfo(waveform.dtype).max) + 1.0
        waveform = waveform.to(torch.float32) / full_scale

    if waveform.ndim > 1:
        # Assumes Gradio's (samples, channels) layout for multi-channel
        # audio -- TODO confirm against the installed Gradio version.
        waveform = waveform.mean(dim=-1)

    if waveform.numel() == 0:
        return ""

    target_rate = processor.feature_extractor.sampling_rate
    if sample_rate != target_rate:
        # Lightweight linear resampling using only torch (no anti-alias
        # filter). Swap in torchaudio/librosa if those are available and
        # transcription quality on high-rate recordings matters.
        new_length = max(1, round(waveform.numel() * target_rate / sample_rate))
        waveform = torch.nn.functional.interpolate(
            waveform.view(1, 1, -1),
            size=new_length,
            mode="linear",
            align_corners=False,
        ).view(-1)

    return transcribe_audio(
        {"array": waveform.numpy(), "sampling_rate": target_rate}
    )


# type="numpy" makes Gradio decode the recording itself; the previous
# type="filepath" handed a path string to code that indexes audio["array"].
# NOTE(review): the description mentions uploading, but source="microphone"
# only offers recording -- confirm which is intended.
interface = gr.Interface(
    fn=_transcribe_recording,
    inputs=gr.Audio(source="microphone", type="numpy"),
    outputs="text",
    title="Chinese Audio Transcription",
    description="Upload or record an audio file to transcribe it into Chinese.",
)

# Launch the interface
interface.launch()