Rezuwan committed on
Commit
7a2e84b
·
verified ·
1 Parent(s): f591b11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -3
app.py CHANGED
@@ -85,7 +85,11 @@ from transformers import pipeline
85
  import librosa
86
  import numpy as np
87
 
88
- transcriber = pipeline("automatic-speech-recognition", model="Rezuwan/regional_asr_weights")
 
 
 
 
89
 
90
  def transcribe_audio(audio_path):
91
  try:
@@ -102,8 +106,30 @@ iface = gr.Interface(
102
  fn=transcribe_audio,
103
  inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
104
  outputs=gr.Textbox(label="Transcription"),
105
- title="🗣️ Barishal ASR Speech-to-Text",
106
- description="Upload or record an audio file to transcribe Bengali speech using the Barishal ASR model."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  )
108
 
109
  iface.launch()
 
85
  import librosa
86
  import numpy as np
87
 
88
+
89
+ MODEL_NAME = "Rezuwan/regional_asr_weights"
90
+
91
+
92
+ transcriber = pipeline("automatic-speech-recognition", model=MODEL_NAME)
93
 
94
  def transcribe_audio(audio_path):
95
  try:
 
106
  fn=transcribe_audio,
107
  inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
108
  outputs=gr.Textbox(label="Transcription"),
109
+ title="Bengali Speech-to-Text with Regional Dialects",
110
+ description=(
111
+ f"""
112
+ Model Card: [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of arbitrary length. [Do leave a like (❤️) on the model card and this space]
113
+
114
+ Instructions:
115
+
116
+ 1. Click on the 'Record' option in the 'Upload or Record Audio' section on the left and record the audio.
117
+ 2. When done recording, click on the 'Stop' button and give it some time until a waveform shows up in the 'Upload or Record Audio' section (the same goes when uploading pre-recorded audio files) and then click the 'Submit' button.
118
+ 3. Wait for the audio clip to be processed (This could take a while 😅. Still needs work on the inference time) and then the transcription of the audio will appear in the 'output' section on the right.
119
+ 4. If you want to submit a trimmed version of the input, select the trimmed audio snippet and then click 'Trim' and then wait a bit until the waveform
120
+ shows up in the input section of the interface and then click 'Submit'.
121
+
122
+
123
+
124
+ Note:
125
+
126
+ 1. Since the corpus used to fine-tune this model was really small, the orthography might still not be up to the mark; it gets the work done but still needs work and manual validation.
127
+
128
+ 2. With proper data and a larger version of the corpus, I guess I'll be able to increase its transcription performance on Bengali speech with regional dialects.
129
+
130
+ ![](screenshot.jpg)
131
+ """
132
+ )
133
  )
134
 
135
  iface.launch()