bofenghuang commited on
Commit
a356f8e
·
1 Parent(s): 2c9c9de

merge interfaces

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. requirements.txt +1 -0
  3. run_demo.py +77 -0
app.py CHANGED
@@ -1 +1 @@
1
- run_demo_microphone.py
 
1
+ run_demo.py
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  transformers
2
  torch
 
3
  pyctcdecode
4
  pypi-kenlm
 
1
  transformers
2
  torch
3
+ torchaudio
4
  pyctcdecode
5
  pypi-kenlm
run_demo.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import warnings

import gradio as gr
import torchaudio
from transformers import pipeline
from transformers.utils.logging import disable_progress_bar

# Target sample rate expected by the wav2vec2 model.
SAMPLE_RATE = 16_000

# Quiet down third-party noise: suppress warnings and the transformers
# download progress bars.
warnings.filterwarnings("ignore")
disable_progress_bar()

# Structured, timestamped logging for the demo process.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] [%(name)s] %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%SZ",
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Load the French ASR model once at startup; reused for every request.
pipe = pipeline(model="bhuang/asr-wav2vec2-french")
logger.info("ASR pipeline has been initialized")
24
+
25
+
26
def process_audio_file(audio_file):
    """Load *audio_file* and return a 1-D mono waveform at ``SAMPLE_RATE``.

    Args:
        audio_file: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        A 1-D ``torch.Tensor`` of samples at 16 kHz.
    """
    waveform, sample_rate = torchaudio.load(audio_file)

    # Downmix to mono by averaging the channel dimension. The previous
    # squeeze(axis=0) only removed a singleton channel dim, so a stereo
    # recording stayed 2-D (2, n) and reached the pipeline un-mixed; for
    # mono input mean(dim=0) yields the same values as squeeze did.
    waveform = waveform.mean(dim=0)

    # Resample to the model's expected rate if needed.
    if sample_rate != SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(sample_rate, SAMPLE_RATE)
        waveform = resampler(waveform)

    return waveform
36
+
37
+
38
def transcribe(microphone_audio_file, uploaded_audio_file):
    """Transcribe audio from the microphone or an uploaded file.

    When both sources are provided the microphone recording wins and a
    warning is prepended to the returned transcription. When neither is
    provided an error message is returned instead.
    """
    # Guard: nothing to transcribe at all.
    if microphone_audio_file is None and uploaded_audio_file is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    notice = ""
    if microphone_audio_file is not None and uploaded_audio_file is not None:
        # Both inputs set: prefer the microphone, warn about the upload.
        notice = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    # Microphone takes precedence whenever it is available.
    selected_file = microphone_audio_file if microphone_audio_file is not None else uploaded_audio_file

    waveform = process_audio_file(selected_file)

    # chunking variant kept for reference:
    # text = pipe(waveform, chunk_length_s=30, stride_length_s=5)["text"]
    transcription = pipe(waveform)["text"]
    logger.info(f"Transcription for {selected_file}: {transcription}")

    return notice + transcription
60
+
61
+
62
# Two optional audio sources: a live microphone recording and a file upload.
# Both hand the demo a filesystem path (type="filepath").
microphone_input = gr.Audio(source="microphone", type="filepath", label="Record something...", optional=True)
upload_input = gr.Audio(source="upload", type="filepath", label="Upload some audio file...", optional=True)

iface = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input, upload_input],
    outputs="text",
    layout="horizontal",
    title="Speech-to-Text in French",
    description="Realtime demo for French automatic speech recognition.",
    allow_flagging="never",
)

# Queue requests so long transcriptions don't time out the HTTP connection.
iface.launch(enable_queue=True)