txya900619 committed
Commit a3effe4 · 1 Parent(s): 5548ade

feat: add app.py

Files changed (3)
  1. app.py +151 -0
  2. configs/models.yaml +18 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,151 @@
+ import json
+
+ import gradio as gr
+ from huggingface_hub import snapshot_download
+ from omegaconf import OmegaConf
+ from vosk import KaldiRecognizer, Model
+
+
+ def load_vosk(model_id: str):
+     # Download the Vosk model snapshot from the Hugging Face Hub and load it.
+     model_dir = snapshot_download(model_id)
+     return Model(model_path=model_dir)
+
+
+ # Lets configs/models.yaml instantiate models via ${load_vosk:<repo_id>}.
+ OmegaConf.register_new_resolver("load_vosk", load_vosk)
+
+ models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
+
+
+ def automatic_speech_recognition(
+     model_id: str, dialect_id: str, stream: tuple | None, new_chunk: tuple
+ ):
+     # Pick the per-dialect recognizer model when the entry is a mapping,
+     # otherwise use the single merged model.
+     if isinstance(models_config[model_id]["model"], dict):
+         model = models_config[model_id]["model"][dialect_id]
+     else:
+         model = models_config[model_id]["model"]
+
+     # new_chunk comes from gr.Audio(type="numpy") as (sample_rate, samples);
+     # keep only the first channel if the capture is stereo.
+     sample_rate, audio_array = new_chunk
+     if audio_array.ndim == 2:
+         audio_array = audio_array[:, 0]
+
+     audio_bytes = audio_array.tobytes()
+
+     # `stream` is the gr.State value: None on the first chunk, afterwards the
+     # (recognizer, finalized segments) pair returned by the previous call.
+     if stream is None:
+         rec = KaldiRecognizer(model, sample_rate)
+         rec.SetWords(True)
+         result = []
+     else:
+         rec, result = stream
+
+     # AcceptWaveform returns True once Vosk has finalized a segment.
+     if rec.AcceptWaveform(audio_bytes):
+         text_result = json.loads(rec.Result())["text"]
+         if text_result != "":
+             result.append(text_result)
+         partial_result = ""
+     else:
+         partial_result = json.loads(rec.PartialResult())["partial"] + " "
+
+     if len(result) > 0:
+         output_text = ",".join(result) + "," + partial_result
+     else:
+         output_text = partial_result
+
+     return (rec, result), output_text
+
+
+ def when_model_selected(model_id: str):
+     model_config = models_config[model_id]
+
+     # Models without a dialect_mapping (e.g. the merged model) hide the
+     # dialect dropdown instead of repopulating it.
+     if "dialect_mapping" not in model_config:
+         return gr.update(visible=False)
+
+     dialect_drop_down_choices = [
+         (k, v) for k, v in model_config["dialect_mapping"].items()
+     ]
+
+     return gr.update(
+         choices=dialect_drop_down_choices,
+         value=dialect_drop_down_choices[0][1],
+         visible=True,
+     )
+
+
+ demo = gr.Blocks(
+     title="臺灣客語語音辨識系統",  # Taiwanese Hakka speech recognition system
+     css="@import url(https://tauhu.tw/tauhu-oo.css);",
+     theme=gr.themes.Default(
+         font=(
+             "tauhu-oo",
+             gr.themes.GoogleFont("Source Sans Pro"),
+             "ui-sans-serif",
+             "system-ui",
+             "sans-serif",
+         )
+     ),
+ )
+
+ with demo:
+     default_model_id = list(models_config.keys())[0]
+     model_drop_down = gr.Dropdown(
+         models_config.keys(),
+         value=default_model_id,
+         label="模型",  # model
+     )
+
+     dialect_drop_down = gr.Dropdown(
+         choices=[
+             (k, v)
+             for k, v in models_config[default_model_id]["dialect_mapping"].items()
+         ],
+         value=list(models_config[default_model_id]["dialect_mapping"].values())[0],
+         label="腔調",  # dialect
+     )
+
+     model_drop_down.input(
+         when_model_selected,
+         inputs=[model_drop_down],
+         outputs=[dialect_drop_down],
+     )
+
+     # App header: title, developers (研發), and collaborating units (合作單位).
+     gr.Markdown(
+         """
+         # 臺灣客語語音辨識系統
+         ### Taiwanese Hakka Automatic-Speech-Recognition System
+         ### 研發
+         - **[李鴻欣 Hung-Shin Lee](mailto:[email protected])([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
+         - **[陳力瑋 Li-Wei Chen](mailto:[email protected])([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
+         ### 合作單位
+         - **[國立聯合大學智慧客家實驗室](https://www.gohakka.org)**
+         """
+     )
+     state = gr.State()
+     audio = gr.Audio(
+         label="錄音",  # recording
+         type="numpy",
+         format="wav",
+         waveform_options=gr.WaveformOptions(
+             sample_rate=16000,
+         ),
+         sources=["microphone"],
+         streaming=True,
+     )
+     gr.Interface(
+         automatic_speech_recognition,
+         inputs=[
+             model_drop_down,
+             dialect_drop_down,
+             state,
+             audio,
+         ],
+         outputs=[
+             state,
+             gr.Text(interactive=False, label="客語漢字"),  # Hakka Han characters
+         ],
+         live=True,
+         stream_every=0.25,
+         clear_btn=None,
+         # flagging_mode="auto",
+     )
+
+
+ demo.launch()
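
For context, `automatic_speech_recognition()` is essentially Vosk's standard streaming loop, re-entered once per audio chunk with the recognizer and the finalized segments carried across calls in `gr.State`. A minimal standalone sketch of the same loop over a WAV file, not part of this commit; the model path, file name, and chunk size are placeholders:

    import json
    import wave

    from vosk import KaldiRecognizer, Model

    # Placeholder paths: any local Vosk model directory and a 16 kHz mono PCM WAV.
    model = Model(model_path="path/to/vosk-model")
    wav = wave.open("sample_16k_mono.wav", "rb")

    rec = KaldiRecognizer(model, wav.getframerate())
    rec.SetWords(True)

    segments = []
    while True:
        chunk = wav.readframes(4000)
        if not chunk:
            break
        if rec.AcceptWaveform(chunk):  # True once Vosk finalizes a segment
            segments.append(json.loads(rec.Result())["text"])
        else:
            partial = json.loads(rec.PartialResult())["partial"]  # in-progress text

    segments.append(json.loads(rec.FinalResult())["text"])
    print(" ".join(s for s in segments if s))
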
configs/models.yaml ADDED
@@ -0,0 +1,18 @@
+ htia-0.1:
+   model:
+     htia_sixian: ${load_vosk:formospeech/kaldi-taiwanese-hakka-sixian}
+     htia_hailu: ${load_vosk:formospeech/kaldi-taiwanese-hakka-hailu}
+     htia_dapu: ${load_vosk:formospeech/kaldi-taiwanese-hakka-dapu}
+     htia_raoping: ${load_vosk:formospeech/kaldi-taiwanese-hakka-raoping}
+     htia_zhaoan: ${load_vosk:formospeech/kaldi-taiwanese-hakka-zhaoan}
+     htia_nansixian: ${load_vosk:formospeech/kaldi-taiwanese-hakka-nansixian}
+   dialect_mapping:
+     四縣: htia_sixian
+     海陸: htia_hailu
+     大埔: htia_dapu
+     饒平: htia_raoping
+     詔安: htia_zhaoan
+     南四縣: htia_nansixian
+
+ htia-0.1m:
+   model: ${load_vosk:formospeech/kaldi-taiwanese-hakka-merge}
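
The `${load_vosk:...}` values are OmegaConf resolver interpolations: because app.py registers `load_vosk` with `OmegaConf.register_new_resolver`, the call to `OmegaConf.to_object` replaces each entry with a loaded `vosk.Model`. A toy sketch of the mechanism with a stand-in resolver (the `fake_load` name and repo id are illustrative only, not from this commit):

    from omegaconf import OmegaConf

    # Stand-in for load_vosk(): any callable registered under the resolver name works.
    OmegaConf.register_new_resolver("fake_load", lambda repo_id: f"<model from {repo_id}>")

    cfg = OmegaConf.create({"model": "${fake_load:formospeech/some-repo}"})
    print(OmegaConf.to_object(cfg))  # {'model': '<model from formospeech/some-repo>'}
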
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ vosk
+ omegaconf
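
Note that app.py also imports `gradio` and `huggingface_hub`, which are not listed here; on a Gradio Space the SDK runtime supplies `gradio` (and `huggingface_hub` comes in as one of its dependencies), so a run outside Spaces would presumably need both installed alongside `vosk` and `omegaconf`.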