Spaces:
Running
on
Zero
Running
on
Zero
import json | |
import gradio as gr | |
from huggingface_hub import snapshot_download | |
from omegaconf import OmegaConf | |
from vosk import KaldiRecognizer, Model | |
def load_vosk(model_id: str):
    """Fetch a model repository from the Hugging Face Hub and load it with Vosk.

    Args:
        model_id: Hub repository id handed to ``snapshot_download``.

    Returns:
        A ``vosk.Model`` built from the local snapshot directory.
    """
    # snapshot_download returns the local directory holding the repository files.
    return Model(model_path=snapshot_download(model_id))
# The resolver has to be registered before the config is converted below:
# presumably configs/models.yaml refers to it through ``${load_vosk:...}``
# interpolations, which get resolved during the conversion -- TODO confirm
# against the YAML file.
OmegaConf.register_new_resolver("load_vosk", load_vosk)

# Load the YAML first, then turn it into plain Python containers so the rest
# of the app can use ordinary dict indexing on ``models_config``.
_models_yaml = OmegaConf.load("configs/models.yaml")
models_config = OmegaConf.to_object(_models_yaml)
def automatic_speech_recognition( | |
model_id: str, dialect_id: str, stream: str, new_chunk: str | |
): | |
if isinstance(models_config[model_id]["model"], dict): | |
model = models_config[model_id]["model"][dialect_id] | |
else: | |
model = models_config[model_id]["model"] | |
sample_rate, audio_array = new_chunk | |
if audio_array.ndim == 2: | |
audio_array = audio_array[:, 0] | |
audio_bytes = audio_array.tobytes() | |
if stream is None: | |
rec = KaldiRecognizer(model, sample_rate) | |
rec.SetWords(True) | |
result = [] | |
else: | |
rec, result = stream | |
if rec.AcceptWaveform(audio_bytes): | |
text_result = json.loads(rec.Result())["text"] | |
if text_result != "": | |
result.append(text_result) | |
partial_result = "" | |
else: | |
partial_result = json.loads(rec.PartialResult())["partial"] + " " | |
if len(result) > 0: | |
output_text = ",".join(result) + "," + partial_result | |
else: | |
output_text = partial_result | |
return (rec, result), output_text | |
def when_model_selected(model_id: str):
    """Build the dialect dropdown update for the newly selected model.

    Args:
        model_id: Key into ``models_config`` for the selected model.

    Returns:
        A ``gr.update`` that hides the dropdown when the model entry has no
        ``"dialect_mapping"`` key; otherwise one that shows it, with one
        ``(key, value)`` choice per mapping item and the first item's value
        selected.
    """
    selected_model = models_config[model_id]
    try:
        dialect_mapping = selected_model["dialect_mapping"]
    except KeyError:
        # This model has no dialects to choose from.
        return gr.update(visible=False)

    choices = list(dialect_mapping.items())
    first_value = choices[0][1]
    return gr.update(choices=choices, value=first_value, visible=True)
# Font fallback chain for the UI theme: the "tauhu-oo" family comes first
# (the stylesheet imported through ``css`` below is presumably what provides
# it -- TODO confirm), followed by a Google font and generic families.
_FONT_STACK = (
    "tauhu-oo",
    gr.themes.GoogleFont("Source Sans Pro"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
)
_THEME = gr.themes.Default(font=_FONT_STACK)

# Top-level Blocks container; the components are added in the ``with demo:``
# section that follows.
demo = gr.Blocks(
    title="臺灣客語語音辨識系統",
    css="@import url(https://tauhu.tw/tauhu-oo.css);",
    theme=_THEME,
)
with demo:
    # Components are created in display order; do not reorder these statements.

    # The first model declared in the config is the initial selection.
    model_ids = list(models_config)
    default_model_id = model_ids[0]

    # A model entry may have no "dialect_mapping" (when_model_selected hides
    # the dropdown in that case), so the initial dropdown tolerates a missing
    # mapping too instead of raising KeyError at startup.
    default_dialects = list(
        (models_config[default_model_id].get("dialect_mapping") or {}).items()
    )

    model_drop_down = gr.Dropdown(
        model_ids,
        value=default_model_id,
        label="模型",
    )

    dialect_drop_down = gr.Dropdown(
        # Each choice is a (mapping key, mapping value) tuple.
        choices=default_dialects,
        value=default_dialects[0][1] if default_dialects else None,
        label="腔調",
        visible=bool(default_dialects),
    )

    # Refresh the dialect choices whenever the user picks another model.
    model_drop_down.input(
        when_model_selected,
        inputs=[model_drop_down],
        outputs=[dialect_drop_down],
    )

    # NOTE(review): the mailto targets below read "[email protected]", which
    # looks like a redaction placeholder -- confirm the real addresses.
    gr.Markdown(
        """
# 臺灣客語語音辨識系統
### Taiwanese Hakka Automatic-Speech-Recognition System
### 研發
- **[李鴻欣 Hung-Shin Lee](mailto:[email protected])([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
- **[陳力瑋 Li-Wei Chen](mailto:[email protected])([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
### 合作單位
- **[國立聯合大學智慧客家實驗室](https://www.gohakka.org)**
"""
    )

    # Carries the (recognizer, finalized_segments) tuple returned by
    # automatic_speech_recognition from one streamed chunk to the next.
    state = gr.State()

    audio = gr.Audio(
        label="錄音",
        type="numpy",
        format="wav",
        waveform_options=gr.WaveformOptions(
            sample_rate=16000,
        ),
        sources=["microphone"],
        streaming=True,
    )

    # Live interface: the recognizer callback is invoked on streamed audio
    # (``stream_every=0.25``) with the current selections and state.
    gr.Interface(
        automatic_speech_recognition,
        inputs=[
            model_drop_down,
            dialect_drop_down,
            state,
            audio,
        ],
        outputs=[
            state,
            gr.Text(interactive=False, label="客語漢字"),
        ],
        live=True,
        stream_every=0.25,
        clear_btn=None,
        # flagging_mode="auto",
    )

demo.launch()