Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
1852d76
1
Parent(s):
f6358f1
feat: upper first char and add final punc
Browse files
app.py
CHANGED
@@ -1,8 +1,10 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import torch
|
3 |
from omegaconf import OmegaConf
|
4 |
from transformers import pipeline
|
5 |
-
import spaces
|
6 |
|
7 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
8 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
@@ -19,46 +21,60 @@ def load_pipe(model_id: str):
|
|
19 |
device=device,
|
20 |
)
|
21 |
|
|
|
22 |
OmegaConf.register_new_resolver("load_pipe", load_pipe)
|
23 |
|
24 |
models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
|
25 |
|
|
|
26 |
@spaces.GPU
|
27 |
def automatic_speech_recognition(model_id: str, dialect_id: str, audio_file: str):
|
28 |
model = models_config[model_id]["model"]
|
29 |
-
|
30 |
generate_kwargs = {
|
31 |
"task": "transcribe",
|
32 |
"language": "id",
|
33 |
"num_beams": 5,
|
34 |
}
|
35 |
if models_config[model_id]["dialect_mapping"] is not None:
|
36 |
-
generate_kwargs["prompt_ids"] = torch.from_numpy(
|
37 |
-
|
38 |
-
)
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
|
42 |
def when_model_selected(model_id: str):
|
43 |
model_config = models_config[model_id]
|
44 |
|
45 |
-
if model_config["dialect_mapping"] is not None:
|
46 |
dialect_drop_down_choices = [
|
47 |
(k, v) for k, v in model_config["dialect_mapping"].items()
|
48 |
]
|
49 |
-
|
50 |
return gr.update(
|
51 |
choices=dialect_drop_down_choices,
|
52 |
value=dialect_drop_down_choices[0][1],
|
53 |
)
|
54 |
else:
|
55 |
-
return gr.update(
|
56 |
-
|
57 |
-
)
|
58 |
|
59 |
def get_title():
|
60 |
with open("DEMO.md") as tong:
|
61 |
-
return tong.readline().strip(
|
|
|
62 |
|
63 |
demo = gr.Blocks(
|
64 |
title=get_title(),
|
@@ -90,7 +106,7 @@ with demo:
|
|
90 |
],
|
91 |
# value=list(models_config[default_model_id]["dialect_mapping"].values())[0],
|
92 |
label="族別",
|
93 |
-
visible=False
|
94 |
)
|
95 |
|
96 |
model_drop_down.input(
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
import gradio as gr
|
4 |
+
import spaces
|
5 |
import torch
|
6 |
from omegaconf import OmegaConf
|
7 |
from transformers import pipeline
|
|
|
8 |
|
9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
|
|
21 |
device=device,
|
22 |
)
|
23 |
|
24 |
+
|
25 |
OmegaConf.register_new_resolver("load_pipe", load_pipe)
|
26 |
|
27 |
models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
|
28 |
|
29 |
+
|
30 |
@spaces.GPU
def automatic_speech_recognition(model_id: str, dialect_id: str, audio_file: str):
    """Transcribe `audio_file` with the pipeline configured for `model_id`.

    Args:
        model_id: Key into `models_config`; selects the ASR pipeline.
        dialect_id: Dialect token used both to prompt the model and to be
            scrubbed from the raw transcription text.
        audio_file: Path to the audio file to transcribe.

    Returns:
        The cleaned transcription: first letter of each sentence uppercased
        and a final punctuation mark guaranteed.
    """
    model = models_config[model_id]["model"]

    generate_kwargs = {
        "task": "transcribe",
        "language": "id",
        "num_beams": 5,
    }
    if models_config[model_id]["dialect_mapping"] is not None:
        # Prime the decoder with the dialect token so generation stays
        # in-dialect; the token itself is removed from the output below.
        generate_kwargs["prompt_ids"] = torch.from_numpy(
            model.tokenizer.get_prompt_ids(dialect_id)
        ).to(device)

    raw_text = model(audio_file, generate_kwargs=generate_kwargs)["text"].replace(
        f" {dialect_id}", ""
    )
    return _format_transcript(raw_text)


def _format_transcript(text: str) -> str:
    """Ensure final punctuation and capitalize each sentence of `text`.

    Fixes over the previous inline version:
    - `re.split(r"[.!?] ", ...)` dropped the sentence-ending punctuation,
      so rejoining deleted every mid-text `.`/`!`/`?`. Splitting with a
      capturing group keeps the delimiters so they can be re-joined intact.
    - Guards against an empty transcription (previously `text[-1]` raised
      IndexError) and against empty split pieces.
    """
    if not text:
        return text
    if text[-1] not in ".!?":
        text += "."
    # With a capturing group, re.split returns
    # [sentence, delimiter, sentence, delimiter, ..., last_sentence];
    # sentences sit at the even indices.
    pieces = re.split(r"([.!?] )", text)
    for i in range(0, len(pieces), 2):
        if pieces[i]:
            pieces[i] = pieces[i][0].upper() + pieces[i][1:]
    return "".join(pieces)
|
56 |
|
57 |
|
58 |
def when_model_selected(model_id: str):
    """Update the dialect dropdown when the user picks a model.

    Models that declare a `dialect_mapping` get the dropdown populated
    (first dialect pre-selected); models without one get it hidden.
    """
    dialect_mapping = models_config[model_id]["dialect_mapping"]

    # No dialects for this model: hide the dropdown entirely.
    if dialect_mapping is None:
        return gr.update(visible=False)

    choices = list(dialect_mapping.items())
    return gr.update(
        choices=choices,
        value=choices[0][1],
    )
|
72 |
+
|
|
|
73 |
|
74 |
def get_title():
    """Return the demo title: the first line of DEMO.md without its
    Markdown heading marker.

    Bug fixed: `.strip("# ")` alone left the trailing newline in place
    ("\n" is not in the strip set), so the newline leaked into the page
    title. Strip whitespace first, then the "# " marker characters.
    """
    with open("DEMO.md") as readme:
        return readme.readline().strip().strip("# ")
|
77 |
+
|
78 |
|
79 |
demo = gr.Blocks(
|
80 |
title=get_title(),
|
|
|
106 |
],
|
107 |
# value=list(models_config[default_model_id]["dialect_mapping"].values())[0],
|
108 |
label="族別",
|
109 |
+
visible=False,
|
110 |
)
|
111 |
|
112 |
model_drop_down.input(
|