txya900619's picture
feat: add app.py
0c074b9
raw
history blame
1.48 kB
import re
from omegaconf import OmegaConf
# All grapheme-to-phoneme tables found under the top-level "g2p" key of
# configs/g2p.yaml, converted from an OmegaConf node to plain Python objects.
_G2P_TABLES = OmegaConf.to_object(OmegaConf.load("configs/g2p.yaml"))["g2p"]

# Table stored under the "阿美_秀姑巒" entry (presumably the Xiuguluan Amis
# dialect -- confirm against the config). text_to_ipa() uses it to map
# orthographic symbols to IPA strings.
XIUGULUAN_G2P = _G2P_TABLES["阿美_秀姑巒"]
def can_form_string(x, symbol_dict):
    """Segment ``x`` into a sequence of keys of ``symbol_dict``.

    Keys are tried in the dictionary's iteration order with depth-first
    backtracking, so the segmentation returned is the first one that order
    produces (not necessarily the longest-match one).

    Args:
        x: The string to segment.
        symbol_dict: Mapping whose keys are the allowed symbols. Only the
            keys are consulted; empty-string keys are ignored because they
            can never consume input.

    Returns:
        ``(True, parts)`` where ``"".join(parts) == x`` when a segmentation
        exists (``parts`` is ``[]`` for an empty ``x``), otherwise
        ``(False, [])``.
    """
    # An empty key matches everywhere without advancing, which would make
    # the search loop forever; drop such keys up front.
    keys = [key for key in symbol_dict if key]
    end = len(x)

    # Positions from which the remainder of ``x`` is known to be
    # unsegmentable. Remembering them keeps the search from re-exploring the
    # same suffix again and again when keys overlap (e.g. "a" and "aa").
    dead = set()

    # Explicit stack instead of recursion so that long inputs cannot hit the
    # interpreter recursion limit. Each frame is (position, remaining keys to
    # try at that position); ``parts`` holds the keys chosen so far, one per
    # frame above the root.
    parts = []
    stack = [(0, iter(keys))]
    while stack:
        pos, candidates = stack[-1]
        if pos == end:
            return True, parts
        for key in candidates:
            nxt = pos + len(key)
            if nxt not in dead and x.startswith(key, pos):
                parts.append(key)
                stack.append((nxt, iter(keys)))
                break
        else:
            # Every key failed from here: record it and backtrack.
            dead.add(pos)
            stack.pop()
            if parts:
                parts.pop()
    return False, []
def text_to_ipa(text, ignore_comma=True):
    """Convert orthographic text to a space-separated IPA string.

    The text is lower-cased, the characters ``.``, ``?`` and ``!`` are
    removed, straight apostrophes are replaced by ``’``, and the result is
    split on whitespace. Each word is segmented into symbols of
    ``XIUGULUAN_G2P`` (plus ``","``) via ``can_form_string`` and the symbols'
    IPA values are concatenated.

    Args:
        text: Input text.
        ignore_comma: When true, commas map to an empty string; otherwise
            they are kept as ``","`` in the output.

    Returns:
        The IPA words joined by single spaces, after a final set of symbol
        substitutions. Returns ``""`` as soon as any word cannot be
        segmented. A word that maps to an empty string (e.g. a lone comma
        with ``ignore_comma=True``) still occupies a slot, so the result may
        contain consecutive spaces.
    """
    normalized = re.sub(r"[.?!]", "", text.lower()).replace("'", "’")
    words = normalized.split()  # change in future
    print(f"ipa: {words}")

    # The lookup table is the same for every word, so build it once instead
    # of once per loop iteration.
    extended_g2p = {**XIUGULUAN_G2P, ",": "" if ignore_comma else ","}

    ipa_words = []
    for word in words:
        matched, symbols = can_form_string(word, extended_g2p)
        if not matched:
            # One unconvertible word aborts the whole conversion.
            print(f"no match g2p : {word}")
            return ""
        ipa_words.append("".join(extended_g2p[symbol] for symbol in symbols))

    # Post-process the IPA produced by the table into the symbol forms this
    # function emits (applied to the output only, never to the input text).
    return (
        " ".join(ipa_words)
        .replace("g", "ɡ")
        .replace("ʦ", "t͡s")
        .replace("ʨ", "t͡ɕ")
        .replace("R", "ʀ")
        .replace("ʤ", "dʒ")
    )