Spaces:
Running
Running
| # Copyright (c) 2023 Amphion. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import re | |
| from g2p_en import G2p | |
| from string import punctuation | |
def read_lexicon(lex_path):
    """Load a pronunciation lexicon from a whitespace-delimited text file.

    Each non-blank line is expected to hold a word followed by its phones,
    separated by any run of whitespace.

    Args:
        lex_path: Path of the lexicon file to read (decoded as UTF-8).

    Returns:
        A dict mapping each lower-cased word to its list of phone strings.
        When a word occurs more than once (ignoring case), the first
        occurrence wins.
    """
    lexicon = {}
    with open(lex_path, encoding="utf-8") as f:
        for line in f:
            # str.split() with no argument drops leading/trailing whitespace
            # and collapses runs, so no empty "word" or "phone" is produced.
            fields = line.split()
            if not fields:
                # Blank line: nothing to register.
                continue
            word = fields[0].lower()
            # Keep the first pronunciation seen for a word.
            if word not in lexicon:
                lexicon[word] = fields[1:]
    return lexicon
_G2P_INSTANCE = None


def _get_g2p():
    """Return the shared ``G2p`` converter, constructing it on first use."""
    global _G2P_INSTANCE
    if _G2P_INSTANCE is None:
        _G2P_INSTANCE = G2p()
    return _G2P_INSTANCE


def preprocess_english(text, lexicon):
    """Convert English text into a space-separated phone string.

    Trailing punctuation is stripped from ``text``, which is then split on
    the separator characters ``, ; . - ? !``, whitespace and ``+``. Each
    resulting token (separators included) is looked up lower-cased in
    ``lexicon``; tokens that are not found are passed to ``G2p``.

    Args:
        text: The input sentence.
        lexicon: Mapping from lower-cased word to a list of phones, e.g. as
            returned by ``read_lexicon``.

    Returns:
        The phones joined by single spaces. A phone item that is empty or a
        single non-word, non-whitespace character (e.g. ``,``) and that sits
        strictly between two other items is replaced by ``sp`` (presumably a
        short-pause marker -- confirm against the phone set in use).
    """
    text = text.rstrip(punctuation)
    phones = []
    # The capturing group makes re.split keep each separator as its own
    # token, so punctuation reaches the lexicon / G2p lookup as well.
    for token in re.split(r"([,;.\-\?\!\s+])", text):
        key = token.lower()
        if key in lexicon:
            phones += lexicon[key]
        else:
            # The converter is built only when a token actually needs it and
            # is reused across calls instead of being rebuilt every time.
            phones += [p for p in _get_g2p()(token) if p != " "]
    # Temporarily delimit items with "}{" so an interior punctuation-only
    # item appears as "{x}" and can be rewritten to "{sp}". No outer braces
    # are added, so the first and last items are never rewritten.
    phones = "}{".join(phones)
    phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
    return phones.replace("}{", " ")