Upload 5 files

- tools/__init__.py +3 -0
- tools/classify_language.py +197 -0
- tools/gen_phones.py +21 -0
- tools/log.py +16 -0
- tools/sentence.py +173 -0
tools/__init__.py
ADDED
@@ -0,0 +1,3 @@
"""
Utility package
"""
tools/classify_language.py
ADDED
@@ -0,0 +1,197 @@
import regex as re

try:
    from config import config

    LANGUAGE_IDENTIFICATION_LIBRARY = (
        config.webui_config.language_identification_library
    )
except Exception:
    LANGUAGE_IDENTIFICATION_LIBRARY = "langid"

module = LANGUAGE_IDENTIFICATION_LIBRARY.lower()

langid_languages = [
    "af", "am", "an", "ar", "as", "az", "be", "bg", "bn", "br", "bs", "ca",
    "cs", "cy", "da", "de", "dz", "el", "en", "eo", "es", "et", "eu", "fa",
    "fi", "fo", "fr", "ga", "gl", "gu", "he", "hi", "hr", "ht", "hu", "hy",
    "id", "is", "it", "ja", "jv", "ka", "kk", "km", "kn", "ko", "ku", "ky",
    "la", "lb", "lo", "lt", "lv", "mg", "mk", "ml", "mn", "mr", "ms", "mt",
    "nb", "ne", "nl", "nn", "no", "oc", "or", "pa", "pl", "ps", "pt", "qu",
    "ro", "ru", "rw", "se", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta",
    "te", "th", "tl", "tr", "ug", "uk", "ur", "vi", "vo", "wa", "xh", "zh",
    "zu",
]


def classify_language(text: str, target_languages: list = None) -> str:
    if module == "fastlid" or module == "fasttext":
        from fastlid import fastlid, supported_langs

        classifier = fastlid
        if target_languages is not None:
            target_languages = [
                lang for lang in target_languages if lang in supported_langs
            ]
            # fastlid is configured by assigning the candidate list
            fastlid.set_languages = target_languages
    elif module == "langid":
        import langid

        classifier = langid.classify
        if target_languages is not None:
            target_languages = [
                lang for lang in target_languages if lang in langid_languages
            ]
            langid.set_languages(target_languages)
    else:
        raise ValueError(f"Wrong module {module}")

    lang = classifier(text)[0]

    return lang


def classify_zh_ja(text: str) -> str:
    for idx, char in enumerate(text):
        unicode_val = ord(char)

        # Detect Japanese kana (hiragana / katakana)
        if 0x3040 <= unicode_val <= 0x309F or 0x30A0 <= unicode_val <= 0x30FF:
            return "ja"

        # Detect Han (CJK) characters
        if 0x4E00 <= unicode_val <= 0x9FFF:
            # Check the following character: a Han character followed by kana
            # indicates Japanese
            next_char = text[idx + 1] if idx + 1 < len(text) else None

            if next_char and (
                0x3040 <= ord(next_char) <= 0x309F or 0x30A0 <= ord(next_char) <= 0x30FF
            ):
                return "ja"

    return "zh"


def split_alpha_nonalpha(text, mode=1):
    if mode == 1:
        pattern = r"(?<=[\u4e00-\u9fff\u3040-\u30FF\d\s])(?=[\p{Latin}])|(?<=[\p{Latin}\s])(?=[\u4e00-\u9fff\u3040-\u30FF\d])"
    elif mode == 2:
        pattern = r"(?<=[\u4e00-\u9fff\u3040-\u30FF\s])(?=[\p{Latin}\d])|(?<=[\p{Latin}\d\s])(?=[\u4e00-\u9fff\u3040-\u30FF])"
    else:
        raise ValueError("Invalid mode. Supported modes are 1 and 2.")

    return re.split(pattern, text)


if __name__ == "__main__":
    text = "这是一个测试文本"
    print(classify_language(text))
    print(classify_zh_ja(text))  # "zh"

    text = "これはテストテキストです"
    print(classify_language(text))
    print(classify_zh_ja(text))  # "ja"

    text = "vits和Bert-VITS2是tts模型。花费3days.花费3天。Take 3 days"

    print(split_alpha_nonalpha(text, mode=1))
    # output: ['vits', '和', 'Bert-VITS', '2是', 'tts', '模型。花费3', 'days.花费3天。Take 3 days']

    print(split_alpha_nonalpha(text, mode=2))
    # output: ['vits', '和', 'Bert-VITS2', '是', 'tts', '模型。花费', '3days.花费', '3', '天。Take 3 days']

    text = "vits 和 Bert-VITS2 是 tts 模型。花费3days.花费3天。Take 3 days"
    print(split_alpha_nonalpha(text, mode=1))
    # output: ['vits ', '和 ', 'Bert-VITS', '2 ', '是 ', 'tts ', '模型。花费3', 'days.花费3天。Take ', '3 ', 'days']

    text = "vits 和 Bert-VITS2 是 tts 模型。花费3days.花费3天。Take 3 days"
    print(split_alpha_nonalpha(text, mode=2))
    # output: ['vits ', '和 ', 'Bert-VITS2 ', '是 ', 'tts ', '模型。花费', '3days.花费', '3', '天。Take ', '3 ', 'days']
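Both classifier backends accept an optional candidate set: langid is restricted through langid.set_languages(...), while fastlid is restricted by assigning the list to its set_languages attribute. A minimal sketch of the effect, assuming the langid package is installed:

```python
import langid

# Without a restriction the classifier may pick any of its 97 built-in
# languages; narrowing to the speaker's languages makes short fragments
# classifiable.
langid.set_languages(["zh", "ja", "en"])

lang, score = langid.classify("これはテストテキストです")
print(lang)  # "ja"
```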
tools/gen_phones.py
ADDED
@@ -0,0 +1,21 @@
"""
Text-to-phoneme conversion
"""
import commons
from text import cleaned_text_to_sequence
from text.cleaner import clean_text


def gen_phones(text, language_str, add_blank, style_text=None, style_weight=0.7):
    # style_text / style_weight are accepted for API parity but unused here
    style_text = None if style_text == "" else style_text
    # Implements the current version of get_text
    norm_text, phone, tone, word2ph = clean_text(text, language_str)
    phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str)
    if add_blank:
        # Intersperse blank (0) tokens between ids, as get_text does
        phone = commons.intersperse(phone, 0)
        tone = commons.intersperse(tone, 0)
        language = commons.intersperse(language, 0)
        for i in range(len(word2ph)):
            word2ph[i] = word2ph[i] * 2
        word2ph[0] += 1
    result = "{}|{}|{}|{}".format(norm_text, phone, tone, word2ph)
    return result
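gen_phones depends on the repository's commons and text modules, so it only runs from the project root. An illustrative call, assuming a Bert-VITS2-style text package is on the path (the exact id sequences depend on the installed cleaners):

```python
# Hypothetical invocation; the result is one
# "norm_text|phone_ids|tone_ids|word2ph" string.
from tools.gen_phones import gen_phones

line = gen_phones("你好,世界", language_str="ZH", add_blank=True)
norm_text, phones, tones, word2ph = line.split("|")
print(norm_text)  # normalized text
print(phones)     # phone ids with blanks interspersed, e.g. "[0, 12, 0, ...]"
```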
tools/log.py
ADDED
@@ -0,0 +1,16 @@
"""
Logger wrapper
"""
from loguru import logger
import sys


# Remove all default handlers
logger.remove()

# Custom format, attached to stdout
log_format = (
    "<g>{time:MM-DD HH:mm:ss}</g> <lvl>{level:<9}</lvl>| {file}:{line} | {message}"
)

logger.add(sys.stdout, format=log_format, backtrace=True, diagnose=True)
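Because this module configures the global loguru logger at import time, every module that imports it shares the same stdout sink. A minimal sketch (timestamp and file location are illustrative):

```python
from tools.log import logger

logger.info("model loaded")
# 01-02 13:45:10 INFO     | app.py:12 | model loaded
```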
tools/sentence.py
ADDED
@@ -0,0 +1,173 @@
import logging

import regex as re

from tools.classify_language import classify_language, split_alpha_nonalpha


def check_is_none(item) -> bool:
    """none -> True, not none -> False"""
    return (
        item is None
        or (isinstance(item, str) and str(item).isspace())
        or str(item) == ""
    )


def markup_language(text: str, target_languages: list = None) -> str:
    pattern = (
        r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`"
        r"\!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」"
        r"『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+"
    )
    sentences = re.split(pattern, text)

    pre_lang = ""
    p = 0

    if target_languages is not None:
        sorted_target_languages = sorted(target_languages)
        if sorted_target_languages in [["en", "zh"], ["en", "ja"], ["en", "ja", "zh"]]:
            new_sentences = []
            for sentence in sentences:
                new_sentences.extend(split_alpha_nonalpha(sentence))
            sentences = new_sentences

    for sentence in sentences:
        if check_is_none(sentence):
            continue

        lang = classify_language(sentence, target_languages)

        if pre_lang == "":
            text = text[:p] + text[p:].replace(
                sentence, f"[{lang.upper()}]{sentence}", 1
            )
            p += len(f"[{lang.upper()}]")
        elif pre_lang != lang:
            text = text[:p] + text[p:].replace(
                sentence, f"[{pre_lang.upper()}][{lang.upper()}]{sentence}", 1
            )
            p += len(f"[{pre_lang.upper()}][{lang.upper()}]")
        pre_lang = lang
        p += text[p:].index(sentence) + len(sentence)
    text += f"[{pre_lang.upper()}]"

    return text


def split_by_language(text: str, target_languages: list = None) -> list:
    pattern = (
        r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`"
        r"\!?\。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」"
        r"『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+"
    )
    sentences = re.split(pattern, text)

    pre_lang = ""
    start = 0
    end = 0
    sentences_list = []

    if target_languages is not None:
        sorted_target_languages = sorted(target_languages)
        if sorted_target_languages in [["en", "zh"], ["en", "ja"], ["en", "ja", "zh"]]:
            new_sentences = []
            for sentence in sentences:
                new_sentences.extend(split_alpha_nonalpha(sentence))
            sentences = new_sentences

    for sentence in sentences:
        if check_is_none(sentence):
            continue

        lang = classify_language(sentence, target_languages)

        end += text[end:].index(sentence)
        if pre_lang != "" and pre_lang != lang:
            sentences_list.append((text[start:end], pre_lang))
            start = end
        end += len(sentence)
        pre_lang = lang
    sentences_list.append((text[start:], pre_lang))

    return sentences_list


def sentence_split(text: str, max: int) -> list:
    pattern = r"[!(),—+\-.:;??。,、;:]+"
    sentences = re.split(pattern, text)
    discarded_chars = re.findall(pattern, text)

    sentences_list, count, p = [], 0, 0

    # Walk through the separators the text was split on
    for i, discarded_char in enumerate(discarded_chars):
        count += len(sentences[i]) + len(discarded_char)
        if count >= max:
            sentences_list.append(text[p : p + count].strip())
            p += count
            count = 0

    # Append whatever text remains
    if p < len(text):
        sentences_list.append(text[p:])

    return sentences_list


def sentence_split_and_markup(text, max=50, lang="auto", speaker_lang=None):
    # If this speaker only supports a single language
    if speaker_lang is not None and len(speaker_lang) == 1:
        if lang.upper() not in ["AUTO", "MIX"] and lang.lower() != speaker_lang[0]:
            logging.debug(
                f'lang "{lang}" is not in speaker_lang {speaker_lang}, automatically set lang={speaker_lang[0]}'
            )
            lang = speaker_lang[0]

    sentences_list = []
    if lang.upper() != "MIX":
        if max <= 0:
            sentences_list.append(
                markup_language(text, speaker_lang)
                if lang.upper() == "AUTO"
                else f"[{lang.upper()}]{text}[{lang.upper()}]"
            )
        else:
            for i in sentence_split(text, max):
                if check_is_none(i):
                    continue
                sentences_list.append(
                    markup_language(i, speaker_lang)
                    if lang.upper() == "AUTO"
                    else f"[{lang.upper()}]{i}[{lang.upper()}]"
                )
    else:
        sentences_list.append(text)

    for i in sentences_list:
        logging.debug(i)

    return sentences_list


if __name__ == "__main__":
    text = "这几天心里颇不宁静。今晚在院子里坐着乘凉,忽然想起日日走过的荷塘,在这满月的光里,总该另有一番样子吧。月亮渐渐地升高了,墙外马路上孩子们的欢笑,已经听不见了;妻在屋里拍着闰儿,迷迷糊糊地哼着眠歌。我悄悄地披了大衫,带上门出去。"
    print(markup_language(text, target_languages=None))
    print(sentence_split(text, max=50))
    print(sentence_split_and_markup(text, max=50, lang="auto", speaker_lang=None))

    text = "你好,这是一段用来测试自动标注的文本。こんにちは,これは自動ラベリングのテスト用テキストです.Hello, this is a piece of text to test autotagging.你好!今天我们要介绍VITS项目,其重点是使用了GAN Duration predictor和transformer flow,并且接入了Bert模型来提升韵律。Bert embedding会在稍后介绍。"
    print(split_by_language(text, ["zh", "ja", "en"]))

    text = "vits和Bert-VITS2是tts模型。花费3days.花费3天。Take 3 days"

    print(split_by_language(text, ["zh", "ja", "en"]))
    # output: [('vits', 'en'), ('和', 'ja'), ('Bert-VITS', 'en'), ('2是', 'zh'), ('tts', 'en'), ('模型。花费3', 'zh'), ('days.', 'en'), ('花费3天。', 'zh'), ('Take 3 days', 'en')]

    print(split_by_language(text, ["zh", "en"]))
    # output: [('vits', 'en'), ('和', 'zh'), ('Bert-VITS', 'en'), ('2是', 'zh'), ('tts', 'en'), ('模型。花费3', 'zh'), ('days.', 'en'), ('花费3天。', 'zh'), ('Take 3 days', 'en')]

    text = "vits 和 Bert-VITS2 是 tts 模型。花费 3 days. 花费 3天。Take 3 days"
    print(split_by_language(text, ["zh", "en"]))
    # output: [('vits ', 'en'), ('和 ', 'zh'), ('Bert-VITS2 ', 'en'), ('是 ', 'zh'), ('tts ', 'en'), ('模型。花费 ', 'zh'), ('3 days. ', 'en'), ('花费 3天。', 'zh'), ('Take 3 days', 'en')]
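For reference, markup_language brackets each same-language run with paired uppercase tags, the format the multilingual TTS frontend expects. An illustrative trace (exact boundaries depend on the classifier backend in use):

```python
from tools.sentence import markup_language

print(markup_language("你好。Hello!", ["zh", "en"]))
# expected shape: [ZH]你好。[ZH][EN]Hello![EN]
```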