GPT-SoVITS-ProPlus

Running on Zero

App Files Files Community

lj1995 committed on Jul 11

Commit

4c1cc9a

verified ·

1 Parent(s): b40b16a

Update inference_webui.py

Browse files

Files changed (1) hide show

inference_webui.py +39 -31

inference_webui.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 os.makedirs("pretrained_models", exist_ok=True)
 from huggingface_hub import snapshot_download
@@ -84,15 +85,22 @@ from module.mel_processing import spectrogram_torch
 from module.models import SynthesizerTrn
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
-from tools.i18n.i18n import I18nAuto, scan_language_list
 from tools.my_utils import load_audio
-language=os.environ.get("language","Auto")
-language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
-i18n = I18nAuto(language="Auto")
 # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
 if torch.cuda.is_available():
     device = "cuda"
@@ -102,25 +110,25 @@ else:
     is_half = False
 dict_language_v1 = {
-    i18n("中文"): "all_zh",  # 全部按中文识别
-    i18n("英文"): "en",  # 全部按英文识别#######不变
-    i18n("日文"): "all_ja",  # 全部按日文识别
-    i18n("中英混合"): "zh",  # 按中英混合识别####不变
-    i18n("日英混合"): "ja",  # 按日英混合识别####不变
-    i18n("多语种混合"): "auto",  # 多语种启动切分识别语种
 }
 dict_language_v2 = {
-    i18n("中文"): "all_zh",  # 全部按中文识别
-    i18n("英文"): "en",  # 全部按英文识别#######不变
-    i18n("日文"): "all_ja",  # 全部按日文识别
-    i18n("粤语"): "all_yue",  # 全部按中文识别
-    i18n("韩文"): "all_ko",  # 全部按韩文识别
-    i18n("中英混合"): "zh",  # 按中英混合识别####不变
-    i18n("日英混合"): "ja",  # 按日英混合识别####不变
-    i18n("粤英混合"): "yue",  # 按粤英混合识别####不变
-    i18n("韩英混合"): "ko",  # 按韩英混合识别####不变
-    i18n("多语种混合"): "auto",  # 多语种启动切分识别语种
-    i18n("多语种混合(粤语)"): "auto_yue",  # 多语种启动切分识别语种
 }
 dict_language = dict_language_v1 if version == "v1" else dict_language_v2
@@ -211,7 +219,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
     dict_language = dict_language_v1 if version == "v1" else dict_language_v2
     if prompt_language is not None and text_language is not None:
-        if prompt_language in list(dict_language.keys()):
             prompt_text_update, prompt_language_update = (
                 {"__type__": "update"},
                 {"__type__": "update", "value": prompt_language},
@@ -219,7 +227,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
         else:
             prompt_text_update = {"__type__": "update", "value": ""}
             prompt_language_update = {"__type__": "update", "value": i18n("中文")}
-        if text_language in list(dict_language.keys()):
             text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
         else:
             text_update = {"__type__": "update", "value": ""}
@@ -468,12 +476,12 @@ def get_tts_wav(
         prompt_text = prompt_text.strip("\n")
         if prompt_text[-1] not in splits:
             prompt_text += "。" if prompt_language != "en" else "."
-        print(i18n("实际输入的参考文本:"), prompt_text)
     text = text.strip("\n")
     if text[0] not in splits and len(get_first(text)) < 4:
         text = "。" + text if text_language != "en" else "." + text
-    print(i18n("实际输入的目标文本:"), text)
     zero_wav = np.zeros(
         int(hps.data.sampling_rate * 0.3),
         dtype=np.float16 if is_half == True else np.float32,
@@ -517,7 +525,7 @@ def get_tts_wav(
         text = cut5(text)
     while "\n\n" in text:
         text = text.replace("\n\n", "\n")
-    print(i18n("实际输入的目标文本(切句后):"), text)
     texts = text.split("\n")
     texts = process_text(texts)
     texts = merge_short_text_in_array(texts, 5)
@@ -533,9 +541,9 @@ def get_tts_wav(
             continue
         if text[-1] not in splits:
             text += "。" if text_language != "en" else "."
-        print(i18n("实际输入的目标文本(每句):"), text)
         phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
-        print(i18n("前端处理后的文本(每句):"), norm_text2)
         if not ref_free:
             bert = torch.cat([bert1, bert2], 1)
             all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
@@ -892,9 +900,9 @@ if __name__ == "__main__":
             gen = get_tts_wav(
                 ref_wav_path=file_name,
                 prompt_text="",
-                prompt_language=i18n("中文"),
                 text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
-                text_language=i18n("多语种混合"),
                 inp_refs=[],
             )
             next(gen)
@@ -903,5 +911,5 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         inbrowser=True,
         show_api=False,
-        allowed_paths=["/"]#,i18n=i18n
     )

 import os
+os.system("pip install gradio-client==1.10.4 gradio-5.35.0-py3-none-any.whl")
 os.makedirs("pretrained_models", exist_ok=True)
 from huggingface_hub import snapshot_download
 from module.models import SynthesizerTrn
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
+# from tools.i18n.i18n import I18nAuto, scan_language_list
 from tools.my_utils import load_audio
+# language=os.environ.get("language","Auto")
+# language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
+# i18n = I18nAuto(language="Auto")
 # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
+i18n_dict={}
+json_root="tools/i18n/locale"
+for name in os.listdir(json_root):
+    with open("%s/%s"%(json_root,name),"r")as f:
+        data=json.loads(f.read())
+    i18n_dict[name.split(".json")[0].replace("_","-")]=data
+i18n=gr.I18n(**i18n_dict)
 if torch.cuda.is_available():
     device = "cuda"
     is_half = False
 dict_language_v1 = {
+    "中文": "all_zh",  # 全部按中文识别
+    "英文": "en",  # 全部按英文识别#######不变
+    "日文": "all_ja",  # 全部按日文识别
+    "中英混合": "zh",  # 按中英混合识别####不变
+    "日英混合": "ja",  # 按日英混合识别####不变
+    "多语种混合": "auto",  # 多语种启动切分识别语种
 }
 dict_language_v2 = {
+    "中文": "all_zh",  # 全部按中文识别
+    "英文": "en",  # 全部按英文识别#######不变
+    "日文": "all_ja",  # 全部按日文识别
+    "粤语": "all_yue",  # 全部按中文识别
+    "韩文": "all_ko",  # 全部按韩文识别
+    "中英混合": "zh",  # 按中英混合识别####不变
+    "日英混合": "ja",  # 按日英混合识别####不变
+    "粤英混合": "yue",  # 按粤英混合识别####不变
+    "韩英混合": "ko",  # 按韩英混合识别####不变
+    "多语种混合": "auto",  # 多语种启动切分识别语种
+    "多语种混合(粤语)": "auto_yue",  # 多语种启动切分识别语种
 }
 dict_language = dict_language_v1 if version == "v1" else dict_language_v2
     print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
     dict_language = dict_language_v1 if version == "v1" else dict_language_v2
     if prompt_language is not None and text_language is not None:
+        if prompt_language in dict_language:
             prompt_text_update, prompt_language_update = (
                 {"__type__": "update"},
                 {"__type__": "update", "value": prompt_language},
         else:
             prompt_text_update = {"__type__": "update", "value": ""}
             prompt_language_update = {"__type__": "update", "value": i18n("中文")}
+        if text_language in dict_language:
             text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
         else:
             text_update = {"__type__": "update", "value": ""}
         prompt_text = prompt_text.strip("\n")
         if prompt_text[-1] not in splits:
             prompt_text += "。" if prompt_language != "en" else "."
+        print(i18n("实际输入的参考文本:").key, prompt_text)
     text = text.strip("\n")
     if text[0] not in splits and len(get_first(text)) < 4:
         text = "。" + text if text_language != "en" else "." + text
+    print(i18n("实际输入的目标文本:").key, text)
     zero_wav = np.zeros(
         int(hps.data.sampling_rate * 0.3),
         dtype=np.float16 if is_half == True else np.float32,
         text = cut5(text)
     while "\n\n" in text:
         text = text.replace("\n\n", "\n")
+    print(i18n("实际输入的目标文本(切句后):").key, text)
     texts = text.split("\n")
     texts = process_text(texts)
     texts = merge_short_text_in_array(texts, 5)
             continue
         if text[-1] not in splits:
             text += "。" if text_language != "en" else "."
+        print(i18n("实际输入的目标文本(每句):").key, text)
         phones2, bert2, norm_text2 = get_phones_and_bert(text, text_language, version)
+        print(i18n("前端处理后的文本(每句):").key, norm_text2)
         if not ref_free:
             bert = torch.cat([bert1, bert2], 1)
             all_phoneme_ids = torch.LongTensor(phones1 + phones2).to(device).unsqueeze(0)
             gen = get_tts_wav(
                 ref_wav_path=file_name,
                 prompt_text="",
+                prompt_language="中文",
                 text="犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之,犯大吴疆土者,盛必击而破之.你好世界 Love you 世界へ 안녕하세요",
+                text_language="多语种混合",
                 inp_refs=[],
             )
             next(gen)
         server_name="0.0.0.0",
         inbrowser=True,
         show_api=False,
+        allowed_paths=["/"],i18n=i18n
     )