"""Gradio app for Tsou text-to-speech using an ONNX model.

At import time the script loads the model config and the ONNX session, then
builds a UI with a text box, a synthesize button, and a table of sample
sentences whose rows can be clicked to play a recorded example and copy the
Tsou text into the input box.
"""

import json
import os
import tempfile
import time

import gradio as gr
import numpy as np
import onnxruntime as ort
import soundfile as sf

# Print the version so the logs show whether the new build is running.
VERSION = "2025-08-14-3"
print("🔧 Tsou TTS Space version:", VERSION)
print("🔧 HF commit:", os.environ.get("HF_SPACE_COMMIT_SHA"))

MODEL_PATH = "cou_total.onnx"
CONFIG_PATH = "cou_medium.onnx.json"
# Passed to the model verbatim as its "scales" input.
SCALES = (0.667, 1.0, 0.8)

SAMPLES_DIR = "samples"
os.makedirs(SAMPLES_DIR, exist_ok=True)

# ===== Sentence examples (cou_e_04320 removed; 6 user-provided entries added) =====
SENTENCE_EXAMPLES = [
    {
        "id": "cou_b_03730",
        "zh": "但是我告訴你們,不可向欺負你們的人報復。",
        "tsou": "at'inghi te'o eʉsvʉta muu, teav'a auska 'ola pohcinghi muu ci cou.",
        "path": os.path.join(SAMPLES_DIR, "2.mp3"),
    },
    {
        "id": "cou_e_03884",
        "zh": "那些是牙齒?",
        "tsou": "zou hisi simo maica?",
        "path": os.path.join(SAMPLES_DIR, "3.mp3"),
    },
    {
        "id": "cou_b_06023",
        "zh": "前者傳基督的動機不純,是出於野心,想趁著我坐牢的時候給我製造更多的麻煩。",
        "tsou": "e isi engha ta auyusi homo psoyaski to kilisto o'amo cofkoya na 'tohʉngʉhe, mo i'mi no ma'papak'i, 'ucia ieni ho mi'o noekotu ho mooyai note asuhcu man'i ci ep'ʉcʉ.",
        "path": os.path.join(SAMPLES_DIR, "4.mp3"),
    },
    {
        "id": "cou_b_01065",
        "zh": "耶穌開始工作以前,約翰向全體以色列人民傳道,要他們悔改,接受洗禮。",
        "tsou": "auyusi no ahoi yaahioa 'o iesu, moso mi'usnu ta acʉcʉhʉ ci cou'isolaelu 'o yohane ho psoecinghi to e'e, poa ma'vovei, tiou no toonono.",
        "path": os.path.join(SAMPLES_DIR, "5.mp3"),
    },
    # ===== The 6 newly added entries =====
    {
        "id": "cou_e_05042",
        "zh": "是,我會做菜。",
        "tsou": "a lea’u, lea’u meelʉ pei’i to chae.",
        "path": os.path.join(SAMPLES_DIR, "cou_e_05042.mp3"),
    },
    {
        "id": "cou_b_05359",
        "zh": "我知道你所做的,你怎樣辛勤工作,怎樣忍耐;我也知道你不容忍壞人,曾考驗過那些自稱是使徒而其實不是使徒的人,認出他們是假冒的。",
        "tsou": "os'o cohivi 'e osko hioa; la mainenu na hiasu ngoseo yaahioai, mainenu na hiasu buveici, os'o yaeza cohivi o'a lako peaezoni 'omo kuici cou. ohsula hʉsvʉta i'ola iachi mainca zou sitoo ho o'a a'ʉmtʉ sitoo ci cou, honga ho mio meknuyu.",
        "path": os.path.join(SAMPLES_DIR, "cou_b_05359.mp3"),
    },
    {
        "id": "cou_e_04697",
        "zh": "我是老師。",
        "tsou": "zou lema'cohio na a’o.",
        "path": os.path.join(SAMPLES_DIR, "cou_e_04697.mp3"),
    },
    {
        "id": "cou_e_04717",
        "zh": "不,我不是老師。",
        "tsou": "o’a, o’as’a lema'cohio na a’o.",
        "path": os.path.join(SAMPLES_DIR, "cou_e_04717.mp3"),
    },
    {
        "id": "cou_e_04742",
        "zh": "這是我的削鉛筆刀。",
        "tsou": "zou fu’fu’u eni.",
        "path": os.path.join(SAMPLES_DIR, "cou_e_04742.mp3"),
    },
    {
        "id": "cou_e_04766",
        "zh": "那不是我的書桌。",
        "tsou": "o’as’a pangka’u sico.",
        "path": os.path.join(SAMPLES_DIR, "cou_e_04766.mp3"),
    },
]


def _abs_or_none(p: str):
    """Return the absolute form of *p* if that path exists, otherwise None."""
    p_abs = os.path.abspath(p)
    return p_abs if os.path.exists(p_abs) else None


# One row per example: [id, zh, tsou, absolute audio path or None].
SENT_SAMPLES = [
    [e["id"], e["zh"], e["tsou"], _abs_or_none(e["path"])]
    for e in SENTENCE_EXAMPLES
]


# ===== Load config =====
def _load_config(cfg_path: str):
    """Read and parse the JSON config at *cfg_path*.

    Raises:
        FileNotFoundError: if *cfg_path* does not exist.
    """
    if not os.path.exists(cfg_path):
        raise FileNotFoundError(f"找不到設定檔:{cfg_path}")
    with open(cfg_path, "r", encoding="utf-8") as f:
        return json.load(f)


config = _load_config(CONFIG_PATH)
phoneme_map = config.get("phoneme_id_map", {})
# Special symbols; any of these is None when the config lacks the key.
pad_id = phoneme_map.get("_")
bos_id = phoneme_map.get("^")
eos_id = phoneme_map.get("$")

session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])


def _as_id_list(entry) -> list:
    """Normalise one phoneme-map entry (a bare int or a sequence of ints) to a list."""
    if isinstance(entry, (list, tuple)):
        return [int(i) for i in entry]
    return [int(entry)]


def text_to_ids(text: str) -> np.ndarray:
    """Convert *text* into a 1-D int64 array of model input ids.

    The sequence is: begin symbol, one map entry per character of *text*
    (characters missing from the map fall back to the pad entry), end symbol.
    Map entries may be plain ints or lists of ints; lists are flattened.

    Raises:
        ValueError: if the config's phoneme map lacks "_", "^" or "$".
    """
    specials = (("_", pad_id), ("^", bos_id), ("$", eos_id))
    missing = [symbol for symbol, value in specials if value is None]
    if missing:
        # Fail with a readable message instead of an opaque NumPy/ONNX error.
        raise ValueError(f"設定檔 phoneme_id_map 缺少必要符號:{', '.join(missing)}")

    pad = _as_id_list(pad_id)
    ids = _as_id_list(bos_id)
    # NOTE(review): no pad id is inserted between characters here; confirm
    # this matches the id layout the model was trained on.
    for ch in text:
        entry = phoneme_map.get(ch)
        ids.extend(pad if entry is None else _as_id_list(entry))
    ids.extend(_as_id_list(eos_id))
    return np.array(ids, dtype=np.int64)


def synthesize(text: str):
    """Synthesize *text* and return ``((sample_rate, audio), wav_path)``.

    The first element feeds the audio player; the second is the path of a WAV
    file written for download.

    Raises:
        gr.Error: if *text* is empty/blank or the config is missing the
            special symbols needed to encode it.
    """
    text = (text or "").strip()
    if not text:
        raise gr.Error("請輸入要合成的文字!")

    try:
        ids = text_to_ids(text).reshape(1, -1)
    except ValueError as err:
        # Surface configuration problems in the UI rather than a bare traceback.
        raise gr.Error(str(err)) from err

    ids_len = np.array([ids.shape[1]], dtype=np.int64)
    scales = np.array(SCALES, dtype=np.float32)
    audio = session.run(
        None,
        {"input": ids, "input_lengths": ids_len, "scales": scales},
    )[0].squeeze()

    sr = int(config["audio"]["sample_rate"])

    # Write to a unique file in the temp directory: avoids cluttering the
    # working directory and avoids name collisions between requests that land
    # in the same millisecond. The timestamp prefix keeps the familiar name.
    fd, out_name = tempfile.mkstemp(
        prefix=f"tsou_tts_{int(time.time() * 1000)}_",
        suffix=".wav",
    )
    os.close(fd)
    sf.write(out_name, audio, samplerate=sr)
    return (sr, audio), out_name


def on_row_select(evt: gr.SelectData, rows_state):
    """Handle a table click: return ``(tsou_text, audio_path)`` for the row.

    *rows_state* holds the full sample rows (including the audio path column
    that is not shown in the table). Returns ``("", None)`` when the selected
    row cannot be determined or is out of range.
    """
    index = evt.index if evt is not None else None
    if isinstance(index, (list, tuple)):
        # Sequence index: the first element is used as the row number.
        row = index[0] if index else None
    else:
        row = index
    if not isinstance(row, int):
        return "", None

    samples = rows_state or []
    if 0 <= row < len(samples):
        tsou = samples[row][2] or ""
        audio_path = samples[row][3]
        return tsou, audio_path
    return "", None


with gr.Blocks(title="鄒語語音合成 (女聲) — ONNX") as demo:
    gr.Markdown(
        """
# 鄒語語音合成 (女聲) — ONNX
點選表格任一列可播放範例音檔,並自動將鄒語帶入上方輸入框。
"""
    )

    # NOTE(review): the Row's contents were reconstructed from a
    # whitespace-collapsed source; confirm both widgets belong in the row.
    with gr.Row():
        text_in = gr.Textbox(lines=3, placeholder="請輸入鄒語文字…", label="輸入文字")
        example_player = gr.Audio(label="範例音檔播放器", type="filepath", interactive=False)

    run_btn = gr.Button("🚀 合成", variant="primary")
    audio_out = gr.Audio(label="合成音檔", interactive=False, show_download_button=True)
    file_out = gr.File(label="下載 WAV 檔")
    run_btn.click(synthesize, inputs=[text_in], outputs=[audio_out, file_out])

    display_rows = [
        [row[0], row[1], row[2]]  # drop the audio file path column
        for row in SENT_SAMPLES
    ]
    # Full rows (with audio paths) are kept in state for the select handler.
    rows_state = gr.State(SENT_SAMPLES)

    gr.Markdown("### 句子(中文/鄒語)")
    table = gr.Dataframe(
        headers=["ID", "中文", "鄒語"],
        value=display_rows,
        interactive=False,
        row_count=(len(display_rows), "fixed"),
        col_count=(3, "fixed"),
        wrap=True,
        label="句子範例(點選一列即可播放與帶入文字)",
    )
    table.select(on_row_select, inputs=[rows_state], outputs=[text_in, example_player])

if __name__ == "__main__":
    demo.launch()