Spaces:

benjaminzhang
/

cou_tts

Sleeping

App Files Files Community

benjaminzhang committed on Aug 12

Commit

fa4895b

1 Parent(s): 593b69f

Add app & mp3 samples via Git LFS

Browse files

Files changed (12) hide show

.gitattributes +1 -0
app.py +55 -51
samples/1.mp3 +3 -0
samples/2.mp3 +3 -0
samples/3.mp3 +3 -0
samples/4.mp3 +3 -0
samples/5.mp3 +3 -0
samples/cou_b_01065_c19.mp3:Zone.Identifier +0 -0
samples/cou_b_03730_c19.mp3:Zone.Identifier +0 -0
samples/cou_b_06023_c19.mp3:Zone.Identifier +0 -0
samples/cou_e_03884_c19.mp3:Zone.Identifier +0 -0
samples/cou_e_04320_c19.mp3:Zone.Identifier +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# 太魯閣語 (Truku) TTS — ONNX + Gradio（Hugging Face 版，公開可用）
 import os
 import json
 import time
@@ -7,43 +7,42 @@ import soundfile as sf
 import onnxruntime as ort
 import gradio as gr
-# ======== 固定參數（相對路徑，跟 app.py 放同一層） ========
 MODEL_PATH = "cou_total.onnx"
 CONFIG_PATH = "cou_medium.onnx.json"
-SCALES = (0.667, 1.0, 0.8)  # (length_scale, noise_scale, noise_w)
-# ======== 載入 config 與建立 ONNX session ========
 def _load_config(cfg_path: str):
     if not os.path.exists(cfg_path):
         raise FileNotFoundError(f"找不到設定檔：{cfg_path}")
     with open(cfg_path, "r", encoding="utf-8") as f:
         return json.load(f)
-try:
-    _config = _load_config(CONFIG_PATH)
-except Exception as e:
-    # 讓 Spaces 在介面上清楚回報錯誤
-    raise RuntimeError(f"CONFIG 載入失敗：{e}")
-if "phoneme_id_map" not in _config:
-    raise RuntimeError("CONFIG 缺少 'phoneme_id_map' 欄位")
 _phoneme_map = _config["phoneme_id_map"]
 _pad_id = _phoneme_map.get("_")
 _bos_id = _phoneme_map.get("^")
 _eos_id = _phoneme_map.get("$")
-if None in (_pad_id, _bos_id, _eos_id):
-    raise RuntimeError("CONFIG 的 phoneme_id_map 缺少 _、^、$ 其中之一")
-if not os.path.exists(MODEL_PATH):
-    raise FileNotFoundError(f"找不到模型檔：{MODEL_PATH}")
-# 若需 GPU，可改 providers=["CUDAExecutionProvider","CPUExecutionProvider"]
 _session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
 def _text_to_ids(text: str) -> np.ndarray:
-    # 逐字轉 id；未知字用 pad_id；前後加 BOS/EOS
     ids = [_bos_id] + [_phoneme_map.get(c, _pad_id) for c in text] + [_eos_id]
     return np.array(ids, dtype=np.int64)
@@ -51,40 +50,45 @@ def synthesize(text: str):
     text = (text or "").strip()
     if not text:
         raise gr.Error("請輸入要合成的文字！")
     ids = _text_to_ids(text).reshape(1, -1)
     ids_len = np.array([ids.shape[1]], dtype=np.int64)
     scales = np.array(list(SCALES), dtype=np.float32)
-    audio = _session.run(
-        None,
-        {"input": ids, "input_lengths": ids_len, "scales": scales},
-    )[0].squeeze()
-    # 取樣率
     sr = int(_config["audio"]["sample_rate"])
-    # 寫檔供下載（放工作目錄即可）
-    out_name = f"truku_tts_{int(time.time()*1000)}.wav"
-    out_path = os.path.abspath(out_name)
-    sf.write(out_path, audio, samplerate=sr)
-    # 回傳 (sr, waveform) 供播放 + 檔案供下載
-    return (sr, audio), out_path
-# ======== 極簡 UI ========
-demo = gr.Interface(
-    fn=synthesize,
-    inputs=gr.Textbox(lines=3, placeholder="請輸入太魯閣語文字…", label="輸入文字"),
-    outputs=[
-        gr.Audio(label="合成音檔", interactive=False, show_download_button=True),
-        gr.File(label="下載 WAV 檔"),
-    ],
-    title="太魯閣語語音合成 (女聲) — ONNX",
-    description="輸入文字後按下「提交」即可合成。模型與參數固定在程式內。",
-    allow_flagging="never",
-)
 if __name__ == "__main__":
-    # 在 Hugging Face Spaces 不要指定 host/port，也不要 share=True
     demo.launch()

+# 鄒語 (Tsou) TTS — ONNX + Gradio（Hugging Face 版）
 import os
 import json
 import time
 import onnxruntime as ort
 import gradio as gr
 MODEL_PATH = "cou_total.onnx"
 CONFIG_PATH = "cou_medium.onnx.json"
+SCALES = (0.667, 1.0, 0.8)
+SAMPLES_DIR = "samples"
+os.makedirs(SAMPLES_DIR, exist_ok=True)
+# 改為 mp3 檔案
+NUM_EXAMPLES = [
+    {"zh": "一",  "tsou": "coni",  "mp3": os.path.join(SAMPLES_DIR, "1.mp3")},
+    {"zh": "二",  "tsou": "yuso",  "mp3": os.path.join(SAMPLES_DIR, "2.mp3")},
+    {"zh": "三",  "tsou": "tuyu",  "mp3": os.path.join(SAMPLES_DIR, "3.mp3")},
+    {"zh": "四",  "tsou": "sʉptʉ", "mp3": os.path.join(SAMPLES_DIR, "4.mp3")},
+    {"zh": "五",  "tsou": "eimo",  "mp3": os.path.join(SAMPLES_DIR, "5.mp3")},
+    {"zh": "六",  "tsou": "nomʉ",  "mp3": os.path.join(SAMPLES_DIR, "6.mp3")},
+    {"zh": "七",  "tsou": "pitu",  "mp3": os.path.join(SAMPLES_DIR, "7.mp3")},
+    {"zh": "八",  "tsou": "voyu",  "mp3": os.path.join(SAMPLES_DIR, "8.mp3")},
+    {"zh": "九",  "tsou": "siyo",  "mp3": os.path.join(SAMPLES_DIR, "9.mp3")},
+    {"zh": "十",  "tsou": "maskʉ", "mp3": os.path.join(SAMPLES_DIR, "10.mp3")},
+]
 def _load_config(cfg_path: str):
     if not os.path.exists(cfg_path):
         raise FileNotFoundError(f"找不到設定檔：{cfg_path}")
     with open(cfg_path, "r", encoding="utf-8") as f:
         return json.load(f)
+_config = _load_config(CONFIG_PATH)
 _phoneme_map = _config["phoneme_id_map"]
 _pad_id = _phoneme_map.get("_")
 _bos_id = _phoneme_map.get("^")
 _eos_id = _phoneme_map.get("$")
 _session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
 def _text_to_ids(text: str) -> np.ndarray:
     ids = [_bos_id] + [_phoneme_map.get(c, _pad_id) for c in text] + [_eos_id]
     return np.array(ids, dtype=np.int64)
     text = (text or "").strip()
     if not text:
         raise gr.Error("請輸入要合成的文字！")
     ids = _text_to_ids(text).reshape(1, -1)
     ids_len = np.array([ids.shape[1]], dtype=np.int64)
     scales = np.array(list(SCALES), dtype=np.float32)
+    audio = _session.run(None, {"input": ids, "input_lengths": ids_len, "scales": scales})[0].squeeze()
     sr = int(_config["audio"]["sample_rate"])
+    out_name = f"tsou_tts_{int(time.time()*1000)}.wav"
+    sf.write(out_name, audio, samplerate=sr)
+    return (sr, audio), out_name
+def _pick_example(sample):
+    if isinstance(sample, list) and sample:
+        return sample[1]
+    return ""
+with gr.Blocks(title="鄒語語音合成 (女聲) — ONNX") as demo:
+    gr.Markdown("""
+    # 鄒語語音合成 (女聲) — ONNX
+    - 輸入鄒語文字，按「合成」取得音檔
+    - 下方提供 **中文／鄒語／音檔** 範例（數字 1–10，mp3 格式）。點範例可自動帶入上方輸入框。
+    """)
+    text_in = gr.Textbox(lines=3, placeholder="請輸入鄒語文字…", label="輸入文字")
+    run_btn = gr.Button("🚀 合成", variant="primary")
+    audio_out = gr.Audio(label="合成音檔", interactive=False, show_download_button=True)
+    file_out = gr.File(label="下載 WAV 檔")
+    run_btn.click(synthesize, inputs=[text_in], outputs=[audio_out, file_out])
+    gr.Markdown("""
+    ### 範例（中文／鄒語／音檔：1～10，mp3 格式）
+    請將 `1.mp3` ~ `10.mp3` 放到 `samples/` 資料夾。
+    """)
+    dataset = gr.Dataset(
+        components=[
+            gr.Textbox(label="中文", interactive=False),
+            gr.Textbox(label="鄒語", interactive=False),
+            gr.Audio(label="音檔", interactive=False),
+        ],
+        samples=[[e["zh"], e["tsou"], e["mp3"] if os.path.exists(e["mp3"]) else None] for e in NUM_EXAMPLES],
+        label="中文／鄒語／音檔（1～10，mp3 格式）",
+    )
+    dataset.click(_pick_example, inputs=dataset, outputs=text_in)
 if __name__ == "__main__":
     demo.launch()

samples/1.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3769ff49ada3e47f632c80413eba5da2636dd01a309cd5d7a394cf78c4807c1
+size 54629

samples/2.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cf809334b0f967f8c5299579bd24f2794c2d2438602b97c63724310aa990d77
+size 150752

samples/3.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ea60eccf5912a305bc060dd928df57bfbbf4e39f0c0de27b045c901cee0e07b
+size 59442

samples/4.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc2462c07d68b28a49eacda6cd1887632987a73c247d6517c42b883b90f63af8
+size 432698

samples/5.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ebe4cbc9d9605b41a6a7e084b518c2225152e887c4f6b2df6193729ad6b51d2
+size 459894

samples/cou_b_01065_c19.mp3:Zone.Identifier ADDED Viewed

File without changes

samples/cou_b_03730_c19.mp3:Zone.Identifier ADDED Viewed

File without changes

samples/cou_b_06023_c19.mp3:Zone.Identifier ADDED Viewed

File without changes

samples/cou_e_03884_c19.mp3:Zone.Identifier ADDED Viewed

File without changes

samples/cou_e_04320_c19.mp3:Zone.Identifier ADDED Viewed

File without changes