benjaminzhang committed on
Commit
fa4895b
·
1 Parent(s): 593b69f

Add app & mp3 samples via Git LFS

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # 太魯閣語 (Truku) TTS — ONNX + Gradio(Hugging Face 版,公開可用)
2
  import os
3
  import json
4
  import time
@@ -7,43 +7,42 @@ import soundfile as sf
7
  import onnxruntime as ort
8
  import gradio as gr
9
 
10
- # ======== 固定參數(相對路徑,跟 app.py 放同一層) ========
11
  MODEL_PATH = "cou_total.onnx"
12
  CONFIG_PATH = "cou_medium.onnx.json"
13
- SCALES = (0.667, 1.0, 0.8) # (length_scale, noise_scale, noise_w)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # ======== 載入 config 與建立 ONNX session ========
16
  def _load_config(cfg_path: str):
17
  if not os.path.exists(cfg_path):
18
  raise FileNotFoundError(f"找不到設定檔:{cfg_path}")
19
  with open(cfg_path, "r", encoding="utf-8") as f:
20
  return json.load(f)
21
 
22
- try:
23
- _config = _load_config(CONFIG_PATH)
24
- except Exception as e:
25
- # 讓 Spaces 在介面上清楚回報錯誤
26
- raise RuntimeError(f"CONFIG 載入失敗:{e}")
27
-
28
- if "phoneme_id_map" not in _config:
29
- raise RuntimeError("CONFIG 缺少 'phoneme_id_map' 欄位")
30
-
31
  _phoneme_map = _config["phoneme_id_map"]
32
  _pad_id = _phoneme_map.get("_")
33
  _bos_id = _phoneme_map.get("^")
34
  _eos_id = _phoneme_map.get("$")
35
 
36
- if None in (_pad_id, _bos_id, _eos_id):
37
- raise RuntimeError("CONFIG 的 phoneme_id_map 缺少 _、^、$ 其中之一")
38
-
39
- if not os.path.exists(MODEL_PATH):
40
- raise FileNotFoundError(f"找不到模型檔:{MODEL_PATH}")
41
-
42
- # 若需 GPU,可改 providers=["CUDAExecutionProvider","CPUExecutionProvider"]
43
  _session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
44
 
45
  def _text_to_ids(text: str) -> np.ndarray:
46
- # 逐字轉 id;未知字用 pad_id;前後加 BOS/EOS
47
  ids = [_bos_id] + [_phoneme_map.get(c, _pad_id) for c in text] + [_eos_id]
48
  return np.array(ids, dtype=np.int64)
49
 
@@ -51,40 +50,45 @@ def synthesize(text: str):
51
  text = (text or "").strip()
52
  if not text:
53
  raise gr.Error("請輸入要合成的文字!")
54
-
55
  ids = _text_to_ids(text).reshape(1, -1)
56
  ids_len = np.array([ids.shape[1]], dtype=np.int64)
57
  scales = np.array(list(SCALES), dtype=np.float32)
58
-
59
- audio = _session.run(
60
- None,
61
- {"input": ids, "input_lengths": ids_len, "scales": scales},
62
- )[0].squeeze()
63
-
64
- # 取樣率
65
  sr = int(_config["audio"]["sample_rate"])
66
-
67
- # 寫檔供下載(放工作目錄即可)
68
- out_name = f"truku_tts_{int(time.time()*1000)}.wav"
69
- out_path = os.path.abspath(out_name)
70
- sf.write(out_path, audio, samplerate=sr)
71
-
72
- # 回傳 (sr, waveform) 供播放 + 檔案供下載
73
- return (sr, audio), out_path
74
-
75
- # ======== 極簡 UI ========
76
- demo = gr.Interface(
77
- fn=synthesize,
78
- inputs=gr.Textbox(lines=3, placeholder="請輸入太魯閣語文字…", label="輸入文字"),
79
- outputs=[
80
- gr.Audio(label="合成音檔", interactive=False, show_download_button=True),
81
- gr.File(label="下載 WAV 檔"),
82
- ],
83
- title="太魯閣語語音合成 (女聲) ONNX",
84
- description="輸入文字後按下「提交」即可合成。模型與參數固定在程式內。",
85
- allow_flagging="never",
86
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  if __name__ == "__main__":
89
- # 在 Hugging Face Spaces 不要指定 host/port,也不要 share=True
90
  demo.launch()
 
1
+ # 鄒語 (Tsou) TTS — ONNX + Gradio(Hugging Face 版)
2
  import os
3
  import json
4
  import time
 
7
  import onnxruntime as ort
8
  import gradio as gr
9
 
 
10
  MODEL_PATH = "cou_total.onnx"
11
  CONFIG_PATH = "cou_medium.onnx.json"
12
+ SCALES = (0.667, 1.0, 0.8)
13
+
14
+ SAMPLES_DIR = "samples"
15
+ os.makedirs(SAMPLES_DIR, exist_ok=True)
16
+
17
+ # 改為 mp3 檔案
18
+ NUM_EXAMPLES = [
19
+ {"zh": "一", "tsou": "coni", "mp3": os.path.join(SAMPLES_DIR, "1.mp3")},
20
+ {"zh": "二", "tsou": "yuso", "mp3": os.path.join(SAMPLES_DIR, "2.mp3")},
21
+ {"zh": "三", "tsou": "tuyu", "mp3": os.path.join(SAMPLES_DIR, "3.mp3")},
22
+ {"zh": "四", "tsou": "sʉptʉ", "mp3": os.path.join(SAMPLES_DIR, "4.mp3")},
23
+ {"zh": "五", "tsou": "eimo", "mp3": os.path.join(SAMPLES_DIR, "5.mp3")},
24
+ {"zh": "六", "tsou": "nomʉ", "mp3": os.path.join(SAMPLES_DIR, "6.mp3")},
25
+ {"zh": "七", "tsou": "pitu", "mp3": os.path.join(SAMPLES_DIR, "7.mp3")},
26
+ {"zh": "八", "tsou": "voyu", "mp3": os.path.join(SAMPLES_DIR, "8.mp3")},
27
+ {"zh": "九", "tsou": "siyo", "mp3": os.path.join(SAMPLES_DIR, "9.mp3")},
28
+ {"zh": "十", "tsou": "maskʉ", "mp3": os.path.join(SAMPLES_DIR, "10.mp3")},
29
+ ]
30
 
 
31
  def _load_config(cfg_path: str):
32
  if not os.path.exists(cfg_path):
33
  raise FileNotFoundError(f"找不到設定檔:{cfg_path}")
34
  with open(cfg_path, "r", encoding="utf-8") as f:
35
  return json.load(f)
36
 
37
+ _config = _load_config(CONFIG_PATH)
 
 
 
 
 
 
 
 
38
  _phoneme_map = _config["phoneme_id_map"]
39
  _pad_id = _phoneme_map.get("_")
40
  _bos_id = _phoneme_map.get("^")
41
  _eos_id = _phoneme_map.get("$")
42
 
 
 
 
 
 
 
 
43
  _session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
44
 
45
  def _text_to_ids(text: str) -> np.ndarray:
 
46
  ids = [_bos_id] + [_phoneme_map.get(c, _pad_id) for c in text] + [_eos_id]
47
  return np.array(ids, dtype=np.int64)
48
 
 
50
  text = (text or "").strip()
51
  if not text:
52
  raise gr.Error("請輸入要合成的文字!")
 
53
  ids = _text_to_ids(text).reshape(1, -1)
54
  ids_len = np.array([ids.shape[1]], dtype=np.int64)
55
  scales = np.array(list(SCALES), dtype=np.float32)
56
+ audio = _session.run(None, {"input": ids, "input_lengths": ids_len, "scales": scales})[0].squeeze()
 
 
 
 
 
 
57
  sr = int(_config["audio"]["sample_rate"])
58
+ out_name = f"tsou_tts_{int(time.time()*1000)}.wav"
59
+ sf.write(out_name, audio, samplerate=sr)
60
+ return (sr, audio), out_name
61
+
62
+ def _pick_example(sample):
63
+ if isinstance(sample, list) and sample:
64
+ return sample[1]
65
+ return ""
66
+
67
+ with gr.Blocks(title="鄒語語音合成 (女聲) ONNX") as demo:
68
+ gr.Markdown("""
69
+ # 鄒語語音合成 (女聲) — ONNX
70
+ - 輸入鄒語文字,按「合成」取得音檔
71
+ - 下方提供 **中文/鄒語/音檔** 範例(數字 1–10,mp3 格式)。點範例可自動帶入上方輸入框。
72
+ """)
73
+ text_in = gr.Textbox(lines=3, placeholder="請輸入鄒語文字…", label="輸入文字")
74
+ run_btn = gr.Button("🚀 合成", variant="primary")
75
+ audio_out = gr.Audio(label="合成音檔", interactive=False, show_download_button=True)
76
+ file_out = gr.File(label="下載 WAV 檔")
77
+ run_btn.click(synthesize, inputs=[text_in], outputs=[audio_out, file_out])
78
+ gr.Markdown("""
79
+ ### 範例(中文/鄒語/音檔:1~10,mp3 格式)
80
+ 請將 `1.mp3` ~ `10.mp3` 放到 `samples/` 資料夾。
81
+ """)
82
+ dataset = gr.Dataset(
83
+ components=[
84
+ gr.Textbox(label="中文", interactive=False),
85
+ gr.Textbox(label="鄒語", interactive=False),
86
+ gr.Audio(label="音檔", interactive=False),
87
+ ],
88
+ samples=[[e["zh"], e["tsou"], e["mp3"] if os.path.exists(e["mp3"]) else None] for e in NUM_EXAMPLES],
89
+ label="中文/鄒語/音檔(1~10,mp3 格式)",
90
+ )
91
+ dataset.click(_pick_example, inputs=dataset, outputs=text_in)
92
 
93
  if __name__ == "__main__":
 
94
  demo.launch()
samples/1.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3769ff49ada3e47f632c80413eba5da2636dd01a309cd5d7a394cf78c4807c1
3
+ size 54629
samples/2.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf809334b0f967f8c5299579bd24f2794c2d2438602b97c63724310aa990d77
3
+ size 150752
samples/3.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ea60eccf5912a305bc060dd928df57bfbbf4e39f0c0de27b045c901cee0e07b
3
+ size 59442
samples/4.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc2462c07d68b28a49eacda6cd1887632987a73c247d6517c42b883b90f63af8
3
+ size 432698
samples/5.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ebe4cbc9d9605b41a6a7e084b518c2225152e887c4f6b2df6193729ad6b51d2
3
+ size 459894
samples/cou_b_01065_c19.mp3:Zone.Identifier ADDED
File without changes
samples/cou_b_03730_c19.mp3:Zone.Identifier ADDED
File without changes
samples/cou_b_06023_c19.mp3:Zone.Identifier ADDED
File without changes
samples/cou_e_03884_c19.mp3:Zone.Identifier ADDED
File without changes
samples/cou_e_04320_c19.mp3:Zone.Identifier ADDED
File without changes