Spaces:

Ganbatte
/

kyutaistt

Running

App Files Files Community

Ganbatte committed 2 days ago

Commit

f94df04

verified ·

1 Parent(s): 13bfc57

Create app.py

Browse files

Files changed (1) hide show

app.py +46 -0

app.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import gradio as gr
+import subprocess
+import os
+import tempfile
+import sys
+HF_REPO = "kyutai/stt-2.6b-en"    # model repo to load (passed to the CLI as --hf-repo)
+def transcribe(audio_path: str) -> str:
+    """
+    รับพาธไฟล์เสียง (.wav/.mp3 ฯลฯ) แล้วเรียก moshi CLI
+    คืนค่าเป็น text transcript
+    """
+    if audio_path is None:
+        return ""
+    # moshi CLI: python -m moshi.run_inference --hf-repo <repo> <wav> :contentReference[oaicite:2]{index=2}
+    cmd = [
+        sys.executable, "-m", "moshi.run_inference",
+        "--hf-repo", HF_REPO,
+        audio_path
+    ]
+    # เก็บ stdout ทั้งหมดไว้ อ่านบรรทัดสุดท้ายเป็น transcription
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(result.stderr)
+    # moshi จะพิมพ์ผลทีละบรรทัด บรรทัดสุดท้ายคือคำถอดเสียงสมบูรณ์
+    lines = [l for l in result.stdout.splitlines() if l.strip()]
+    return lines[-1] if lines else "(no output)"
+demo = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(type="filepath", sources=["upload", "microphone"],
+                    label="Audio (16-32 kHz)"),
+    outputs=gr.Textbox(label="Transcription"),
+    title="Kyutai STT-2.6B (Streaming ASR)",
+    description=(
+        "อัปโหลดหรืออัดเสียงภาษาอังกฤษ แล้วกด Submit เพื่อถอดเสียงด้วยโมเดลขนาด 2.6 B "
+        "(ใช้ CLI ของ moshi ภายใน Space)"
+    ),
+)
+if __name__ == "__main__":
+    # share=True จะสร้าง public URL ให้อัตโนมัติหากเปิด “Community GPU” Space
+    demo.launch()