Spaces:

Ganbatte
/

kyutaistt

Running

App Files Files Community

kyutaistt / app.py

Ganbatte

Create app.py

f94df04 verified 2 days ago

raw

history blame contribute delete

1.95 kB

	import gradio as gr
	import subprocess
	import os
	import tempfile
	import sys

	HF_REPO = "kyutai/stt-2.6b-en"  # Hugging Face repo of the model to load

	def transcribe(audio_path: str) -> str:
	    """Transcribe an audio file by running the moshi inference CLI.

	    Runs ``python -m moshi.run_inference --hf-repo <HF_REPO> <audio_path>``
	    in a subprocess, using the interpreter that is running this script,
	    and returns the last non-empty line of the captured standard output.

	    Args:
	        audio_path: Path to the audio file (.wav, .mp3, ...). ``None`` or
	            an empty string is treated as "no audio provided".

	    Returns:
	        The last non-empty stdout line of the CLI, ``""`` when no audio
	        was provided, or ``"(no output)"`` when the CLI printed nothing.

	    Raises:
	        RuntimeError: If the CLI exits with a non-zero status. The message
	            is the CLI's stderr, or the exit code when stderr is empty.
	    """
	    # No recording/upload: return early instead of launching the CLI with
	    # a missing path.
	    if not audio_path:
	        return ""

	    # Argument list (no shell), so the path is passed through verbatim.
	    cmd = [
	        sys.executable, "-m", "moshi.run_inference",
	        "--hf-repo", HF_REPO,
	        audio_path,
	    ]
	    # Capture all of stdout/stderr as text; the transcript is read from
	    # stdout once the process has finished.
	    result = subprocess.run(cmd, capture_output=True, text=True)
	    if result.returncode != 0:
	        # Fall back to the exit code so the error is never an empty message.
	        message = result.stderr.strip() or (
	            f"moshi.run_inference exited with code {result.returncode}"
	        )
	        raise RuntimeError(message)

	    # The original author notes that moshi prints its result line by line
	    # and that the last line is the complete transcript -- TODO confirm
	    # against the moshi CLI output format.
	    lines = [line for line in result.stdout.splitlines() if line.strip()]
	    return lines[-1] if lines else "(no output)"

	# Gradio UI: one audio input (file upload or microphone) wired to
	# transcribe(), with the result shown in a text box.
	demo = gr.Interface(
	    title="Kyutai STT-2.6B (Streaming ASR)",
	    description=(
	        "อัปโหลดหรืออัดเสียงภาษาอังกฤษ แล้วกด Submit เพื่อถอดเสียงด้วยโมเดลขนาด 2.6 B "
	        "(ใช้ CLI ของ moshi ภายใน Space)"
	    ),
	    fn=transcribe,
	    # type="filepath" matches transcribe()'s audio_path parameter.
	    inputs=gr.Audio(
	        label="Audio (16-32 kHz)",
	        type="filepath",
	        sources=["upload", "microphone"],
	    ),
	    outputs=gr.Textbox(label="Transcription"),
	)

	if __name__ == "__main__":
	    # Start the Gradio app with default launch settings.
	    # NOTE: share=True is not passed here. Per the original author's note,
	    # it would create a public URL automatically when a "Community GPU"
	    # Space is enabled -- TODO confirm against the Gradio/Spaces docs.
	    demo.launch()