Spaces:

v-e-n-o-m
/

urdu-asr-whisper

Sleeping

urdu-asr-whisper / app.py

Add custom Whisper-large-v3 API with language param

c4b6df5 3 months ago

1.63 kB

	from fastapi import FastAPI, File, UploadFile, Form, HTTPException
	from transformers import pipeline
	import torch
	import soundfile as sf
	import io
	import numpy as np

	app = FastAPI()

	# Probe CUDA once and derive both the device and the matching precision:
	# half precision on GPU, full precision on CPU.
	_device, _dtype = (
	    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
	)

	# Whisper ASR pipeline, built once at import time and shared by all requests.
	pipe = pipeline(
	    "automatic-speech-recognition",
	    model="openai/whisper-large-v3",
	    torch_dtype=_dtype,
	    device=_device,
	)

	@app.post("/transcribe")
	async def transcribe(audio: UploadFile = File(...), language: str = Form(...)):
	    """Transcribe an uploaded audio file with the module-level Whisper pipeline.

	    Args:
	        audio: Uploaded audio file. It is decoded with ``soundfile`` and must be
	            sampled at 16 kHz; multi-channel audio is averaged down to mono.
	        language: Spoken language of the audio, one of ``'english'``,
	            ``'urdu'`` or ``'arabic'`` (case-insensitive).

	    Returns:
	        A dict of the form ``{"text": <transcription>}``.

	    Raises:
	        HTTPException: 400 if the language is not supported or the audio is
	            not 16 kHz; 500 for any other failure while decoding or
	            transcribing.
	    """
	    try:
	        # Validate language
	        valid_languages = {"english": "en", "urdu": "ur", "arabic": "ar"}
	        language_key = language.lower()
	        if language_key not in valid_languages:
	            raise HTTPException(status_code=400, detail="Invalid language. Use 'english', 'urdu', or 'arabic'.")

	        # Read the whole upload into memory and decode it
	        audio_bytes = await audio.read()
	        audio_file = io.BytesIO(audio_bytes)
	        audio_data, sample_rate = sf.read(audio_file)

	        # Ensure mono, 16kHz
	        if audio_data.ndim > 1:
	            # soundfile returns (frames, channels); average the channels
	            audio_data = np.mean(audio_data, axis=1)
	        if sample_rate != 16000:
	            raise HTTPException(status_code=400, detail="Audio must be 16kHz.")

	        # Transcribe with language
	        result = pipe(
	            audio_data,
	            generate_kwargs={"language": valid_languages[language_key], "task": "transcribe"},
	            return_timestamps=False,
	        )

	        return {"text": result["text"]}

	    except HTTPException:
	        # HTTPException is itself an Exception subclass: without this clause
	        # the deliberate 400 responses above would be caught by the generic
	        # handler below and reported to the client as 500 errors.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Transcription error: {e}") from e