from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from transformers import pipeline
import torch
import soundfile as sf
import io
import numpy as np

app = FastAPI()

# Initialize the Whisper pipeline once at startup so every request reuses the loaded model
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device="cuda" if torch.cuda.is_available() else "cpu",
)


@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...), language: str = Form(...)):
    # Validate the language before touching the audio payload
    valid_languages = {"english": "en", "urdu": "ur", "arabic": "ar"}
    if language.lower() not in valid_languages:
        raise HTTPException(
            status_code=400,
            detail="Invalid language. Use 'english', 'urdu', or 'arabic'.",
        )

    try:
        # Read the uploaded file into memory and decode it with soundfile
        audio_bytes = await audio.read()
        audio_data, sample_rate = sf.read(io.BytesIO(audio_bytes))

        # Downmix multi-channel audio to mono
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)

        # Whisper's feature extractor expects 16 kHz input
        if sample_rate != 16000:
            raise HTTPException(status_code=400, detail="Audio must be 16kHz.")

        # Transcribe in the requested language (cast to float32; soundfile
        # returns float64 by default)
        result = pipe(
            audio_data.astype(np.float32),
            generate_kwargs={
                "language": valid_languages[language.lower()],
                "task": "transcribe",
            },
            return_timestamps=False,
        )
        return {"text": result["text"]}
    except HTTPException:
        # Re-raise client errors (e.g. wrong sample rate) instead of masking them as 500s
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Transcription error: {e}")
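
# --- Usage sketch ---
# Assumptions (not from the original code): the file is saved as main.py and a
# mono 16 kHz WAV named sample_16k.wav exists locally; adjust both to your setup.
#
# Run the server:
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Send a transcription request with curl (multipart form, matching the
# File/Form parameters above):
#   curl -X POST http://localhost:8000/transcribe \
#        -F "audio=@sample_16k.wav" \
#        -F "language=urdu"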