Space status: Sleeping

Commit: Add custom Whisper-large-v3 API with language param

Files changed:
- Dockerfile (+12, -0)
- app.py (+47, -0)
- requirements.txt (+6, -0)
Dockerfile
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Slim Python base image for the Whisper transcription API.
FROM python:3.10-slim

WORKDIR /app

# Install dependencies in their own layer so it is cached
# independently of application-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# uvicorn serves the FastAPI app on this port.
EXPOSE 8000

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from transformers import pipeline
import torch
import soundfile as sf
import io
import numpy as np

app = FastAPI()

# Decide on hardware once at import time: half precision on GPU,
# full precision on CPU.
_use_gpu = torch.cuda.is_available()

# Whisper-large-v3 ASR pipeline, loaded a single time at startup so
# request handlers only pay the per-inference cost.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    torch_dtype=torch.float16 if _use_gpu else torch.float32,
    device="cuda" if _use_gpu else "cpu",
)
18 |
+
@app.post("/transcribe")
|
19 |
+
async def transcribe(audio: UploadFile = File(...), language: str = Form(...)):
|
20 |
+
try:
|
21 |
+
# Validate language
|
22 |
+
valid_languages = {"english": "en", "urdu": "ur", "arabic": "ar"}
|
23 |
+
if language.lower() not in valid_languages:
|
24 |
+
raise HTTPException(status_code=400, detail="Invalid language. Use 'english', 'urdu', or 'arabic'.")
|
25 |
+
|
26 |
+
# Read audio
|
27 |
+
audio_bytes = await audio.read()
|
28 |
+
audio_file = io.BytesIO(audio_bytes)
|
29 |
+
audio_data, sample_rate = sf.read(audio_file)
|
30 |
+
|
31 |
+
# Ensure mono, 16kHz
|
32 |
+
if len(audio_data.shape) > 1:
|
33 |
+
audio_data = np.mean(audio_data, axis=1)
|
34 |
+
if sample_rate != 16000:
|
35 |
+
raise HTTPException(status_code=400, detail="Audio must be 16kHz.")
|
36 |
+
|
37 |
+
# Transcribe with language
|
38 |
+
result = pipe(
|
39 |
+
audio_data,
|
40 |
+
generate_kwargs={"language": valid_languages[language.lower()], "task": "transcribe"},
|
41 |
+
return_timestamps=False,
|
42 |
+
)
|
43 |
+
|
44 |
+
return {"text": result["text"]}
|
45 |
+
|
46 |
+
except Exception as e:
|
47 |
+
raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Pinned dependencies for the Whisper-large-v3 transcription API.
# Web framework + ASGI server
fastapi==0.103.2
uvicorn==0.23.2
# Whisper model + inference runtime
transformers==4.38.2
torch==2.0.1
# Audio decoding / array processing
soundfile==0.12.1
numpy==1.24.3