v-e-n-o-m committed on
Commit
c4b6df5
·
1 Parent(s): 2edac3c

Add custom Whisper-large-v3 API with language param

Browse files
Files changed (3) hide show
  1. Dockerfile +12 -0
  2. app.py +47 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Whisper transcription API (app.py served by uvicorn).
FROM python:3.10-slim

WORKDIR /app

# Copy and install the dependency list before the application code so the
# pip layer is reused when only app.py changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# Port uvicorn listens on (see CMD below).
EXPOSE 8000

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
2
+ from transformers import pipeline
3
+ import torch
4
+ import soundfile as sf
5
+ import io
6
+ import numpy as np
7
+
8
app = FastAPI()

# Build the Whisper pipeline once, at import time, so every request reuses the
# same loaded model. Half precision is used only when CUDA is available.
_cuda_available = torch.cuda.is_available()
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    torch_dtype=torch.float16 if _cuda_available else torch.float32,
    device="cuda" if _cuda_available else "cpu",
)
17
+
18
@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...), language: str = Form(...)):
    """Transcribe an uploaded audio file with Whisper in the requested language.

    Args:
        audio: Uploaded audio file. It is decoded with ``soundfile`` and must
            be sampled at 16 kHz; multi-channel audio is averaged to mono.
        language: One of ``english``, ``urdu`` or ``arabic`` (case-insensitive).

    Returns:
        A dict of the form ``{"text": <transcription>}``.

    Raises:
        HTTPException: 400 for an unsupported language or a sample rate other
            than 16 kHz; 500 for any other failure while reading or
            transcribing the audio.
    """
    # Validate the language before touching the upload so bad requests are
    # rejected cheaply and never reach the generic 500 handler below.
    valid_languages = {"english": "en", "urdu": "ur", "arabic": "ar"}
    language_code = valid_languages.get(language.lower())
    if language_code is None:
        raise HTTPException(status_code=400, detail="Invalid language. Use 'english', 'urdu', or 'arabic'.")

    try:
        # Decode the uploaded bytes fully in memory.
        audio_bytes = await audio.read()
        audio_data, sample_rate = sf.read(io.BytesIO(audio_bytes))

        # The raw array is handed to the pipeline without a sampling rate, so
        # only 16 kHz input is accepted.
        if sample_rate != 16000:
            raise HTTPException(status_code=400, detail="Audio must be 16kHz.")

        # Down-mix (frames, channels) audio to mono by averaging the channels.
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)

        # NOTE: pipe() is a synchronous call inside an async handler, so other
        # requests on this event loop wait while it runs.
        result = pipe(
            audio_data,
            generate_kwargs={"language": language_code, "task": "transcribe"},
            return_timestamps=False,
        )

        return {"text": result["text"]}

    except HTTPException:
        # Deliberate client errors (4xx) raised above must pass through
        # unchanged instead of being re-wrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}") from e
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
fastapi==0.103.2
uvicorn==0.23.2
# Required by FastAPI to parse multipart uploads (the File/Form parameters of
# /transcribe); without it the app fails when the route is registered.
python-multipart==0.0.6
transformers==4.38.2
torch==2.0.1
soundfile==0.12.1
numpy==1.24.3