deploy
- app.py +9 -25
- requirements.txt +11 -11
app.py
CHANGED
@@ -1,5 +1,5 @@
 from fastapi import FastAPI, File, UploadFile
-from transformers import
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
 import io
 import soundfile as sf
@@ -19,9 +19,9 @@ app = FastAPI()
 model_name = "ihanif/whisper-medium-urdu"
 try:
     logger.info(f"Loading processor for {model_name}")
-    processor =
+    processor = WhisperProcessor.from_pretrained(model_name, language="urdu", task="transcribe")
     logger.info(f"Loading model for {model_name}")
-    model =
+    model = WhisperForConditionalGeneration.from_pretrained(model_name, low_cpu_mem_usage=True)
 except Exception as e:
     logger.error(f"Error loading model or processor: {str(e)}")
     raise
@@ -35,22 +35,6 @@ device = "cpu"
 model.to(device)
 logger.info(f"Model loaded and moved to {device}")
 
-# Status endpoint to verify model loading
-@app.get("/status")
-async def get_status():
-    try:
-        model_parameters = sum(p.numel() for p in model.parameters())
-        return {
-            "model": model_name,
-            "loaded": True,
-            "device": str(device),
-            "parameters": model_parameters,
-            "status": "Model is loaded and ready"
-        }
-    except Exception as e:
-        logger.error(f"Error checking model status: {str(e)}")
-        return {"loaded": False, "error": str(e)}
-
 @app.post("/transcribe")
 async def transcribe_audio(file: UploadFile = File(...)):
     try:
@@ -80,7 +64,7 @@ async def transcribe_audio(file: UploadFile = File(...)):
         sample_rate = target_sample_rate
         logger.info(f"Resampling completed in {time.time() - step_time:.2f} seconds")
 
-        # Trim silence
+        # Trim silence (simplified for torchaudio 2.0.2)
         logger.info("Trimming silence")
         step_time = time.time()
         audio_tensor = torch.from_numpy(audio_data).float()
@@ -99,14 +83,14 @@ async def transcribe_audio(file: UploadFile = File(...)):
         # Generate transcription with timeout
         logger.info("Generating transcription")
         step_time = time.time()
-        @timeout_decorator.timeout(
+        @timeout_decorator.timeout(15, timeout_exception=TimeoutError) # 15-second timeout
         def generate_transcription():
             with torch.no_grad():
                 generated_ids = model.generate(
                     input_features,
-                    max_new_tokens=
-                    num_beams=1,
-                    length_penalty=0.0
+                    max_new_tokens=225,
+                    num_beams=1, # Disable beam search
+                    length_penalty=0.0 # Faster decoding
                 )
             return generated_ids
 
@@ -118,7 +102,7 @@ async def transcribe_audio(file: UploadFile = File(...)):
         logger.info(f"Total transcription time: {total_time:.2f} seconds")
         return {"transcription": transcription}
     except TimeoutError:
-        logger.error("Transcription timed out after
+        logger.error("Transcription timed out after 15 seconds")
         return {"error": "Transcription took too long. Try a faster model or check Space performance."}
     except Exception as e:
         logger.error(f"Error during transcription: {str(e)}")
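For reference, a minimal client sketch for exercising the /transcribe endpoint above. The base URL and the audio filename are assumptions for illustration, not part of this commit:

import requests

BASE_URL = "http://localhost:8000"  # assumed address of the running uvicorn server

# Upload a WAV file as multipart form data; the field name "file"
# matches the endpoint's UploadFile = File(...) parameter.
with open("sample.wav", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/transcribe",
        files={"file": ("sample.wav", f, "audio/wav")},
    )

payload = resp.json()
# The endpoint returns {"transcription": ...} on success and
# {"error": ...} when generation exceeds the 15-second timeout.
print(payload.get("transcription") or payload.get("error"))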
requirements.txt
CHANGED
@@ -1,11 +1,11 @@
-transformers
-torch
-fastapi
-uvicorn
-pydantic
-soundfile
-python-multipart
-numpy
-timeout-decorator
-torchaudio
-accelerate
+transformers==4.38.2
+torch==2.0.1
+fastapi==0.103.0
+uvicorn==0.23.2
+pydantic==2.3.0
+soundfile==0.12.1
+python-multipart==0.0.9
+numpy==1.26.4
+timeout-decorator==0.5.0
+torchaudio==2.0.2
+accelerate==0.30.1
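The pins above freeze the environment the Space runs against. A small sanity-check sketch (not part of the commit) that compares installed versions to these pins using only the standard library:

from importlib.metadata import PackageNotFoundError, version

# Distribution names as they appear in requirements.txt (they can
# differ from import names, e.g. timeout-decorator vs timeout_decorator).
PINS = {
    "transformers": "4.38.2",
    "torch": "2.0.1",
    "fastapi": "0.103.0",
    "uvicorn": "0.23.2",
    "pydantic": "2.3.0",
    "soundfile": "0.12.1",
    "python-multipart": "0.0.9",
    "numpy": "1.26.4",
    "timeout-decorator": "0.5.0",
    "torchaudio": "2.0.2",
    "accelerate": "0.30.1",
}

for name, expected in PINS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f"{name}: missing (expected {expected})")
        continue
    status = "ok" if installed == expected else f"mismatch (expected {expected})"
    print(f"{name}: {installed} {status}")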