v-e-n-o-m committed on
Commit f3b9613 · 1 Parent(s): 9d1d863
Files changed (2)
  1. app.py +9 -25
  2. requirements.txt +11 -11
app.py CHANGED
@@ -1,5 +1,5 @@
 from fastapi import FastAPI, File, UploadFile
-from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
 import io
 import soundfile as sf
@@ -19,9 +19,9 @@ app = FastAPI()
 model_name = "ihanif/whisper-medium-urdu"
 try:
     logger.info(f"Loading processor for {model_name}")
-    processor = AutoProcessor.from_pretrained(model_name, language="urdu", task="transcribe")
+    processor = WhisperProcessor.from_pretrained(model_name, language="urdu", task="transcribe")
     logger.info(f"Loading model for {model_name}")
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name, low_cpu_mem_usage=True)
+    model = WhisperForConditionalGeneration.from_pretrained(model_name, low_cpu_mem_usage=True)
 except Exception as e:
     logger.error(f"Error loading model or processor: {str(e)}")
     raise
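
For this checkpoint the Auto* classes resolve to these Whisper classes anyway, so the swap mainly makes the language/task plumbing explicit. A minimal sketch of the equivalent explicit form, assuming the transformers 4.38.2 API pinned in requirements.txt (forced_decoder_ids is the decode-time counterpart of the load-time language/task arguments):

    from transformers import WhisperProcessor, WhisperForConditionalGeneration

    processor = WhisperProcessor.from_pretrained("ihanif/whisper-medium-urdu")
    model = WhisperForConditionalGeneration.from_pretrained("ihanif/whisper-medium-urdu")
    # Force Urdu transcription at decode time instead of at load time:
    forced_ids = processor.get_decoder_prompt_ids(language="urdu", task="transcribe")
    # later: model.generate(input_features, forced_decoder_ids=forced_ids)
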
@@ -35,22 +35,6 @@ device = "cpu"
 model.to(device)
 logger.info(f"Model loaded and moved to {device}")
 
-# Status endpoint to verify model loading
-@app.get("/status")
-async def get_status():
-    try:
-        model_parameters = sum(p.numel() for p in model.parameters())
-        return {
-            "model": model_name,
-            "loaded": True,
-            "device": str(device),
-            "parameters": model_parameters,
-            "status": "Model is loaded and ready"
-        }
-    except Exception as e:
-        logger.error(f"Error checking model status: {str(e)}")
-        return {"loaded": False, "error": str(e)}
-
 @app.post("/transcribe")
 async def transcribe_audio(file: UploadFile = File(...)):
     try:
@@ -80,7 +64,7 @@ async def transcribe_audio(file: UploadFile = File(...)):
         sample_rate = target_sample_rate
         logger.info(f"Resampling completed in {time.time() - step_time:.2f} seconds")
 
-        # Trim silence
+        # Trim silence (simplified for torchaudio 2.0.2)
         logger.info("Trimming silence")
         step_time = time.time()
         audio_tensor = torch.from_numpy(audio_data).float()
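
The rest of the trimming code falls outside this hunk and is unchanged. As a sketch of what a trim "simplified for torchaudio 2.0.2" could look like (trim_silence is a hypothetical helper, not code from this repo): torchaudio.functional.vad strips leading silence, so running it again on a flipped copy removes the tail as well.

    import torch
    import torchaudio.functional as F

    def trim_silence(audio_tensor: torch.Tensor, sample_rate: int) -> torch.Tensor:
        # vad() removes leading silence only; flip, trim again, flip back
        # to strip trailing silence too.
        trimmed = F.vad(audio_tensor, sample_rate)
        trimmed = F.vad(torch.flip(trimmed, dims=[-1]), sample_rate)
        return torch.flip(trimmed, dims=[-1])
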
@@ -99,14 +83,14 @@ async def transcribe_audio(file: UploadFile = File(...)):
         # Generate transcription with timeout
         logger.info("Generating transcription")
         step_time = time.time()
-        @timeout_decorator.timeout(90, timeout_exception=TimeoutError)  # 30-second timeout
+        @timeout_decorator.timeout(15, timeout_exception=TimeoutError)  # 15-second timeout
         def generate_transcription():
             with torch.no_grad():
                 generated_ids = model.generate(
                     input_features,
-                    max_new_tokens=100,  # Reduced for 5-second audio
-                    num_beams=1,
-                    length_penalty=0.0
+                    max_new_tokens=225,
+                    num_beams=1,  # Disable beam search
+                    length_penalty=0.0  # Faster decoding
                 )
             return generated_ids
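
timeout_decorator enforces the budget with a SIGALRM-based timer by default, which is why it only works on Unix and in the main thread; when the timer fires, the configured exception is raised inside the decorated call. A standalone illustration with a hypothetical 2-second budget:

    import time
    import timeout_decorator

    @timeout_decorator.timeout(2, timeout_exception=TimeoutError)
    def slow_call():
        time.sleep(5)  # deliberately exceeds the 2-second budget

    try:
        slow_call()
    except TimeoutError:
        print("timed out")  # raised by the decorator after ~2 seconds
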
@@ -118,7 +102,7 @@ async def transcribe_audio(file: UploadFile = File(...)):
         logger.info(f"Total transcription time: {total_time:.2f} seconds")
         return {"transcription": transcription}
     except TimeoutError:
-        logger.error("Transcription timed out after 30 seconds")
+        logger.error("Transcription timed out after 15 seconds")
         return {"error": "Transcription took too long. Try a faster model or check Space performance."}
     except Exception as e:
         logger.error(f"Error during transcription: {str(e)}")
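
With the new timeout and decoding settings in place, the endpoint takes a plain multipart upload under the field name "file". A minimal client sketch, assuming the Space is reachable at localhost:7860 (host, port, and sample.wav are placeholders):

    import requests

    with open("sample.wav", "rb") as f:
        resp = requests.post(
            "http://localhost:7860/transcribe",
            files={"file": ("sample.wav", f, "audio/wav")},
        )
    print(resp.json())  # {"transcription": "..."} or {"error": "..."}
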
 
requirements.txt CHANGED
@@ -1,11 +1,11 @@
-transformers
-torch
-fastapi
-uvicorn
-pydantic
-soundfile
-python-multipart
-numpy
-timeout-decorator
-torchaudio
-accelerate
+transformers==4.38.2
+torch==2.0.1
+fastapi==0.103.0
+uvicorn==0.23.2
+pydantic==2.3.0
+soundfile==0.12.1
+python-multipart==0.0.9
+numpy==1.26.4
+timeout-decorator==0.5.0
+torchaudio==2.0.2
+accelerate==0.30.1
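
The pins mostly matter for keeping torch 2.0.1 and torchaudio 2.0.2 as a matched pair, since torchaudio minor versions track torch. A quick sanity check that the installed environment matches the pins, as a sketch:

    import torch, torchaudio, transformers

    print(torch.__version__)         # expect 2.0.1
    print(torchaudio.__version__)    # expect 2.0.2
    print(transformers.__version__)  # expect 4.38.2
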