Prathamesh Sarjerao Vaidya committed on
Commit fdcc0cf · 1 Parent(s): 785835e

Made changes to fix Docker deployment issue

Files changed (6)
  1. Dockerfile +5 -1
  2. model_preloader.py +15 -14
  3. requirements.txt +5 -2
  4. src/speech_recognizer.py +78 -1
  5. startup.py +15 -2
  6. web_app.py +5 -2
Dockerfile CHANGED
@@ -72,7 +72,11 @@ ENV PYTHONPATH=/app \
     # Fix executable stack issues
     ONNX_EXECUTION_PROVIDER=cpu \
     # Disable problematic optimizations
-    OMP_NUM_THREADS=1
+    OMP_NUM_THREADS=1 \
+    # Suppress tensorboard warnings
+    TF_ENABLE_ONEDNN_OPTS=0 \
+    # Disable problematic features
+    DISABLE_ONNX_EXECUTION_PROVIDERS=CPUExecutionProvider
 
 # Expose port for Hugging Face Spaces
 EXPOSE 7860
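
The ENV additions only help if they are actually visible to the Python process inside the container. Below is a minimal sketch, not part of this commit, that logs the variables set in the Dockerfile above so a misconfigured image can be spotted from the startup logs (the variable names come from the diff; everything else is assumed):

```python
import os
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Names taken from the ENV block in the Dockerfile above.
EXPECTED_ENV_VARS = (
    "OMP_NUM_THREADS",
    "ONNX_EXECUTION_PROVIDER",
    "TF_ENABLE_ONEDNN_OPTS",
    "DISABLE_ONNX_EXECUTION_PROVIDERS",
)

def log_container_env() -> None:
    """Log the deployment-related environment variables, flagging any that are unset."""
    for name in EXPECTED_ENV_VARS:
        value = os.environ.get(name)
        if value is None:
            logger.warning("%s is not set inside the container", name)
        else:
            logger.info("%s=%s", name, value)

if __name__ == "__main__":
    log_container_env()
```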
model_preloader.py CHANGED
@@ -266,7 +266,7 @@ class ModelPreloader:
                 torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
             )
 
-            console.print(f"[green] Successfully loaded {model_name} from local cache[/green]")
+            console.print(f"[green]SUCCESS: Successfully loaded {model_name} from local cache[/green]")
 
         except Exception as e:
             console.print(f"[yellow]Local cache load failed for {model_name}, will download: {e}[/yellow]")
@@ -286,7 +286,7 @@ class ModelPreloader:
                 torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
             )
 
-            console.print(f"[green] Successfully downloaded and loaded {model_name}[/green]")
+            console.print(f"[green]SUCCESS: Successfully downloaded and loaded {model_name}[/green]")
 
         # Move to device if needed
         if self.device != "cpu":
@@ -377,12 +377,12 @@ class ModelPreloader:
             )
 
             # Test the pipeline
-            console.print(f"[green] pyannote.audio pipeline loaded successfully on {self.device}[/green]")
+            console.print(f"[green]SUCCESS: pyannote.audio pipeline loaded successfully on {self.device}[/green]")
 
             return pipeline
 
         except Exception as e:
-            console.print(f"[red] Failed to load pyannote.audio pipeline: {e}[/red]")
+            console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
             logger.error(f"Pyannote loading failed: {e}")
             return None
 
@@ -409,12 +409,12 @@ class ModelPreloader:
             dummy_audio = np.zeros(16000, dtype=np.float32)  # 1 second of silence
             result = model.transcribe(dummy_audio, language="en")
 
-            console.print(f"[green] Whisper model loaded successfully on {self.device}[/green]")
+            console.print(f"[green]SUCCESS: Whisper model loaded successfully on {self.device}[/green]")
 
             return model
 
         except Exception as e:
-            console.print(f"[red] Failed to load Whisper model: {e}[/red]")
+            console.print(f"[red]ERROR: Failed to load Whisper model: {e}[/red]")
             logger.error(f"Whisper loading failed: {e}")
             return None
 
@@ -444,7 +444,7 @@ class ModelPreloader:
         sys_info = self.get_system_info()
 
         info_panel = Panel.fit(
-            f"""🖥️ System Information
+            f"""System Information
 
 • CPU Cores: {sys_info['cpu_count']}
 • Total Memory: {sys_info['memory_gb']} GB
@@ -487,7 +487,7 @@ class ModelPreloader:
 
                 # Check cache first
                 if self.check_model_cache(model_key):
-                    console.print(f"[green] {config['description']} found in cache[/green]")
+                    console.print(f"[green]SUCCESS: {config['description']} found in cache[/green]")
                     progress.update(task_id, completed=100)
                     progress.update(main_task, advance=1)
                     results["models"][model_key] = {"status": "cached", "time": 0}
@@ -541,13 +541,13 @@ class ModelPreloader:
         # Summary
         console.print()
         if results["success_count"] == results["total_count"]:
-            status_text = "[bold green] All models loaded successfully![/bold green]"
+            status_text = "[bold green]SUCCESS: All models loaded successfully![/bold green]"
             status_color = "green"
         elif results["success_count"] > 0:
-            status_text = f"[bold yellow] {results['success_count']}/{results['total_count']} models loaded[/bold yellow]"
+            status_text = f"[bold yellow]WARNING: {results['success_count']}/{results['total_count']} models loaded[/bold yellow]"
             status_color = "yellow"
         else:
-            status_text = "[bold red] No models loaded successfully[/bold red]"
+            status_text = "[bold red]ERROR: No models loaded successfully[/bold red]"
             status_color = "red"
 
         summary_panel = Panel.fit(
@@ -577,8 +577,9 @@ class ModelPreloader:
 
 def main():
     """Main function to run model preloading."""
+    # Use ASCII-safe characters for Windows compatibility
     console.print(Panel.fit(
-        "[bold blue]🎵 Multilingual Audio Intelligence System[/bold blue]\n[yellow]Model Preloader[/yellow]",
+        "[bold blue]Multilingual Audio Intelligence System[/bold blue]\n[yellow]Model Preloader[/yellow]",
         border_style="blue"
     ))
     console.print()
@@ -591,11 +592,11 @@ def main():
         results = preloader.preload_all_models()
 
         if results["success_count"] > 0:
-            console.print("\n[bold green] Model preloading completed![/bold green]")
+            console.print("\n[bold green]SUCCESS: Model preloading completed![/bold green]")
             console.print(f"[dim]Models cached in: {preloader.cache_dir}[/dim]")
             return True
         else:
-            console.print("\n[bold red] Model preloading failed![/bold red]")
+            console.print("\n[bold red]ERROR: Model preloading failed![/bold red]")
             return False
 
     except KeyboardInterrupt:
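
This diff swaps emoji for plain ASCII prefixes because, as the new comment in main() notes, emoji in Rich markup can fail to encode on Windows consoles that use legacy code pages. A hedged sketch of how that prefix-plus-color pattern could be centralized in one helper (the helper itself is hypothetical and not part of model_preloader.py):

```python
from rich.console import Console

console = Console()

# Hypothetical helper, not in the repo: one place for the ASCII-safe
# prefix and Rich color pairing used throughout the preloader output.
_STYLES = {
    "success": ("SUCCESS", "green"),
    "warning": ("WARNING", "yellow"),
    "error": ("ERROR", "red"),
}

def print_status(level: str, message: str) -> None:
    """Print a status line using ASCII prefixes instead of emoji."""
    prefix, color = _STYLES[level]
    console.print(f"[{color}]{prefix}: {message}[/{color}]")

print_status("success", "Whisper model loaded successfully on cpu")
print_status("error", "Failed to load pyannote.audio pipeline: example error")
```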
requirements.txt CHANGED
@@ -25,11 +25,11 @@ pyannote.metrics==3.2.1
 # Performance & Optimization
 numba==0.58.1
 # Use CPU-only onnxruntime to avoid executable stack issues
-onnxruntime-cpu==1.16.3
+onnxruntime==1.16.3
 accelerate==0.20.3
 
 # Core Utilities
-numpy==1.24.3
+numpy
 psutil==5.9.6
 python-dotenv==1.0.0
 requests==2.31.0
@@ -56,6 +56,7 @@ deep-translator==1.11.4
 # Scientific Computing
 scipy==1.11.4
 matplotlib==3.7.3
+plotly==5.17.0
 scikit-learn==1.3.2
 
 # PS-6 Specific Dependencies (HF Spaces compatible)
@@ -73,6 +74,8 @@ rich==13.7.0
 
 # Machine Learning
 tensorflow==2.15.0
+# Fix tensorboard compatibility
+tensorboard==2.15.2
 
 # Additional Dependencies
 huggingface-hub==0.16.4
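
These pin changes (swapping the onnxruntime-cpu pin for the standard onnxruntime package, unpinning numpy, and adding plotly and tensorboard) only fix the build if the resolved environment actually matches. A small sketch, assuming a standard Python 3.8+ interpreter, that reports the installed versions of the packages touched by this change:

```python
from importlib.metadata import version, PackageNotFoundError

# Packages touched in this requirements.txt change; expected pins come from
# the diff above (numpy is intentionally left unpinned).
EXPECTED = {
    "onnxruntime": "1.16.3",
    "numpy": None,
    "plotly": "5.17.0",
    "tensorboard": "2.15.2",
}

for package, expected in EXPECTED.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: NOT INSTALLED")
        continue
    if expected and installed != expected:
        print(f"{package}: installed {installed}, expected {expected}")
    else:
        print(f"{package}: {installed} OK")
```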
src/speech_recognizer.py CHANGED
@@ -55,6 +55,9 @@ class TranscriptionSegment:
     language_probability: float
     no_speech_probability: float
     words: Optional[List[Dict]] = None
+    speaker_id: Optional[str] = None
+    confidence: Optional[float] = None
+    word_timestamps: Optional[List[Dict]] = None
 
 
 class SpeechRecognizer:
@@ -160,7 +163,10 @@ class SpeechRecognizer:
                 language=result.get("language", "unknown"),
                 language_probability=result.get("language_probability", 1.0),
                 no_speech_probability=segment.get("no_speech_prob", 0.0),
-                words=words
+                words=words,
+                speaker_id=None,
+                confidence=1.0 - segment.get("no_speech_prob", 0.0),
+                word_timestamps=words
             ))
 
         return segments
@@ -193,6 +199,77 @@ class SpeechRecognizer:
             logger.error(f"File transcription failed: {e}")
             raise
 
+    def transcribe_segments(self, audio_data: np.ndarray, sample_rate: int,
+                            speaker_segments: List[Tuple[float, float, str]],
+                            word_timestamps: bool = True) -> List[TranscriptionSegment]:
+        """
+        Transcribe audio segments with speaker information.
+
+        Args:
+            audio_data: Audio data as numpy array
+            sample_rate: Sample rate of the audio
+            speaker_segments: List of (start_time, end_time, speaker_id) tuples
+            word_timestamps: Whether to include word-level timestamps
+
+        Returns:
+            List of TranscriptionSegment objects with speaker information
+        """
+        if self.model is None:
+            raise RuntimeError("Model not initialized")
+
+        try:
+            # Prepare audio for Whisper (expects 16kHz)
+            if sample_rate != 16000:
+                import librosa
+                audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
+
+            # Transcribe the entire audio first
+            result = self.model.transcribe(
+                audio_data,
+                language=self.language,
+                word_timestamps=word_timestamps,
+                verbose=False
+            )
+
+            # Convert to our format and add speaker information
+            segments = []
+            for segment in result["segments"]:
+                # Find the speaker for this segment
+                speaker_id = "Unknown"
+                for start_time, end_time, spk_id in speaker_segments:
+                    if segment["start"] >= start_time and segment["end"] <= end_time:
+                        speaker_id = spk_id
+                        break
+
+                words = []
+                if word_timestamps and "words" in segment:
+                    for word in segment["words"]:
+                        words.append({
+                            "word": word["word"],
+                            "start": word["start"],
+                            "end": word["end"],
+                            "probability": word.get("probability", 1.0)
+                        })
+
+                segments.append(TranscriptionSegment(
+                    start=segment["start"],
+                    end=segment["end"],
+                    text=segment["text"].strip(),
+                    language=result.get("language", "unknown"),
+                    language_probability=result.get("language_probability", 1.0),
+                    no_speech_probability=segment.get("no_speech_prob", 0.0),
+                    words=words,
+                    speaker_id=speaker_id,  # Add speaker information
+                    confidence=1.0 - segment.get("no_speech_prob", 0.0),
+                    word_timestamps=words
+                ))
+
+            return segments
+
+        except Exception as e:
+            logger.error(f"Segment transcription failed: {e}")
+            raise
+
     def get_supported_languages(self) -> List[str]:
         """Get list of supported language codes."""
         return [
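
A hedged usage sketch of the new transcribe_segments method with made-up diarization output; only the method name and signature come from the diff above, while the import path, constructor arguments, and audio input are assumptions:

```python
import numpy as np
from src.speech_recognizer import SpeechRecognizer  # import path assumed

# Constructor arguments are assumptions; the repo's defaults may differ.
recognizer = SpeechRecognizer()

# 5 seconds of silent 16 kHz audio stands in for a real recording.
audio = np.zeros(16000 * 5, dtype=np.float32)

# (start_time, end_time, speaker_id) tuples, e.g. from a diarization step.
speaker_segments = [
    (0.0, 2.5, "SPEAKER_00"),
    (2.5, 5.0, "SPEAKER_01"),
]

segments = recognizer.transcribe_segments(
    audio,
    sample_rate=16000,
    speaker_segments=speaker_segments,
    word_timestamps=True,
)

for seg in segments:
    print(f"[{seg.start:.2f}-{seg.end:.2f}] {seg.speaker_id}: {seg.text}")
```

Note that the speaker lookup uses strict containment (segment start and end must both fall inside one speaker turn), so a Whisper segment that straddles two turns falls back to "Unknown"; that simplification is worth keeping in mind when pairing this with a diarizer.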
startup.py CHANGED
@@ -58,12 +58,21 @@ def preload_models():
         import model_preloader
         logger.info('✅ Model preloader module found')
 
+        # Set environment variables to handle onnxruntime issues
+        env = os.environ.copy()
+        env.update({
+            'ONNX_EXECUTION_PROVIDER': 'cpu',
+            'DISABLE_ONNX_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
+            'TF_ENABLE_ONEDNN_OPTS': '0'
+        })
+
         # Try to run the preloader
         result = subprocess.run(
             ['python', 'model_preloader.py'],
             capture_output=True,
             text=True,
-            timeout=300  # 5 minute timeout
+            timeout=300,  # 5 minute timeout
+            env=env
         )
 
         if result.returncode == 0:
@@ -74,7 +83,11 @@ def preload_models():
         else:
             logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
             if result.stderr:
-                logger.warning(f'Preloader stderr: {result.stderr[:500]}...')
+                # Check if it's the onnxruntime issue
+                if 'cannot enable executable stack' in result.stderr:
+                    logger.warning('⚠️ ONNX Runtime executable stack issue detected - this is expected in containers')
+                else:
+                    logger.warning(f'Preloader stderr: {result.stderr[:500]}...')
             return False
 
     except subprocess.TimeoutExpired:
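
The reason startup.py copies os.environ before overlaying the overrides is that passing env= to subprocess.run replaces the child's environment wholesale rather than merging into it. A minimal sketch of that pattern in isolation, with an illustrative child command (the variable values mirror the diff):

```python
import os
import subprocess

# Start from a copy of the parent environment and overlay only the overrides,
# since env= replaces the child's environment entirely.
env = os.environ.copy()
env.update({
    "ONNX_EXECUTION_PROVIDER": "cpu",
    "DISABLE_ONNX_EXECUTION_PROVIDERS": "CPUExecutionProvider",
    "TF_ENABLE_ONEDNN_OPTS": "0",
})

# Illustrative child process: print one of the overrides back out.
result = subprocess.run(
    ["python", "-c", "import os; print(os.environ['ONNX_EXECUTION_PROVIDER'])"],
    capture_output=True,
    text=True,
    timeout=30,
    env=env,
)
print(result.stdout.strip())  # -> cpu
```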
web_app.py CHANGED
@@ -44,8 +44,11 @@ import numpy as np
 import pandas as pd
 from dotenv import load_dotenv
 
-# Load environment variables
-load_dotenv()
+# Load environment variables with error handling
+try:
+    load_dotenv()
+except Exception as e:
+    logging.warning(f"Could not load .env file: {e}")
 
 # Add src directory to Python path
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
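
For context, load_dotenv() in current python-dotenv versions returns False rather than raising when no .env file is found (or it is empty), so the new try/except mainly guards against unreadable or malformed files. A small sketch combining both checks (the logger setup is an assumption, not taken from web_app.py):

```python
import logging
from dotenv import load_dotenv

logger = logging.getLogger(__name__)

# load_dotenv() returns a bool: False when no .env file is found or it is
# empty, so the missing-file case can be logged without an exception.
try:
    if not load_dotenv():
        logger.info("No .env file found; using the existing environment as-is")
except Exception as e:
    logger.warning(f"Could not load .env file: {e}")
```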