Spaces:

prathameshv07
/

Multilingual-Audio-Intelligence-System

Sleeping

App Files Community

Prathamesh Sarjerao Vaidya committed 9 days ago

Commit

fdcc0cf

1 Parent(s): 785835e

made changes to fix Docker deployment issue

Browse files

Files changed (6) hide show

Dockerfile +5 -1
model_preloader.py +15 -14
requirements.txt +5 -2
src/speech_recognizer.py +78 -1
startup.py +15 -2
web_app.py +5 -2

Dockerfile CHANGED Viewed

@@ -72,7 +72,11 @@ ENV PYTHONPATH=/app \
     # Fix executable stack issues
     ONNX_EXECUTION_PROVIDER=cpu \
     # Disable problematic optimizations
-    OMP_NUM_THREADS=1
 # Expose port for Hugging Face Spaces
 EXPOSE 7860

     # Fix executable stack issues
     ONNX_EXECUTION_PROVIDER=cpu \
     # Disable problematic optimizations
+    OMP_NUM_THREADS=1 \
+    # Suppress tensorboard warnings
+    TF_ENABLE_ONEDNN_OPTS=0 \
+    # Disable problematic features
+    DISABLE_ONNX_EXECUTION_PROVIDERS=CPUExecutionProvider
 # Expose port for Hugging Face Spaces
 EXPOSE 7860

model_preloader.py CHANGED Viewed

@@ -266,7 +266,7 @@ class ModelPreloader:
                         torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
                     )
-                    console.print(f"[green]✓ Successfully loaded {model_name} from local cache[/green]")
                 except Exception as e:
                     console.print(f"[yellow]Local cache load failed for {model_name}, will download: {e}[/yellow]")
@@ -286,7 +286,7 @@ class ModelPreloader:
                     torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
                 )
-                console.print(f"[green]✓ Successfully downloaded and loaded {model_name}[/green]")
             # Move to device if needed
             if self.device != "cpu":
@@ -377,12 +377,12 @@ class ModelPreloader:
             )
             # Test the pipeline
-            console.print(f"[green]✓ pyannote.audio pipeline loaded successfully on {self.device}[/green]")
             return pipeline
         except Exception as e:
-            console.print(f"[red]✗ Failed to load pyannote.audio pipeline: {e}[/red]")
             logger.error(f"Pyannote loading failed: {e}")
             return None
@@ -409,12 +409,12 @@ class ModelPreloader:
             dummy_audio = np.zeros(16000, dtype=np.float32)  # 1 second of silence
             result = model.transcribe(dummy_audio, language="en")
-            console.print(f"[green]✓ Whisper model loaded successfully on {self.device}[/green]")
             return model
         except Exception as e:
-            console.print(f"[red]✗ Failed to load Whisper model: {e}[/red]")
             logger.error(f"Whisper loading failed: {e}")
             return None
@@ -444,7 +444,7 @@ class ModelPreloader:
         sys_info = self.get_system_info()
         info_panel = Panel.fit(
-            f"""🖥️  System Information
 • CPU Cores: {sys_info['cpu_count']}
 • Total Memory: {sys_info['memory_gb']} GB
@@ -487,7 +487,7 @@ class ModelPreloader:
                 # Check cache first
                 if self.check_model_cache(model_key):
-                    console.print(f"[green]✓ {config['description']} found in cache[/green]")
                     progress.update(task_id, completed=100)
                     progress.update(main_task, advance=1)
                     results["models"][model_key] = {"status": "cached", "time": 0}
@@ -541,13 +541,13 @@ class ModelPreloader:
         # Summary
         console.print()
         if results["success_count"] == results["total_count"]:
-            status_text = "[bold green]✓ All models loaded successfully![/bold green]"
             status_color = "green"
         elif results["success_count"] > 0:
-            status_text = f"[bold yellow]⚠ {results['success_count']}/{results['total_count']} models loaded[/bold yellow]"
             status_color = "yellow"
         else:
-            status_text = "[bold red]✗ No models loaded successfully[/bold red]"
             status_color = "red"
         summary_panel = Panel.fit(
@@ -577,8 +577,9 @@ class ModelPreloader:
 def main():
     """Main function to run model preloading."""
     console.print(Panel.fit(
-        "[bold blue]🎵 Multilingual Audio Intelligence System[/bold blue]\n[yellow]Model Preloader[/yellow]",
         border_style="blue"
     ))
     console.print()
@@ -591,11 +592,11 @@ def main():
         results = preloader.preload_all_models()
         if results["success_count"] > 0:
-            console.print("\n[bold green]✓ Model preloading completed![/bold green]")
             console.print(f"[dim]Models cached in: {preloader.cache_dir}[/dim]")
             return True
         else:
-            console.print("\n[bold red]✗ Model preloading failed![/bold red]")
             return False
     except KeyboardInterrupt:

                         torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
                     )
+                    console.print(f"[green]SUCCESS: Successfully loaded {model_name} from local cache[/green]")
                 except Exception as e:
                     console.print(f"[yellow]Local cache load failed for {model_name}, will download: {e}[/yellow]")
                     torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
                 )
+                console.print(f"[green]SUCCESS: Successfully downloaded and loaded {model_name}[/green]")
             # Move to device if needed
             if self.device != "cpu":
             )
             # Test the pipeline
+            console.print(f"[green]SUCCESS: pyannote.audio pipeline loaded successfully on {self.device}[/green]")
             return pipeline
         except Exception as e:
+            console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
             logger.error(f"Pyannote loading failed: {e}")
             return None
             dummy_audio = np.zeros(16000, dtype=np.float32)  # 1 second of silence
             result = model.transcribe(dummy_audio, language="en")
+            console.print(f"[green]SUCCESS: Whisper model loaded successfully on {self.device}[/green]")
             return model
         except Exception as e:
+            console.print(f"[red]ERROR: Failed to load Whisper model: {e}[/red]")
             logger.error(f"Whisper loading failed: {e}")
             return None
         sys_info = self.get_system_info()
         info_panel = Panel.fit(
+            f"""System Information
 • CPU Cores: {sys_info['cpu_count']}
 • Total Memory: {sys_info['memory_gb']} GB
                 # Check cache first
                 if self.check_model_cache(model_key):
+                    console.print(f"[green]SUCCESS: {config['description']} found in cache[/green]")
                     progress.update(task_id, completed=100)
                     progress.update(main_task, advance=1)
                     results["models"][model_key] = {"status": "cached", "time": 0}
         # Summary
         console.print()
         if results["success_count"] == results["total_count"]:
+            status_text = "[bold green]SUCCESS: All models loaded successfully![/bold green]"
             status_color = "green"
         elif results["success_count"] > 0:
+            status_text = f"[bold yellow]WARNING: {results['success_count']}/{results['total_count']} models loaded[/bold yellow]"
             status_color = "yellow"
         else:
+            status_text = "[bold red]ERROR: No models loaded successfully[/bold red]"
             status_color = "red"
         summary_panel = Panel.fit(
 def main():
     """Main function to run model preloading."""
+    # Use ASCII-safe characters for Windows compatibility
     console.print(Panel.fit(
+        "[bold blue]Multilingual Audio Intelligence System[/bold blue]\n[yellow]Model Preloader[/yellow]",
         border_style="blue"
     ))
     console.print()
         results = preloader.preload_all_models()
         if results["success_count"] > 0:
+            console.print("\n[bold green]SUCCESS: Model preloading completed![/bold green]")
             console.print(f"[dim]Models cached in: {preloader.cache_dir}[/dim]")
             return True
         else:
+            console.print("\n[bold red]ERROR: Model preloading failed![/bold red]")
             return False
     except KeyboardInterrupt:

requirements.txt CHANGED Viewed

@@ -25,11 +25,11 @@ pyannote.metrics==3.2.1
 # Performance & Optimization
 numba==0.58.1
 # Use CPU-only onnxruntime to avoid executable stack issues
-onnxruntime-cpu==1.16.3
 accelerate==0.20.3
 # Core Utilities
-numpy==1.24.3
 psutil==5.9.6
 python-dotenv==1.0.0
 requests==2.31.0
@@ -56,6 +56,7 @@ deep-translator==1.11.4
 # Scientific Computing
 scipy==1.11.4
 matplotlib==3.7.3
 scikit-learn==1.3.2
 # PS-6 Specific Dependencies (HF Spaces compatible)
@@ -73,6 +74,8 @@ rich==13.7.0
 # Machine Learning
 tensorflow==2.15.0
 # Additional Dependencies
 huggingface-hub==0.16.4

 # Performance & Optimization
 numba==0.58.1
 # Use CPU-only onnxruntime to avoid executable stack issues
+onnxruntime==1.16.3
 accelerate==0.20.3
 # Core Utilities
+numpy
 psutil==5.9.6
 python-dotenv==1.0.0
 requests==2.31.0
 # Scientific Computing
 scipy==1.11.4
 matplotlib==3.7.3
+plotly==5.17.0
 scikit-learn==1.3.2
 # PS-6 Specific Dependencies (HF Spaces compatible)
 # Machine Learning
 tensorflow==2.15.0
+# Fix tensorboard compatibility
+tensorboard==2.15.2
 # Additional Dependencies
 huggingface-hub==0.16.4

src/speech_recognizer.py CHANGED Viewed

@@ -55,6 +55,9 @@ class TranscriptionSegment:
     language_probability: float
     no_speech_probability: float
     words: Optional[List[Dict]] = None
 class SpeechRecognizer:
@@ -160,7 +163,10 @@ class SpeechRecognizer:
                     language=result.get("language", "unknown"),
                     language_probability=result.get("language_probability", 1.0),
                     no_speech_probability=segment.get("no_speech_prob", 0.0),
-                    words=words
                 ))
             return segments
@@ -193,6 +199,77 @@ class SpeechRecognizer:
             logger.error(f"File transcription failed: {e}")
             raise
     def get_supported_languages(self) -> List[str]:
         """Get list of supported language codes."""
         return [

     language_probability: float
     no_speech_probability: float
     words: Optional[List[Dict]] = None
+    speaker_id: Optional[str] = None
+    confidence: Optional[float] = None
+    word_timestamps: Optional[List[Dict]] = None
 class SpeechRecognizer:
                     language=result.get("language", "unknown"),
                     language_probability=result.get("language_probability", 1.0),
                     no_speech_probability=segment.get("no_speech_prob", 0.0),
+                    words=words,
+                    speaker_id=None,
+                    confidence=1.0 - segment.get("no_speech_prob", 0.0),
+                    word_timestamps=words
                 ))
             return segments
             logger.error(f"File transcription failed: {e}")
             raise
+    def transcribe_segments(self, audio_data: np.ndarray, sample_rate: int,
+                           speaker_segments: List[Tuple[float, float, str]],
+                           word_timestamps: bool = True) -> List[TranscriptionSegment]:
+        """
+        Transcribe audio segments with speaker information.
+        Args:
+            audio_data: Audio data as numpy array
+            sample_rate: Sample rate of the audio
+            speaker_segments: List of (start_time, end_time, speaker_id) tuples
+            word_timestamps: Whether to include word-level timestamps
+        Returns:
+            List of TranscriptionSegment objects with speaker information
+        """
+        if self.model is None:
+            raise RuntimeError("Model not initialized")
+        try:
+            # Prepare audio for Whisper (expects 16kHz)
+            if sample_rate != 16000:
+                import librosa
+                audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
+            # Transcribe the entire audio first
+            result = self.model.transcribe(
+                audio_data,
+                language=self.language,
+                word_timestamps=word_timestamps,
+                verbose=False
+            )
+            # Convert to our format and add speaker information
+            segments = []
+            for segment in result["segments"]:
+                # Find the speaker for this segment
+                speaker_id = "Unknown"
+                for start_time, end_time, spk_id in speaker_segments:
+                    if (segment["start"] >= start_time and segment["end"] <= end_time):
+                        speaker_id = spk_id
+                        break
+                words = []
+                if word_timestamps and "words" in segment:
+                    for word in segment["words"]:
+                        words.append({
+                            "word": word["word"],
+                            "start": word["start"],
+                            "end": word["end"],
+                            "probability": word.get("probability", 1.0)
+                        })
+                segments.append(TranscriptionSegment(
+                    start=segment["start"],
+                    end=segment["end"],
+                    text=segment["text"].strip(),
+                    language=result.get("language", "unknown"),
+                    language_probability=result.get("language_probability", 1.0),
+                    no_speech_probability=segment.get("no_speech_prob", 0.0),
+                    words=words,
+                    speaker_id=speaker_id,  # Add speaker information
+                    confidence=1.0 - segment.get("no_speech_prob", 0.0),
+                    word_timestamps=words
+                ))
+            return segments
+        except Exception as e:
+            logger.error(f"Segment transcription failed: {e}")
+            raise
     def get_supported_languages(self) -> List[str]:
         """Get list of supported language codes."""
         return [

startup.py CHANGED Viewed

@@ -58,12 +58,21 @@ def preload_models():
         import model_preloader
         logger.info('✅ Model preloader module found')
         # Try to run the preloader
         result = subprocess.run(
             ['python', 'model_preloader.py'],
             capture_output=True,
             text=True,
-            timeout=300  # 5 minute timeout
         )
         if result.returncode == 0:
@@ -74,7 +83,11 @@ def preload_models():
         else:
             logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
             if result.stderr:
-                logger.warning(f'Preloader stderr: {result.stderr[:500]}...')
             return False
     except subprocess.TimeoutExpired:

         import model_preloader
         logger.info('✅ Model preloader module found')
+        # Set environment variables to handle onnxruntime issues
+        env = os.environ.copy()
+        env.update({
+            'ONNX_EXECUTION_PROVIDER': 'cpu',
+            'DISABLE_ONNX_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
+            'TF_ENABLE_ONEDNN_OPTS': '0'
+        })
         # Try to run the preloader
         result = subprocess.run(
             ['python', 'model_preloader.py'],
             capture_output=True,
             text=True,
+            timeout=300,  # 5 minute timeout
+            env=env
         )
         if result.returncode == 0:
         else:
             logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
             if result.stderr:
+                # Check if it's the onnxruntime issue
+                if 'cannot enable executable stack' in result.stderr:
+                    logger.warning('⚠️ ONNX Runtime executable stack issue detected - this is expected in containers')
+                else:
+                    logger.warning(f'Preloader stderr: {result.stderr[:500]}...')
             return False
     except subprocess.TimeoutExpired:

web_app.py CHANGED Viewed

@@ -44,8 +44,11 @@ import numpy as np
 import pandas as pd
 from dotenv import load_dotenv
-# Load environment variables
-load_dotenv()
 # Add src directory to Python path
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

 import pandas as pd
 from dotenv import load_dotenv
+# Load environment variables with error handling
+try:
+    load_dotenv()
+except Exception as e:
+    logging.warning(f"Could not load .env file: {e}")
 # Add src directory to Python path
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))