Spaces:

prathameshv07
/

Multilingual-Audio-Intelligence-System

Running

App Files Community

Prathamesh Sarjerao Vaidya committed on Sep 4

Commit

938d58f

1 Parent(s): fdcc0cf

made changes

Browse files

Files changed (6) hide show

Dockerfile +6 -7
demo_config.json +3 -3
model_preloader.py +10 -0
requirements.txt +35 -68
startup.py +4 -3
web_app.py +7 -7

Dockerfile CHANGED Viewed

@@ -39,10 +39,10 @@ RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
 COPY . .
 # Create necessary directories with proper permissions
-RUN mkdir -p templates static uploads outputs model_cache temp_files demo_results demo_audio \
     /tmp/matplotlib /tmp/fontconfig \
     && chmod -R 777 templates static \
-    && chmod -R 777 uploads outputs model_cache temp_files demo_results demo_audio \
     && chmod -R 777 /tmp/matplotlib /tmp/fontconfig
 # Set environment variables for Hugging Face Spaces
@@ -64,19 +64,18 @@ ENV PYTHONPATH=/app \
     HUGGINGFACE_HUB_CACHE=/app/model_cache \
     HF_HUB_CACHE=/app/model_cache \
     FONTCONFIG_PATH=/tmp/fontconfig \
     # Fix for audio processing libraries
     CTRANSLATE2_FORCE_CPU_ISA=generic \
     # Disable problematic features
     TF_CPP_MIN_LOG_LEVEL=2 \
     TOKENIZERS_PARALLELISM=false \
-    # Fix executable stack issues
-    ONNX_EXECUTION_PROVIDER=cpu \
     # Disable problematic optimizations
     OMP_NUM_THREADS=1 \
     # Suppress tensorboard warnings
-    TF_ENABLE_ONEDNN_OPTS=0 \
-    # Disable problematic features
-    DISABLE_ONNX_EXECUTION_PROVIDERS=CPUExecutionProvider
 # Expose port for Hugging Face Spaces
 EXPOSE 7860

 COPY . .
 # Create necessary directories with proper permissions
+RUN mkdir -p templates static uploads outputs model_cache temp_files demo_results demo_audio results \
     /tmp/matplotlib /tmp/fontconfig \
     && chmod -R 777 templates static \
+    && chmod -R 777 uploads outputs model_cache temp_files demo_results demo_audio results \
     && chmod -R 777 /tmp/matplotlib /tmp/fontconfig
 # Set environment variables for Hugging Face Spaces
     HUGGINGFACE_HUB_CACHE=/app/model_cache \
     HF_HUB_CACHE=/app/model_cache \
     FONTCONFIG_PATH=/tmp/fontconfig \
+    # Fix for ONNX Runtime in containers (KEY FIX)
+    ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
+    ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
     # Fix for audio processing libraries
     CTRANSLATE2_FORCE_CPU_ISA=generic \
     # Disable problematic features
     TF_CPP_MIN_LOG_LEVEL=2 \
     TOKENIZERS_PARALLELISM=false \
     # Disable problematic optimizations
     OMP_NUM_THREADS=1 \
     # Suppress tensorboard warnings
+    TF_ENABLE_ONEDNN_OPTS=0
 # Expose port for Hugging Face Spaces
 EXPOSE 7860

demo_config.json CHANGED Viewed

@@ -6,7 +6,7 @@
       "filename": "Yuri_Kizaki.mp3",
       "language": "ja",
       "description": "Japanese audio message about website communication",
-      "duration": "00:01:45",
       "url": "https://www.mitsue.co.jp/service/audio_and_video/audio_production/media/narrators_sample/yuri_kizaki/03.mp3"
     },
     {
@@ -15,7 +15,7 @@
       "filename": "Film_Podcast.mp3",
       "language": "fr",
       "description": "French podcast discussing various films and cinema",
-      "duration": "00:03:32",
       "url": "https://www.lightbulblanguages.co.uk/resources/audio/film-podcast.mp3"
     },
     {
@@ -33,7 +33,7 @@
       "filename": "Car_Trouble.mp3",
       "language": "hi",
       "description": "Conversation about waiting for a mechanic and basic assistance",
-      "duration": "00:02:45",
       "url": "https://www.tuttlepublishing.com/content/docs/9780804844383/06-18%20Part2%20Car%20Trouble.mp3"
     }
   ],

       "filename": "Yuri_Kizaki.mp3",
       "language": "ja",
       "description": "Japanese audio message about website communication",
+      "duration": "00:00:32",
       "url": "https://www.mitsue.co.jp/service/audio_and_video/audio_production/media/narrators_sample/yuri_kizaki/03.mp3"
     },
     {
       "filename": "Film_Podcast.mp3",
       "language": "fr",
       "description": "French podcast discussing various films and cinema",
+      "duration": "00:03:50",
       "url": "https://www.lightbulblanguages.co.uk/resources/audio/film-podcast.mp3"
     },
     {
       "filename": "Car_Trouble.mp3",
       "language": "hi",
       "description": "Conversation about waiting for a mechanic and basic assistance",
+      "duration": "00:00:45",
       "url": "https://www.tuttlepublishing.com/content/docs/9780804844383/06-18%20Part2%20Car%20Trouble.mp3"
     }
   ],

model_preloader.py CHANGED Viewed

@@ -38,6 +38,16 @@ logger = logging.getLogger(__name__)
 console = Console()
 class ModelPreloader:
     """Comprehensive model preloader with enhanced local cache detection."""

 console = Console()
+# CRITICAL: Set environment variables BEFORE importing any ML libraries
+# This fixes the ONNX Runtime executable stack issue in containers
+os.environ.update({
+    'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
+    'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
+    'OMP_NUM_THREADS': '1',
+    'TF_ENABLE_ONEDNN_OPTS': '0',
+    'TOKENIZERS_PARALLELISM': 'false'
+})
 class ModelPreloader:
     """Comprehensive model preloader with enhanced local cache detection."""

requirements.txt CHANGED Viewed

@@ -1,88 +1,55 @@
-# Hugging Face Spaces Compatible Requirements
-# Optimized for containerized deployment
-# Core ML Libraries (HF Spaces compatible)
 torch==2.0.1
-torchvision==0.15.2
 torchaudio==2.0.2
-transformers
-# Audio Processing (Fixed versions for HF Spaces)
-librosa==0.10.1
-pydub==0.25.1
-soundfile==0.12.1
-# Use openai-whisper instead of faster-whisper to avoid CTranslate2 issues
-openai-whisper==20231117
-audioread==3.0.1
-ffmpeg-python==0.2.0
-# Speaker Diarization (Essential for HF Spaces)
 pyannote.audio==3.1.1
-pyannote.core
-pyannote.database
-pyannote.metrics==3.2.1
-# Performance & Optimization
-numba==0.58.1
-# Use CPU-only onnxruntime to avoid executable stack issues
-onnxruntime==1.16.3
-accelerate==0.20.3
-# Core Utilities
-numpy
-psutil==5.9.6
-python-dotenv==1.0.0
-requests==2.31.0
-tqdm==4.66.1
-ujson==5.8.0
-colorlog==6.7.0
-pyyaml==6.0.1
-python-dateutil==2.8.2
-# Web Framework
 fastapi==0.104.1
-uvicorn==0.24.0
 python-multipart==0.0.6
-jinja2==3.1.2
 websockets==12.0
 aiofiles==23.2.1
-aiohttp==3.9.1
-httpx
-# Translation APIs
-googletrans
-deep-translator==1.11.4
-# Scientific Computing
 scipy==1.11.4
 matplotlib==3.7.3
 plotly==5.17.0
-scikit-learn==1.3.2
-# PS-6 Specific Dependencies (HF Spaces compatible)
-speechbrain==0.5.16
-# Remove demucs as it's causing issues in containers
-# demucs==4.0.0
-PyWavelets==1.4.1
-# NLP
-nltk==3.8.1
-langdetect==1.0.9
-# Logging & Monitoring
 rich==13.7.0
-# Machine Learning
-tensorflow==2.15.0
-# Fix tensorboard compatibility
-tensorboard==2.15.2
-# Additional Dependencies
 huggingface-hub==0.16.4
-tokenizers
-sentencepiece==0.1.99
-protobuf==3.20.3
-# System dependencies for audio processing
-webrtcvad==2.0.10
-resampy==0.4.2

+# Core ML libraries with container-friendly versions
 torch==2.0.1
 torchaudio==2.0.2
+torchvision==0.15.2
+# Keep regular ONNX Runtime with container-safe environment variables
+onnxruntime==1.16.3
+# Audio processing
 pyannote.audio==3.1.1
+openai-whisper==20231117
+librosa==0.10.1
+soundfile==0.12.1
+pydub==0.25.1
+webrtcvad==2.0.10
+# NLP and Translation
+transformers==4.35.2
+tokenizers==0.15.2
+sentencepiece==0.1.99
+deep-translator==1.11.4
+langdetect==1.0.9
+googletrans==4.0.2
+# Web framework
 fastapi==0.104.1
+uvicorn[standard]==0.24.0
 python-multipart==0.0.6
 websockets==12.0
 aiofiles==23.2.1
+# Data processing
+numpy==1.26.4
+pandas==2.3.2
 scipy==1.11.4
 matplotlib==3.7.3
 plotly==5.17.0
+# Utilities
 rich==13.7.0
+tqdm==4.66.1
+psutil==5.9.6
+pyyaml==6.0.1
+python-dotenv==1.0.0
+click==8.1.8
+# HuggingFace ecosystem
 huggingface-hub==0.16.4
+accelerate==0.20.3
+safetensors==0.6.2
+# Additional utilities
+ffmpeg-python==0.2.0
+httpx==0.28.1
+requests==2.31.0

startup.py CHANGED Viewed

@@ -61,9 +61,10 @@ def preload_models():
         # Set environment variables to handle onnxruntime issues
         env = os.environ.copy()
         env.update({
-            'ONNX_EXECUTION_PROVIDER': 'cpu',
-            'DISABLE_ONNX_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
-            'TF_ENABLE_ONEDNN_OPTS': '0'
         })
         # Try to run the preloader

         # Set environment variables to handle onnxruntime issues
         env = os.environ.copy()
         env.update({
+            'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
+            'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
+            'TF_ENABLE_ONEDNN_OPTS': '0',
+            'OMP_NUM_THREADS': '1'
         })
         # Try to run the preloader

web_app.py CHANGED Viewed

@@ -124,7 +124,7 @@ DEMO_FILES = {
         "expected_translation": "Audio messages enable communication beyond existing websites. By incorporating audio information into visually-driven websites, you can add new value to the information and effectively differentiate your website from others.",
         "category": "business",
         "difficulty": "intermediate",
-        "duration": "00:01:45"
     },
     "film_podcast": {
         "name": "Film Podcast",
@@ -137,7 +137,7 @@ DEMO_FILES = {
         "expected_translation": "The film The Social Network deals with the creation of Facebook by Mark Zuckerberg and the legal problems this caused for the creator of this site.",
         "category": "entertainment",
         "difficulty": "advanced",
-        "duration": "00:03:32"
     },
     "tamil_interview": {
         "name": "Tamil Wikipedia Interview",
@@ -166,7 +166,7 @@ DEMO_FILES = {
         "expected_translation": "The car has broken down. We are waiting for the mechanic. It will take some time.",
         "category": "daily_life",
         "difficulty": "beginner",
-        "duration": "00:02:45",
         "featured": True,
         "new": True,
         "indian_language": True
@@ -587,7 +587,7 @@ class AudioProcessor:
         if self.pipeline is None:
             logger.info("Initializing Audio Intelligence Pipeline...")
-            try:
                 self.pipeline = AudioIntelligencePipeline(
                     whisper_model_size=whisper_model,
                     target_language=target_language,
@@ -596,7 +596,7 @@ class AudioProcessor:
                     output_dir="./outputs"
                 )
                 logger.info("Pipeline initialization complete!")
-            except Exception as e:
                 logger.error(f"Pipeline initialization failed: {e}")
                 raise
@@ -740,7 +740,7 @@ async def home(request: Request):
 @app.post("/api/upload")
 async def upload_audio(
     request: Request,
-    file: UploadFile = File(...),
     whisper_model: str = Form("small"),
     target_language: str = Form("en"),
     hf_token: Optional[str] = Form(None)
@@ -892,7 +892,7 @@ async def get_results(task_id: str):
     else:
         # Fallback if results not found
-        return JSONResponse({
             "task_id": task_id,
             "status": "complete",
             "results": {

         "expected_translation": "Audio messages enable communication beyond existing websites. By incorporating audio information into visually-driven websites, you can add new value to the information and effectively differentiate your website from others.",
         "category": "business",
         "difficulty": "intermediate",
+        "duration": "00:00:32"
     },
     "film_podcast": {
         "name": "Film Podcast",
         "expected_translation": "The film The Social Network deals with the creation of Facebook by Mark Zuckerberg and the legal problems this caused for the creator of this site.",
         "category": "entertainment",
         "difficulty": "advanced",
+        "duration": "00:03:50"
     },
     "tamil_interview": {
         "name": "Tamil Wikipedia Interview",
         "expected_translation": "The car has broken down. We are waiting for the mechanic. It will take some time.",
         "category": "daily_life",
         "difficulty": "beginner",
+        "duration": "00:00:45",
         "featured": True,
         "new": True,
         "indian_language": True
         if self.pipeline is None:
             logger.info("Initializing Audio Intelligence Pipeline...")
+        try:
                 self.pipeline = AudioIntelligencePipeline(
                     whisper_model_size=whisper_model,
                     target_language=target_language,
                     output_dir="./outputs"
                 )
                 logger.info("Pipeline initialization complete!")
+        except Exception as e:
                 logger.error(f"Pipeline initialization failed: {e}")
                 raise
 @app.post("/api/upload")
 async def upload_audio(
     request: Request,
+            file: UploadFile = File(...),
     whisper_model: str = Form("small"),
     target_language: str = Form("en"),
     hf_token: Optional[str] = Form(None)
     else:
         # Fallback if results not found
+                return JSONResponse({
             "task_id": task_id,
             "status": "complete",
             "results": {