Prathamesh Sarjerao Vaidya
committed on
Commit
·
7739a22
1
Parent(s):
4d857f2
made changes
Browse files
- Dockerfile +5 -5
- model_preloader.py +61 -61
- requirements.txt +1 -1
- startup.py +44 -44
Dockerfile
CHANGED
@@ -35,9 +35,9 @@ COPY requirements.txt .
|
|
35 |
# Install Python dependencies with proper error handling
|
36 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
37 |
# Install ONNX Runtime CPU version specifically
|
38 |
-
pip install --no-cache-dir onnxruntime==1.16.3 && \
|
39 |
# Fix executable stack issue
|
40 |
-
find /usr/local/lib/python*/site-packages/onnxruntime -name "*.so" -exec execstack -c {} \; 2>/dev/null || true && \
|
41 |
# Install other requirements
|
42 |
pip install --no-cache-dir -r requirements.txt
|
43 |
|
@@ -71,9 +71,9 @@ ENV PYTHONPATH=/app \
|
|
71 |
HF_HUB_CACHE=/app/model_cache \
|
72 |
FONTCONFIG_PATH=/tmp/fontconfig \
|
73 |
# Critical ONNX Runtime fixes for containers
|
74 |
-
ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
|
75 |
-
ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
|
76 |
-
ORT_DISABLE_TLS_ARENA=1 \
|
77 |
CTRANSLATE2_FORCE_CPU_ISA=generic \
|
78 |
# Threading and memory optimizations
|
79 |
TF_CPP_MIN_LOG_LEVEL=2 \
|
|
|
35 |
# Install Python dependencies with proper error handling
|
36 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
37 |
# Install ONNX Runtime CPU version specifically
|
38 |
+
# pip install --no-cache-dir onnxruntime==1.16.3 && \
|
39 |
# Fix executable stack issue
|
40 |
+
# find /usr/local/lib/python*/site-packages/onnxruntime -name "*.so" -exec execstack -c {} \; 2>/dev/null || true && \
|
41 |
# Install other requirements
|
42 |
pip install --no-cache-dir -r requirements.txt
|
43 |
|
|
|
71 |
HF_HUB_CACHE=/app/model_cache \
|
72 |
FONTCONFIG_PATH=/tmp/fontconfig \
|
73 |
# Critical ONNX Runtime fixes for containers
|
74 |
+
# ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
|
75 |
+
# ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
|
76 |
+
# ORT_DISABLE_TLS_ARENA=1 \
|
77 |
CTRANSLATE2_FORCE_CPU_ISA=generic \
|
78 |
# Threading and memory optimizations
|
79 |
TF_CPP_MIN_LOG_LEVEL=2 \
|
model_preloader.py
CHANGED
@@ -30,30 +30,40 @@ from rich.text import Text
|
|
30 |
import psutil
|
31 |
|
32 |
# CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
|
33 |
-
import os
|
34 |
-
os.environ.update({
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
})
|
45 |
|
46 |
# Import ONNX Runtime with error suppression
|
47 |
-
try:
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
except ImportError:
|
54 |
-
|
55 |
-
except Exception as e:
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
# Add src directory to path
|
59 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
@@ -64,16 +74,6 @@ logger = logging.getLogger(__name__)
|
|
64 |
|
65 |
console = Console()
|
66 |
|
67 |
-
# CRITICAL: Set environment variables BEFORE importing any ML libraries
|
68 |
-
# This fixes the ONNX Runtime executable stack issue in containers
|
69 |
-
os.environ.update({
|
70 |
-
'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
71 |
-
'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
72 |
-
'OMP_NUM_THREADS': '1',
|
73 |
-
'TF_ENABLE_ONEDNN_OPTS': '0',
|
74 |
-
'TOKENIZERS_PARALLELISM': 'false'
|
75 |
-
})
|
76 |
-
|
77 |
class ModelPreloader:
|
78 |
"""Comprehensive model preloader with enhanced local cache detection."""
|
79 |
|
@@ -397,20 +397,20 @@ class ModelPreloader:
|
|
397 |
except Exception as e:
|
398 |
logger.warning(f"Error saving cache for {model_key}: {e}")
|
399 |
|
400 |
-
def load_pyannote_pipeline(self) -> Optional[Pipeline]:
|
401 |
"""Load pyannote speaker diarization pipeline with container-safe settings."""
|
402 |
try:
|
403 |
console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
|
404 |
|
405 |
# Fix ONNX Runtime libraries first
|
406 |
-
try:
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
except:
|
413 |
-
|
414 |
|
415 |
# Check for HuggingFace token
|
416 |
hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
|
@@ -429,7 +429,7 @@ class ModelPreloader:
|
|
429 |
os.environ['ORT_LOGGING_LEVEL'] = '3' # ERROR only
|
430 |
|
431 |
# Disable other verbose logging
|
432 |
-
logging.getLogger('onnxruntime').setLevel(logging.ERROR)
|
433 |
logging.getLogger('transformers').setLevel(logging.ERROR)
|
434 |
|
435 |
try:
|
@@ -453,28 +453,28 @@ class ModelPreloader:
|
|
453 |
warnings.filters[:] = old_warning_filters
|
454 |
|
455 |
except Exception as e:
|
456 |
-
error_msg = str(e).lower()
|
457 |
-
if "executable stack" in error_msg or "onnxruntime" in error_msg:
|
458 |
-
|
459 |
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
|
474 |
-
|
475 |
-
|
476 |
-
else:
|
477 |
-
|
478 |
|
479 |
logger.error(f"Pyannote loading failed: {e}")
|
480 |
return None
|
|
|
30 |
import psutil
|
31 |
|
32 |
# CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
|
33 |
+
# import os
|
34 |
+
# os.environ.update({
|
35 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
36 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
37 |
+
# 'ORT_DISABLE_TLS_ARENA': '1',
|
38 |
+
# 'OMP_NUM_THREADS': '1',
|
39 |
+
# 'MKL_NUM_THREADS': '1',
|
40 |
+
# 'NUMBA_NUM_THREADS': '1',
|
41 |
+
# 'TF_ENABLE_ONEDNN_OPTS': '0',
|
42 |
+
# 'TOKENIZERS_PARALLELISM': 'false',
|
43 |
+
# 'MALLOC_ARENA_MAX': '2'
|
44 |
+
# })
|
45 |
|
46 |
# Import ONNX Runtime with error suppression
|
47 |
+
# try:
|
48 |
+
# import warnings
|
49 |
+
# warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
50 |
+
# import onnxruntime as ort
|
51 |
+
# # Force CPU provider only
|
52 |
+
# ort.set_default_logger_severity(3) # ERROR level only
|
53 |
+
# except ImportError:
|
54 |
+
# pass
|
55 |
+
# except Exception as e:
|
56 |
+
# print(f"ONNX Runtime warning (expected in containers): {e}")
|
57 |
+
|
58 |
+
# CRITICAL: Set environment variables BEFORE importing any ML libraries
|
59 |
+
# This fixes the ONNX Runtime executable stack issue in containers
|
60 |
+
# os.environ.update({
|
61 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
62 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
63 |
+
# 'OMP_NUM_THREADS': '1',
|
64 |
+
# 'TF_ENABLE_ONEDNN_OPTS': '0',
|
65 |
+
# 'TOKENIZERS_PARALLELISM': 'false'
|
66 |
+
# })
|
67 |
|
68 |
# Add src directory to path
|
69 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
74 |
|
75 |
console = Console()
|
76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
class ModelPreloader:
|
78 |
"""Comprehensive model preloader with enhanced local cache detection."""
|
79 |
|
|
|
397 |
except Exception as e:
|
398 |
logger.warning(f"Error saving cache for {model_key}: {e}")
|
399 |
|
400 |
+
def load_pyannote_pipeline(self, task_id: str) -> Optional[Pipeline]:
|
401 |
"""Load pyannote speaker diarization pipeline with container-safe settings."""
|
402 |
try:
|
403 |
console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
|
404 |
|
405 |
# Fix ONNX Runtime libraries first
|
406 |
+
# try:
|
407 |
+
# import subprocess
|
408 |
+
# subprocess.run([
|
409 |
+
# 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
|
410 |
+
# '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
|
411 |
+
# ], capture_output=True, timeout=10, stderr=subprocess.DEVNULL)
|
412 |
+
# except:
|
413 |
+
# pass
|
414 |
|
415 |
# Check for HuggingFace token
|
416 |
hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
|
|
|
429 |
os.environ['ORT_LOGGING_LEVEL'] = '3' # ERROR only
|
430 |
|
431 |
# Disable other verbose logging
|
432 |
+
# logging.getLogger('onnxruntime').setLevel(logging.ERROR)
|
433 |
logging.getLogger('transformers').setLevel(logging.ERROR)
|
434 |
|
435 |
try:
|
|
|
453 |
warnings.filters[:] = old_warning_filters
|
454 |
|
455 |
except Exception as e:
|
456 |
+
# error_msg = str(e).lower()
|
457 |
+
# if "executable stack" in error_msg or "onnxruntime" in error_msg:
|
458 |
+
# console.print("[yellow]ONNX Runtime container warning (attempting workaround)...[/yellow]")
|
459 |
|
460 |
+
# # Try alternative approach - load without ONNX-dependent components
|
461 |
+
# try:
|
462 |
+
# # Try loading with CPU-only execution providers
|
463 |
+
# import onnxruntime as ort
|
464 |
+
# ort.set_default_logger_severity(4) # FATAL only
|
465 |
|
466 |
+
# pipeline = Pipeline.from_pretrained(
|
467 |
+
# "pyannote/speaker-diarization-3.1",
|
468 |
+
# use_auth_token=hf_token,
|
469 |
+
# cache_dir=str(self.cache_dir / "pyannote")
|
470 |
+
# )
|
471 |
+
# console.print(f"[green]SUCCESS: pyannote.audio loaded with workaround[/green]")
|
472 |
+
# return pipeline
|
473 |
|
474 |
+
# except Exception as e2:
|
475 |
+
# console.print(f"[red]ERROR: All pyannote loading methods failed: {e2}[/red]")
|
476 |
+
# else:
|
477 |
+
# console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
|
478 |
|
479 |
logger.error(f"Pyannote loading failed: {e}")
|
480 |
return None
|
requirements.txt
CHANGED
@@ -4,7 +4,7 @@ torchaudio==2.0.2
|
|
4 |
torchvision==0.15.2
|
5 |
|
6 |
# Keep regular ONNX Runtime with container-safe environment variables
|
7 |
-
onnxruntime==1.15.1
|
8 |
|
9 |
# Audio processing
|
10 |
pyannote.audio==3.1.1
|
|
|
4 |
torchvision==0.15.2
|
5 |
|
6 |
# Keep regular ONNX Runtime with container-safe environment variables
|
7 |
+
# onnxruntime==1.15.1
|
8 |
|
9 |
# Audio processing
|
10 |
pyannote.audio==3.1.1
|
startup.py
CHANGED
@@ -5,21 +5,21 @@ Handles model preloading and graceful fallbacks for containerized environments.
|
|
5 |
"""
|
6 |
|
7 |
# Suppress ONNX Runtime warnings BEFORE any imports
|
8 |
-
import warnings
|
9 |
-
warnings.filterwarnings("ignore", message=".*executable stack.*")
|
10 |
-
warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
11 |
|
12 |
import os
|
13 |
import subprocess
|
14 |
import sys
|
15 |
import logging
|
16 |
|
17 |
-
# Set critical environment variables immediately
|
18 |
-
os.environ.update({
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
})
|
23 |
|
24 |
# Configure logging
|
25 |
logging.basicConfig(
|
@@ -71,39 +71,39 @@ def preload_models():
|
|
71 |
logger.info('✅ Model preloader module found')
|
72 |
|
73 |
# Set comprehensive environment variables for ONNX Runtime
|
74 |
-
env = os.environ.copy()
|
75 |
-
env.update({
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
})
|
89 |
|
90 |
-
# Try to fix ONNX Runtime libraries before running preloader
|
91 |
-
try:
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
except:
|
98 |
-
|
99 |
|
100 |
# Try to run the preloader
|
101 |
result = subprocess.run(
|
102 |
['python', 'model_preloader.py'],
|
103 |
capture_output=True,
|
104 |
text=True,
|
105 |
-
timeout=300
|
106 |
-
env=env
|
107 |
)
|
108 |
|
109 |
if result.returncode == 0:
|
@@ -113,15 +113,15 @@ def preload_models():
|
|
113 |
return True
|
114 |
else:
|
115 |
logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
|
116 |
-
if result.stderr:
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
return False
|
126 |
|
127 |
except subprocess.TimeoutExpired:
|
|
|
5 |
"""
|
6 |
|
7 |
# Suppress ONNX Runtime warnings BEFORE any imports
|
8 |
+
# import warnings
|
9 |
+
# warnings.filterwarnings("ignore", message=".*executable stack.*")
|
10 |
+
# warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
11 |
|
12 |
import os
|
13 |
import subprocess
|
14 |
import sys
|
15 |
import logging
|
16 |
|
17 |
+
# # Set critical environment variables immediately
|
18 |
+
# os.environ.update({
|
19 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
20 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
21 |
+
# 'ORT_DISABLE_TLS_ARENA': '1'
|
22 |
+
# })
|
23 |
|
24 |
# Configure logging
|
25 |
logging.basicConfig(
|
|
|
71 |
logger.info('✅ Model preloader module found')
|
72 |
|
73 |
# Set comprehensive environment variables for ONNX Runtime
|
74 |
+
# env = os.environ.copy()
|
75 |
+
# env.update({
|
76 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
77 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
78 |
+
# 'ORT_DISABLE_TLS_ARENA': '1',
|
79 |
+
# 'TF_ENABLE_ONEDNN_OPTS': '0',
|
80 |
+
# 'OMP_NUM_THREADS': '1',
|
81 |
+
# 'MKL_NUM_THREADS': '1',
|
82 |
+
# 'NUMBA_NUM_THREADS': '1',
|
83 |
+
# 'TOKENIZERS_PARALLELISM': 'false',
|
84 |
+
# 'MALLOC_ARENA_MAX': '2',
|
85 |
+
# # Additional ONNX Runtime fixes
|
86 |
+
# 'ONNXRUNTIME_LOG_SEVERITY_LEVEL': '3',
|
87 |
+
# 'ORT_LOGGING_LEVEL': 'WARNING'
|
88 |
+
# })
|
89 |
|
90 |
+
# # Try to fix ONNX Runtime libraries before running preloader
|
91 |
+
# try:
|
92 |
+
# import subprocess
|
93 |
+
# subprocess.run([
|
94 |
+
# 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
|
95 |
+
# '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
|
96 |
+
# ], capture_output=True, timeout=30)
|
97 |
+
# except:
|
98 |
+
# pass # Continue if execstack fix fails
|
99 |
|
100 |
# Try to run the preloader
|
101 |
result = subprocess.run(
|
102 |
['python', 'model_preloader.py'],
|
103 |
capture_output=True,
|
104 |
text=True,
|
105 |
+
timeout=300 # 5 minute timeout
|
106 |
+
# env=env
|
107 |
)
|
108 |
|
109 |
if result.returncode == 0:
|
|
|
113 |
return True
|
114 |
else:
|
115 |
logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
|
116 |
+
# if result.stderr:
|
117 |
+
# # Filter out expected ONNX warnings
|
118 |
+
# stderr_lines = result.stderr.split('\n')
|
119 |
+
# important_errors = [line for line in stderr_lines
|
120 |
+
# if 'executable stack' not in line.lower()
|
121 |
+
# and 'onnxruntime' not in line.lower()
|
122 |
+
# and line.strip()]
|
123 |
+
# if important_errors:
|
124 |
+
# logger.warning(f'Important errors: {important_errors[:3]}')
|
125 |
return False
|
126 |
|
127 |
except subprocess.TimeoutExpired:
|