Prathamesh Sarjerao Vaidya committed on
Commit
7739a22
·
1 Parent(s): 4d857f2

made changes

Browse files
Files changed (4) hide show
  1. Dockerfile +5 -5
  2. model_preloader.py +61 -61
  3. requirements.txt +1 -1
  4. startup.py +44 -44
Dockerfile CHANGED
@@ -35,9 +35,9 @@ COPY requirements.txt .
35
  # Install Python dependencies with proper error handling
36
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
37
  # Install ONNX Runtime CPU version specifically
38
- pip install --no-cache-dir onnxruntime==1.16.3 && \
39
  # Fix executable stack issue
40
- find /usr/local/lib/python*/site-packages/onnxruntime -name "*.so" -exec execstack -c {} \; 2>/dev/null || true && \
41
  # Install other requirements
42
  pip install --no-cache-dir -r requirements.txt
43
 
@@ -71,9 +71,9 @@ ENV PYTHONPATH=/app \
71
  HF_HUB_CACHE=/app/model_cache \
72
  FONTCONFIG_PATH=/tmp/fontconfig \
73
  # Critical ONNX Runtime fixes for containers
74
- ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
75
- ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
76
- ORT_DISABLE_TLS_ARENA=1 \
77
  CTRANSLATE2_FORCE_CPU_ISA=generic \
78
  # Threading and memory optimizations
79
  TF_CPP_MIN_LOG_LEVEL=2 \
 
35
  # Install Python dependencies with proper error handling
36
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
37
  # Install ONNX Runtime CPU version specifically
38
+ # pip install --no-cache-dir onnxruntime==1.16.3 && \
39
  # Fix executable stack issue
40
+ # find /usr/local/lib/python*/site-packages/onnxruntime -name "*.so" -exec execstack -c {} \; 2>/dev/null || true && \
41
  # Install other requirements
42
  pip install --no-cache-dir -r requirements.txt
43
 
 
71
  HF_HUB_CACHE=/app/model_cache \
72
  FONTCONFIG_PATH=/tmp/fontconfig \
73
  # Critical ONNX Runtime fixes for containers
74
+ # ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
75
+ # ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
76
+ # ORT_DISABLE_TLS_ARENA=1 \
77
  CTRANSLATE2_FORCE_CPU_ISA=generic \
78
  # Threading and memory optimizations
79
  TF_CPP_MIN_LOG_LEVEL=2 \
model_preloader.py CHANGED
@@ -30,30 +30,40 @@ from rich.text import Text
30
  import psutil
31
 
32
  # CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
33
- import os
34
- os.environ.update({
35
- 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
36
- 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
37
- 'ORT_DISABLE_TLS_ARENA': '1',
38
- 'OMP_NUM_THREADS': '1',
39
- 'MKL_NUM_THREADS': '1',
40
- 'NUMBA_NUM_THREADS': '1',
41
- 'TF_ENABLE_ONEDNN_OPTS': '0',
42
- 'TOKENIZERS_PARALLELISM': 'false',
43
- 'MALLOC_ARENA_MAX': '2'
44
- })
45
 
46
  # Import ONNX Runtime with error suppression
47
- try:
48
- import warnings
49
- warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
50
- import onnxruntime as ort
51
- # Force CPU provider only
52
- ort.set_default_logger_severity(3) # ERROR level only
53
- except ImportError:
54
- pass
55
- except Exception as e:
56
- print(f"ONNX Runtime warning (expected in containers): {e}")
 
 
 
 
 
 
 
 
 
 
57
 
58
  # Add src directory to path
59
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
@@ -64,16 +74,6 @@ logger = logging.getLogger(__name__)
64
 
65
  console = Console()
66
 
67
- # CRITICAL: Set environment variables BEFORE importing any ML libraries
68
- # This fixes the ONNX Runtime executable stack issue in containers
69
- os.environ.update({
70
- 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
71
- 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
72
- 'OMP_NUM_THREADS': '1',
73
- 'TF_ENABLE_ONEDNN_OPTS': '0',
74
- 'TOKENIZERS_PARALLELISM': 'false'
75
- })
76
-
77
  class ModelPreloader:
78
  """Comprehensive model preloader with enhanced local cache detection."""
79
 
@@ -397,20 +397,20 @@ class ModelPreloader:
397
  except Exception as e:
398
  logger.warning(f"Error saving cache for {model_key}: {e}")
399
 
400
- def load_pyannote_pipeline(self) -> Optional[Pipeline]:
401
  """Load pyannote speaker diarization pipeline with container-safe settings."""
402
  try:
403
  console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
404
 
405
  # Fix ONNX Runtime libraries first
406
- try:
407
- import subprocess
408
- subprocess.run([
409
- 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
410
- '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
411
- ], capture_output=True, timeout=10, stderr=subprocess.DEVNULL)
412
- except:
413
- pass
414
 
415
  # Check for HuggingFace token
416
  hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
@@ -429,7 +429,7 @@ class ModelPreloader:
429
  os.environ['ORT_LOGGING_LEVEL'] = '3' # ERROR only
430
 
431
  # Disable other verbose logging
432
- logging.getLogger('onnxruntime').setLevel(logging.ERROR)
433
  logging.getLogger('transformers').setLevel(logging.ERROR)
434
 
435
  try:
@@ -453,28 +453,28 @@ class ModelPreloader:
453
  warnings.filters[:] = old_warning_filters
454
 
455
  except Exception as e:
456
- error_msg = str(e).lower()
457
- if "executable stack" in error_msg or "onnxruntime" in error_msg:
458
- console.print("[yellow]ONNX Runtime container warning (attempting workaround)...[/yellow]")
459
 
460
- # Try alternative approach - load without ONNX-dependent components
461
- try:
462
- # Try loading with CPU-only execution providers
463
- import onnxruntime as ort
464
- ort.set_default_logger_severity(4) # FATAL only
465
 
466
- pipeline = Pipeline.from_pretrained(
467
- "pyannote/speaker-diarization-3.1",
468
- use_auth_token=hf_token,
469
- cache_dir=str(self.cache_dir / "pyannote")
470
- )
471
- console.print(f"[green]SUCCESS: pyannote.audio loaded with workaround[/green]")
472
- return pipeline
473
 
474
- except Exception as e2:
475
- console.print(f"[red]ERROR: All pyannote loading methods failed: {e2}[/red]")
476
- else:
477
- console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
478
 
479
  logger.error(f"Pyannote loading failed: {e}")
480
  return None
 
30
  import psutil
31
 
32
  # CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
33
+ # import os
34
+ # os.environ.update({
35
+ # 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
36
+ # 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
37
+ # 'ORT_DISABLE_TLS_ARENA': '1',
38
+ # 'OMP_NUM_THREADS': '1',
39
+ # 'MKL_NUM_THREADS': '1',
40
+ # 'NUMBA_NUM_THREADS': '1',
41
+ # 'TF_ENABLE_ONEDNN_OPTS': '0',
42
+ # 'TOKENIZERS_PARALLELISM': 'false',
43
+ # 'MALLOC_ARENA_MAX': '2'
44
+ # })
45
 
46
  # Import ONNX Runtime with error suppression
47
+ # try:
48
+ # import warnings
49
+ # warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
50
+ # import onnxruntime as ort
51
+ # # Force CPU provider only
52
+ # ort.set_default_logger_severity(3) # ERROR level only
53
+ # except ImportError:
54
+ # pass
55
+ # except Exception as e:
56
+ # print(f"ONNX Runtime warning (expected in containers): {e}")
57
+
58
+ # CRITICAL: Set environment variables BEFORE importing any ML libraries
59
+ # This fixes the ONNX Runtime executable stack issue in containers
60
+ # os.environ.update({
61
+ # 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
62
+ # 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
63
+ # 'OMP_NUM_THREADS': '1',
64
+ # 'TF_ENABLE_ONEDNN_OPTS': '0',
65
+ # 'TOKENIZERS_PARALLELISM': 'false'
66
+ # })
67
 
68
  # Add src directory to path
69
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
 
74
 
75
  console = Console()
76
 
 
 
 
 
 
 
 
 
 
 
77
  class ModelPreloader:
78
  """Comprehensive model preloader with enhanced local cache detection."""
79
 
 
397
  except Exception as e:
398
  logger.warning(f"Error saving cache for {model_key}: {e}")
399
 
400
+ def load_pyannote_pipeline(self, task_id: str) -> Optional[Pipeline]:
401
  """Load pyannote speaker diarization pipeline with container-safe settings."""
402
  try:
403
  console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
404
 
405
  # Fix ONNX Runtime libraries first
406
+ # try:
407
+ # import subprocess
408
+ # subprocess.run([
409
+ # 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
410
+ # '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
411
+ # ], capture_output=True, timeout=10, stderr=subprocess.DEVNULL)
412
+ # except:
413
+ # pass
414
 
415
  # Check for HuggingFace token
416
  hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
 
429
  os.environ['ORT_LOGGING_LEVEL'] = '3' # ERROR only
430
 
431
  # Disable other verbose logging
432
+ # logging.getLogger('onnxruntime').setLevel(logging.ERROR)
433
  logging.getLogger('transformers').setLevel(logging.ERROR)
434
 
435
  try:
 
453
  warnings.filters[:] = old_warning_filters
454
 
455
  except Exception as e:
456
+ # error_msg = str(e).lower()
457
+ # if "executable stack" in error_msg or "onnxruntime" in error_msg:
458
+ # console.print("[yellow]ONNX Runtime container warning (attempting workaround)...[/yellow]")
459
 
460
+ # # Try alternative approach - load without ONNX-dependent components
461
+ # try:
462
+ # # Try loading with CPU-only execution providers
463
+ # import onnxruntime as ort
464
+ # ort.set_default_logger_severity(4) # FATAL only
465
 
466
+ # pipeline = Pipeline.from_pretrained(
467
+ # "pyannote/speaker-diarization-3.1",
468
+ # use_auth_token=hf_token,
469
+ # cache_dir=str(self.cache_dir / "pyannote")
470
+ # )
471
+ # console.print(f"[green]SUCCESS: pyannote.audio loaded with workaround[/green]")
472
+ # return pipeline
473
 
474
+ # except Exception as e2:
475
+ # console.print(f"[red]ERROR: All pyannote loading methods failed: {e2}[/red]")
476
+ # else:
477
+ # console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
478
 
479
  logger.error(f"Pyannote loading failed: {e}")
480
  return None
requirements.txt CHANGED
@@ -4,7 +4,7 @@ torchaudio==2.0.2
4
  torchvision==0.15.2
5
 
6
  # Keep regular ONNX Runtime with container-safe environment variables
7
- onnxruntime==1.15.1
8
 
9
  # Audio processing
10
  pyannote.audio==3.1.1
 
4
  torchvision==0.15.2
5
 
6
  # Keep regular ONNX Runtime with container-safe environment variables
7
+ # onnxruntime==1.15.1
8
 
9
  # Audio processing
10
  pyannote.audio==3.1.1
startup.py CHANGED
@@ -5,21 +5,21 @@ Handles model preloading and graceful fallbacks for containerized environments.
5
  """
6
 
7
  # Suppress ONNX Runtime warnings BEFORE any imports
8
- import warnings
9
- warnings.filterwarnings("ignore", message=".*executable stack.*")
10
- warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
11
 
12
  import os
13
  import subprocess
14
  import sys
15
  import logging
16
 
17
- # Set critical environment variables immediately
18
- os.environ.update({
19
- 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
20
- 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
21
- 'ORT_DISABLE_TLS_ARENA': '1'
22
- })
23
 
24
  # Configure logging
25
  logging.basicConfig(
@@ -71,39 +71,39 @@ def preload_models():
71
  logger.info('✅ Model preloader module found')
72
 
73
  # Set comprehensive environment variables for ONNX Runtime
74
- env = os.environ.copy()
75
- env.update({
76
- 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
77
- 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
78
- 'ORT_DISABLE_TLS_ARENA': '1',
79
- 'TF_ENABLE_ONEDNN_OPTS': '0',
80
- 'OMP_NUM_THREADS': '1',
81
- 'MKL_NUM_THREADS': '1',
82
- 'NUMBA_NUM_THREADS': '1',
83
- 'TOKENIZERS_PARALLELISM': 'false',
84
- 'MALLOC_ARENA_MAX': '2',
85
- # Additional ONNX Runtime fixes
86
- 'ONNXRUNTIME_LOG_SEVERITY_LEVEL': '3',
87
- 'ORT_LOGGING_LEVEL': 'WARNING'
88
- })
89
 
90
- # Try to fix ONNX Runtime libraries before running preloader
91
- try:
92
- import subprocess
93
- subprocess.run([
94
- 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
95
- '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
96
- ], capture_output=True, timeout=30)
97
- except:
98
- pass # Continue if execstack fix fails
99
 
100
  # Try to run the preloader
101
  result = subprocess.run(
102
  ['python', 'model_preloader.py'],
103
  capture_output=True,
104
  text=True,
105
- timeout=300, # 5 minute timeout
106
- env=env
107
  )
108
 
109
  if result.returncode == 0:
@@ -113,15 +113,15 @@ def preload_models():
113
  return True
114
  else:
115
  logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
116
- if result.stderr:
117
- # Filter out expected ONNX warnings
118
- stderr_lines = result.stderr.split('\n')
119
- important_errors = [line for line in stderr_lines
120
- if 'executable stack' not in line.lower()
121
- and 'onnxruntime' not in line.lower()
122
- and line.strip()]
123
- if important_errors:
124
- logger.warning(f'Important errors: {important_errors[:3]}')
125
  return False
126
 
127
  except subprocess.TimeoutExpired:
 
5
  """
6
 
7
  # Suppress ONNX Runtime warnings BEFORE any imports
8
+ # import warnings
9
+ # warnings.filterwarnings("ignore", message=".*executable stack.*")
10
+ # warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
11
 
12
  import os
13
  import subprocess
14
  import sys
15
  import logging
16
 
17
+ # # Set critical environment variables immediately
18
+ # os.environ.update({
19
+ # 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
20
+ # 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
21
+ # 'ORT_DISABLE_TLS_ARENA': '1'
22
+ # })
23
 
24
  # Configure logging
25
  logging.basicConfig(
 
71
  logger.info('✅ Model preloader module found')
72
 
73
  # Set comprehensive environment variables for ONNX Runtime
74
+ # env = os.environ.copy()
75
+ # env.update({
76
+ # 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
77
+ # 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
78
+ # 'ORT_DISABLE_TLS_ARENA': '1',
79
+ # 'TF_ENABLE_ONEDNN_OPTS': '0',
80
+ # 'OMP_NUM_THREADS': '1',
81
+ # 'MKL_NUM_THREADS': '1',
82
+ # 'NUMBA_NUM_THREADS': '1',
83
+ # 'TOKENIZERS_PARALLELISM': 'false',
84
+ # 'MALLOC_ARENA_MAX': '2',
85
+ # # Additional ONNX Runtime fixes
86
+ # 'ONNXRUNTIME_LOG_SEVERITY_LEVEL': '3',
87
+ # 'ORT_LOGGING_LEVEL': 'WARNING'
88
+ # })
89
 
90
+ # # Try to fix ONNX Runtime libraries before running preloader
91
+ # try:
92
+ # import subprocess
93
+ # subprocess.run([
94
+ # 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
95
+ # '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
96
+ # ], capture_output=True, timeout=30)
97
+ # except:
98
+ # pass # Continue if execstack fix fails
99
 
100
  # Try to run the preloader
101
  result = subprocess.run(
102
  ['python', 'model_preloader.py'],
103
  capture_output=True,
104
  text=True,
105
+ timeout=300 # 5 minute timeout
106
+ # env=env
107
  )
108
 
109
  if result.returncode == 0:
 
113
  return True
114
  else:
115
  logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
116
+ # if result.stderr:
117
+ # # Filter out expected ONNX warnings
118
+ # stderr_lines = result.stderr.split('\n')
119
+ # important_errors = [line for line in stderr_lines
120
+ # if 'executable stack' not in line.lower()
121
+ # and 'onnxruntime' not in line.lower()
122
+ # and line.strip()]
123
+ # if important_errors:
124
+ # logger.warning(f'Important errors: {important_errors[:3]}')
125
  return False
126
 
127
  except subprocess.TimeoutExpired: