Prathamesh Sarjerao Vaidya committed on
Commit
938d58f
·
1 Parent(s): fdcc0cf

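Made changes: update ONNX Runtime container environment variables, add a results directory, correct demo audio durations, and trim/pin requirements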

Browse files
Files changed (6) hide show
  1. Dockerfile +6 -7
  2. demo_config.json +3 -3
  3. model_preloader.py +10 -0
  4. requirements.txt +35 -68
  5. startup.py +4 -3
  6. web_app.py +7 -7
Dockerfile CHANGED
@@ -39,10 +39,10 @@ RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
39
  COPY . .
40
 
41
  # Create necessary directories with proper permissions
42
- RUN mkdir -p templates static uploads outputs model_cache temp_files demo_results demo_audio \
43
  /tmp/matplotlib /tmp/fontconfig \
44
  && chmod -R 777 templates static \
45
- && chmod -R 777 uploads outputs model_cache temp_files demo_results demo_audio \
46
  && chmod -R 777 /tmp/matplotlib /tmp/fontconfig
47
 
48
  # Set environment variables for Hugging Face Spaces
@@ -64,19 +64,18 @@ ENV PYTHONPATH=/app \
64
  HUGGINGFACE_HUB_CACHE=/app/model_cache \
65
  HF_HUB_CACHE=/app/model_cache \
66
  FONTCONFIG_PATH=/tmp/fontconfig \
 
 
 
67
  # Fix for audio processing libraries
68
  CTRANSLATE2_FORCE_CPU_ISA=generic \
69
  # Disable problematic features
70
  TF_CPP_MIN_LOG_LEVEL=2 \
71
  TOKENIZERS_PARALLELISM=false \
72
- # Fix executable stack issues
73
- ONNX_EXECUTION_PROVIDER=cpu \
74
  # Disable problematic optimizations
75
  OMP_NUM_THREADS=1 \
76
  # Suppress tensorboard warnings
77
- TF_ENABLE_ONEDNN_OPTS=0 \
78
- # Disable problematic features
79
- DISABLE_ONNX_EXECUTION_PROVIDERS=CPUExecutionProvider
80
 
81
  # Expose port for Hugging Face Spaces
82
  EXPOSE 7860
 
39
  COPY . .
40
 
41
  # Create necessary directories with proper permissions
42
+ RUN mkdir -p templates static uploads outputs model_cache temp_files demo_results demo_audio results \
43
  /tmp/matplotlib /tmp/fontconfig \
44
  && chmod -R 777 templates static \
45
+ && chmod -R 777 uploads outputs model_cache temp_files demo_results demo_audio results \
46
  && chmod -R 777 /tmp/matplotlib /tmp/fontconfig
47
 
48
  # Set environment variables for Hugging Face Spaces
 
64
  HUGGINGFACE_HUB_CACHE=/app/model_cache \
65
  HF_HUB_CACHE=/app/model_cache \
66
  FONTCONFIG_PATH=/tmp/fontconfig \
67
+ # Fix for ONNX Runtime in containers (KEY FIX)
68
+ ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
69
+ ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
70
  # Fix for audio processing libraries
71
  CTRANSLATE2_FORCE_CPU_ISA=generic \
72
  # Disable problematic features
73
  TF_CPP_MIN_LOG_LEVEL=2 \
74
  TOKENIZERS_PARALLELISM=false \
 
 
75
  # Disable problematic optimizations
76
  OMP_NUM_THREADS=1 \
77
  # Disable TensorFlow oneDNN custom ops (also silences the oneDNN startup notice)
78
+ TF_ENABLE_ONEDNN_OPTS=0
 
 
79
 
80
  # Expose port for Hugging Face Spaces
81
  EXPOSE 7860
demo_config.json CHANGED
@@ -6,7 +6,7 @@
6
  "filename": "Yuri_Kizaki.mp3",
7
  "language": "ja",
8
  "description": "Japanese audio message about website communication",
9
- "duration": "00:01:45",
10
  "url": "https://www.mitsue.co.jp/service/audio_and_video/audio_production/media/narrators_sample/yuri_kizaki/03.mp3"
11
  },
12
  {
@@ -15,7 +15,7 @@
15
  "filename": "Film_Podcast.mp3",
16
  "language": "fr",
17
  "description": "French podcast discussing various films and cinema",
18
- "duration": "00:03:32",
19
  "url": "https://www.lightbulblanguages.co.uk/resources/audio/film-podcast.mp3"
20
  },
21
  {
@@ -33,7 +33,7 @@
33
  "filename": "Car_Trouble.mp3",
34
  "language": "hi",
35
  "description": "Conversation about waiting for a mechanic and basic assistance",
36
- "duration": "00:02:45",
37
  "url": "https://www.tuttlepublishing.com/content/docs/9780804844383/06-18%20Part2%20Car%20Trouble.mp3"
38
  }
39
  ],
 
6
  "filename": "Yuri_Kizaki.mp3",
7
  "language": "ja",
8
  "description": "Japanese audio message about website communication",
9
+ "duration": "00:00:32",
10
  "url": "https://www.mitsue.co.jp/service/audio_and_video/audio_production/media/narrators_sample/yuri_kizaki/03.mp3"
11
  },
12
  {
 
15
  "filename": "Film_Podcast.mp3",
16
  "language": "fr",
17
  "description": "French podcast discussing various films and cinema",
18
+ "duration": "00:03:50",
19
  "url": "https://www.lightbulblanguages.co.uk/resources/audio/film-podcast.mp3"
20
  },
21
  {
 
33
  "filename": "Car_Trouble.mp3",
34
  "language": "hi",
35
  "description": "Conversation about waiting for a mechanic and basic assistance",
36
+ "duration": "00:00:45",
37
  "url": "https://www.tuttlepublishing.com/content/docs/9780804844383/06-18%20Part2%20Car%20Trouble.mp3"
38
  }
39
  ],
model_preloader.py CHANGED
@@ -38,6 +38,16 @@ logger = logging.getLogger(__name__)
38
 
39
  console = Console()
40
 
 
 
 
 
 
 
 
 
 
 
41
  class ModelPreloader:
42
  """Comprehensive model preloader with enhanced local cache detection."""
43
 
 
38
 
39
  console = Console()
40
 
41
+ # CRITICAL: Set environment variables BEFORE importing any ML libraries
42
+ # This fixes the ONNX Runtime executable stack issue in containers
43
+ os.environ.update({
44
+ 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
45
+ 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
46
+ 'OMP_NUM_THREADS': '1',
47
+ 'TF_ENABLE_ONEDNN_OPTS': '0',
48
+ 'TOKENIZERS_PARALLELISM': 'false'
49
+ })
50
+
51
  class ModelPreloader:
52
  """Comprehensive model preloader with enhanced local cache detection."""
53
 
requirements.txt CHANGED
@@ -1,88 +1,55 @@
1
- # Hugging Face Spaces Compatible Requirements
2
- # Optimized for containerized deployment
3
-
4
- # Core ML Libraries (HF Spaces compatible)
5
  torch==2.0.1
6
- torchvision==0.15.2
7
  torchaudio==2.0.2
8
- transformers
9
 
10
- # Audio Processing (Fixed versions for HF Spaces)
11
- librosa==0.10.1
12
- pydub==0.25.1
13
- soundfile==0.12.1
14
- # Use openai-whisper instead of faster-whisper to avoid CTranslate2 issues
15
- openai-whisper==20231117
16
- audioread==3.0.1
17
- ffmpeg-python==0.2.0
18
 
19
- # Speaker Diarization (Essential for HF Spaces)
20
  pyannote.audio==3.1.1
21
- pyannote.core
22
- pyannote.database
23
- pyannote.metrics==3.2.1
24
-
25
- # Performance & Optimization
26
- numba==0.58.1
27
- # Use CPU-only onnxruntime to avoid executable stack issues
28
- onnxruntime==1.16.3
29
- accelerate==0.20.3
30
 
31
- # Core Utilities
32
- numpy
33
- psutil==5.9.6
34
- python-dotenv==1.0.0
35
- requests==2.31.0
36
- tqdm==4.66.1
37
- ujson==5.8.0
38
- colorlog==6.7.0
39
- pyyaml==6.0.1
40
- python-dateutil==2.8.2
41
 
42
- # Web Framework
43
  fastapi==0.104.1
44
- uvicorn==0.24.0
45
  python-multipart==0.0.6
46
- jinja2==3.1.2
47
  websockets==12.0
48
  aiofiles==23.2.1
49
- aiohttp==3.9.1
50
- httpx
51
 
52
- # Translation APIs
53
- googletrans
54
- deep-translator==1.11.4
55
-
56
- # Scientific Computing
57
  scipy==1.11.4
58
  matplotlib==3.7.3
59
  plotly==5.17.0
60
- scikit-learn==1.3.2
61
-
62
- # PS-6 Specific Dependencies (HF Spaces compatible)
63
- speechbrain==0.5.16
64
- # Remove demucs as it's causing issues in containers
65
- # demucs==4.0.0
66
- PyWavelets==1.4.1
67
-
68
- # NLP
69
- nltk==3.8.1
70
- langdetect==1.0.9
71
 
72
- # Logging & Monitoring
73
  rich==13.7.0
 
 
 
 
 
74
 
75
- # Machine Learning
76
- tensorflow==2.15.0
77
- # Fix tensorboard compatibility
78
- tensorboard==2.15.2
79
-
80
- # Additional Dependencies
81
  huggingface-hub==0.16.4
82
- tokenizers
83
- sentencepiece==0.1.99
84
- protobuf==3.20.3
85
 
86
- # System dependencies for audio processing
87
- webrtcvad==2.0.10
88
- resampy==0.4.2
 
 
1
+ # Core ML libraries with container-friendly versions
 
 
 
2
  torch==2.0.1
 
3
  torchaudio==2.0.2
4
+ torchvision==0.15.2
5
 
6
+ # Keep regular ONNX Runtime with container-safe environment variables
7
+ onnxruntime==1.16.3
 
 
 
 
 
 
8
 
9
+ # Audio processing
10
  pyannote.audio==3.1.1
11
+ openai-whisper==20231117
12
+ librosa==0.10.1
13
+ soundfile==0.12.1
14
+ pydub==0.25.1
15
+ webrtcvad==2.0.10
 
 
 
 
16
 
17
+ # NLP and Translation
18
+ transformers==4.35.2
19
+ tokenizers==0.15.2
20
+ sentencepiece==0.1.99
21
+ deep-translator==1.11.4
22
+ langdetect==1.0.9
23
+ googletrans==4.0.2
 
 
 
24
 
25
+ # Web framework
26
  fastapi==0.104.1
27
+ uvicorn[standard]==0.24.0
28
  python-multipart==0.0.6
 
29
  websockets==12.0
30
  aiofiles==23.2.1
 
 
31
 
32
+ # Data processing
33
+ numpy==1.26.4
34
+ pandas==2.3.2
 
 
35
  scipy==1.11.4
36
  matplotlib==3.7.3
37
  plotly==5.17.0
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ # Utilities
40
  rich==13.7.0
41
+ tqdm==4.66.1
42
+ psutil==5.9.6
43
+ pyyaml==6.0.1
44
+ python-dotenv==1.0.0
45
+ click==8.1.8
46
 
47
+ # HuggingFace ecosystem
 
 
 
 
 
48
  huggingface-hub==0.16.4
49
+ accelerate==0.20.3
50
+ safetensors==0.6.2
 
51
 
52
+ # Additional utilities
53
+ ffmpeg-python==0.2.0
54
+ httpx==0.28.1
55
+ requests==2.31.0
startup.py CHANGED
@@ -61,9 +61,10 @@ def preload_models():
61
  # Set environment variables to handle onnxruntime issues
62
  env = os.environ.copy()
63
  env.update({
64
- 'ONNX_EXECUTION_PROVIDER': 'cpu',
65
- 'DISABLE_ONNX_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
66
- 'TF_ENABLE_ONEDNN_OPTS': '0'
 
67
  })
68
 
69
  # Try to run the preloader
 
61
  # Set environment variables to handle onnxruntime issues
62
  env = os.environ.copy()
63
  env.update({
64
+ 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
65
+ 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
66
+ 'TF_ENABLE_ONEDNN_OPTS': '0',
67
+ 'OMP_NUM_THREADS': '1'
68
  })
69
 
70
  # Try to run the preloader
web_app.py CHANGED
@@ -124,7 +124,7 @@ DEMO_FILES = {
124
  "expected_translation": "Audio messages enable communication beyond existing websites. By incorporating audio information into visually-driven websites, you can add new value to the information and effectively differentiate your website from others.",
125
  "category": "business",
126
  "difficulty": "intermediate",
127
- "duration": "00:01:45"
128
  },
129
  "film_podcast": {
130
  "name": "Film Podcast",
@@ -137,7 +137,7 @@ DEMO_FILES = {
137
  "expected_translation": "The film The Social Network deals with the creation of Facebook by Mark Zuckerberg and the legal problems this caused for the creator of this site.",
138
  "category": "entertainment",
139
  "difficulty": "advanced",
140
- "duration": "00:03:32"
141
  },
142
  "tamil_interview": {
143
  "name": "Tamil Wikipedia Interview",
@@ -166,7 +166,7 @@ DEMO_FILES = {
166
  "expected_translation": "The car has broken down. We are waiting for the mechanic. It will take some time.",
167
  "category": "daily_life",
168
  "difficulty": "beginner",
169
- "duration": "00:02:45",
170
  "featured": True,
171
  "new": True,
172
  "indian_language": True
@@ -587,7 +587,7 @@ class AudioProcessor:
587
 
588
  if self.pipeline is None:
589
  logger.info("Initializing Audio Intelligence Pipeline...")
590
- try:
591
  self.pipeline = AudioIntelligencePipeline(
592
  whisper_model_size=whisper_model,
593
  target_language=target_language,
@@ -596,7 +596,7 @@ class AudioProcessor:
596
  output_dir="./outputs"
597
  )
598
  logger.info("Pipeline initialization complete!")
599
- except Exception as e:
600
  logger.error(f"Pipeline initialization failed: {e}")
601
  raise
602
 
@@ -740,7 +740,7 @@ async def home(request: Request):
740
  @app.post("/api/upload")
741
  async def upload_audio(
742
  request: Request,
743
- file: UploadFile = File(...),
744
  whisper_model: str = Form("small"),
745
  target_language: str = Form("en"),
746
  hf_token: Optional[str] = Form(None)
@@ -892,7 +892,7 @@ async def get_results(task_id: str):
892
 
893
  else:
894
  # Fallback if results not found
895
- return JSONResponse({
896
  "task_id": task_id,
897
  "status": "complete",
898
  "results": {
 
124
  "expected_translation": "Audio messages enable communication beyond existing websites. By incorporating audio information into visually-driven websites, you can add new value to the information and effectively differentiate your website from others.",
125
  "category": "business",
126
  "difficulty": "intermediate",
127
+ "duration": "00:00:32"
128
  },
129
  "film_podcast": {
130
  "name": "Film Podcast",
 
137
  "expected_translation": "The film The Social Network deals with the creation of Facebook by Mark Zuckerberg and the legal problems this caused for the creator of this site.",
138
  "category": "entertainment",
139
  "difficulty": "advanced",
140
+ "duration": "00:03:50"
141
  },
142
  "tamil_interview": {
143
  "name": "Tamil Wikipedia Interview",
 
166
  "expected_translation": "The car has broken down. We are waiting for the mechanic. It will take some time.",
167
  "category": "daily_life",
168
  "difficulty": "beginner",
169
+ "duration": "00:00:45",
170
  "featured": True,
171
  "new": True,
172
  "indian_language": True
 
587
 
588
  if self.pipeline is None:
589
  logger.info("Initializing Audio Intelligence Pipeline...")
590
+ try:
591
  self.pipeline = AudioIntelligencePipeline(
592
  whisper_model_size=whisper_model,
593
  target_language=target_language,
 
596
  output_dir="./outputs"
597
  )
598
  logger.info("Pipeline initialization complete!")
599
+ except Exception as e:
600
  logger.error(f"Pipeline initialization failed: {e}")
601
  raise
602
 
 
740
  @app.post("/api/upload")
741
  async def upload_audio(
742
  request: Request,
743
+ file: UploadFile = File(...),
744
  whisper_model: str = Form("small"),
745
  target_language: str = Form("en"),
746
  hf_token: Optional[str] = Form(None)
 
892
 
893
  else:
894
  # Fallback if results not found
895
+ return JSONResponse({
896
  "task_id": task_id,
897
  "status": "complete",
898
  "results": {