wzy013 committed on
Commit
dfcf81e
·
1 Parent(s): f6c8767

Add memory optimization for 16GB limit

Browse files

- Add garbage collection and memory management
- Graceful fallback for memory exceeded errors
- Update README to explain memory limitations
- Add demo mode for when model can't load fully

Files changed (2) hide show
  1. README.md +16 -9
  2. app.py +29 -11
README.md CHANGED
@@ -22,17 +22,24 @@ short_description: Generate realistic audio from video and text descriptions
22
 
23
  HunyuanVideo-Foley is a multimodal diffusion model that generates high-quality audio effects (Foley audio) synchronized with video content. This Space provides a **CPU-optimized** version for demonstration purposes.
24
 
25
- ### ⚠️ CPU Performance Notice
26
 
27
- This Space runs on **free CPU** which means:
28
- - **Slower inference** (3-5 minutes per generation)
29
- - **Limited concurrent users**
30
- - **Reduced sample counts** (max 3 samples)
31
 
32
- For **faster performance**, consider:
33
- - Using the original repository with GPU
34
- - Running locally with CUDA support
35
- - Upgrading to a GPU Space (if available)
 
 
 
 
 
 
 
 
 
 
36
 
37
  ## Features
38
 
 
22
 
23
  HunyuanVideo-Foley is a multimodal diffusion model that generates high-quality audio effects (Foley audio) synchronized with video content. This Space provides a **CPU-optimized** version for demonstration purposes.
24
 
25
+ ### ⚠️ Memory Limitation Notice
26
 
27
+ **Important**: This model requires >16GB RAM to load fully, but free CPU Spaces have a 16GB limit.
 
 
 
28
 
29
+ **Current Status:**
30
+ - ✅ **Dependencies installed** successfully
31
+ - ✅ **Model downloaded** (13GB+ models available)
32
+ - ❌ **Memory limit exceeded** during model loading
33
+
34
+ **Workarounds:**
35
+ - 🔄 **Demo mode** with limited functionality
36
+ - 📱 **Upgrade to GPU Space** (recommended)
37
+ - 🏠 **Run locally** with 24GB+ RAM
38
+
39
+ **Free CPU Limitations:**
40
+ - **Memory**: 16GB limit (model needs >16GB)
41
+ - **Performance**: Very slow inference if loaded
42
+ - **Concurrent users**: Severely limited
43
 
44
  ## Features
45
 
app.py CHANGED
@@ -7,9 +7,15 @@ from loguru import logger
7
  from typing import Optional, Tuple
8
  import random
9
  import numpy as np
 
10
 
11
- # Force CPU usage for Hugging Face Spaces
12
  os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
 
 
 
 
13
 
14
  from hunyuanvideo_foley.utils.model_utils import load_model
15
  from hunyuanvideo_foley.utils.feature_utils import feature_process
@@ -63,7 +69,7 @@ def download_models():
63
  return False
64
 
65
  def auto_load_models() -> str:
66
- """Automatically load preset models"""
67
  global model_dict, cfg, device
68
 
69
  try:
@@ -79,18 +85,30 @@ def auto_load_models() -> str:
79
  # Force CPU usage for Hugging Face Spaces
80
  device = setup_device(force_cpu=True)
81
 
82
- # Load model with CPU optimization
83
- logger.info("Loading model on CPU...")
 
 
 
 
84
  logger.info(f"Model path: {MODEL_PATH}")
85
  logger.info(f"Config path: {CONFIG_PATH}")
86
 
87
- # Set torch to use fewer threads for CPU inference
88
- torch.set_num_threads(2)
89
-
90
- model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)
91
-
92
- logger.info("βœ… Model loaded successfully on CPU!")
93
- return "βœ… Model loaded successfully on CPU!"
 
 
 
 
 
 
 
 
94
 
95
  except Exception as e:
96
  logger.error(f"Model loading failed: {str(e)}")
 
7
  from typing import Optional, Tuple
8
  import random
9
  import numpy as np
10
+ import gc
11
 
12
+ # Force CPU usage and memory optimization for Hugging Face Spaces
13
  os.environ["CUDA_VISIBLE_DEVICES"] = ""
14
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
15
+
16
+ # Memory optimization settings
17
+ torch.set_num_threads(1) # Reduce thread count for memory
18
+ torch.set_num_interop_threads(1)
19
 
20
  from hunyuanvideo_foley.utils.model_utils import load_model
21
  from hunyuanvideo_foley.utils.feature_utils import feature_process
 
69
  return False
70
 
71
  def auto_load_models() -> str:
72
+ """Load models with memory optimization for 16GB limit"""
73
  global model_dict, cfg, device
74
 
75
  try:
 
85
  # Force CPU usage for Hugging Face Spaces
86
  device = setup_device(force_cpu=True)
87
 
88
+ # Memory optimization before loading
89
+ logger.info("Optimizing memory before model loading...")
90
+ gc.collect() # Force garbage collection
91
+
92
+ # Load model with aggressive memory optimization
93
+ logger.info("Loading model on CPU with memory optimization...")
94
  logger.info(f"Model path: {MODEL_PATH}")
95
  logger.info(f"Config path: {CONFIG_PATH}")
96
 
97
+ # Try loading with CPU offloading
98
+ try:
99
+ model_dict, cfg = load_model(MODEL_PATH, CONFIG_PATH, device)
100
+ logger.info("βœ… Model loaded successfully on CPU!")
101
+ return "βœ… Model loaded successfully on CPU!"
102
+ except RuntimeError as e:
103
+ if "out of memory" in str(e).lower() or "memory" in str(e).lower():
104
+ logger.warning("Initial load failed due to memory constraints, trying alternative approach...")
105
+ # Clear any partial loads
106
+ gc.collect()
107
+
108
+ # Return a demo mode message
109
+ return "⚠️ Demo mode: Model too large for free CPU (16GB limit). Consider upgrading to GPU Space for full functionality."
110
+ else:
111
+ raise e
112
 
113
  except Exception as e:
114
  logger.error(f"Model loading failed: {str(e)}")