Spaces:
Running
π¨ CRITICAL FIX: Remove all Unicode characters causing Python syntax errors
Browse filesβ RUNTIME ERROR FIXED:
- SyntaxError: invalid character 'β ' (U+26A0) at line 715
- Cleaned ALL Unicode emojis from Python source code
- Replaced problematic Unicode with ASCII equivalents
π§ COMPREHENSIVE CLEANUP:
- app.py: Fixed description_extra with Unicode warning emoji
- All .py files: Replaced Unicode emojis with text equivalents
- β οΈ β WARNING:, β
β SUCCESS:, β β ERROR:, π¬ β [VIDEO], etc.
- Maintained functionality while ensuring Python compatibility
π FILES CLEANED:
- app.py (main application)
- advanced_tts_client.py
- omniavatar_video_engine.py
- download_models_production.py
- All other Python files with Unicode characters
β
RESULT:
- No more Python syntax errors
- Application should start without Unicode character issues
- All functionality preserved with ASCII-safe text
- Compatible with all Python environments and containers
This fixes the critical runtime error that prevented application startup! π
- advanced_tts_client.py +13 -12
- app.py +51 -49
- build_test.py +16 -15
- download_models_production.py +33 -32
- elevenlabs_integration.py +5 -4
- fastapi_fix.py +4 -3
- hf_tts_client.py +5 -4
- install_dependencies.py +19 -18
- minimal_tts_client.py +5 -4
- omniavatar_engine.py +8 -7
- omniavatar_import.py +3 -2
- omniavatar_video_engine.py +23 -22
- robust_tts_client.py +7 -6
- setup_omniavatar.py +24 -23
- simple_tts_client.py +7 -6
- start_video_app.py +14 -13
- test_hf_tts.py +3 -2
- test_new_tts.py +16 -15
|
@@ -33,16 +33,16 @@ class AdvancedTTSClient:
|
|
| 33 |
try:
|
| 34 |
import transformers
|
| 35 |
self.transformers_available = True
|
| 36 |
-
logger.info("
|
| 37 |
except ImportError:
|
| 38 |
-
logger.warning("
|
| 39 |
|
| 40 |
try:
|
| 41 |
import datasets
|
| 42 |
self.datasets_available = True
|
| 43 |
-
logger.info("
|
| 44 |
except ImportError:
|
| 45 |
-
logger.warning("
|
| 46 |
|
| 47 |
logger.info(f"Transformers available: {self.transformers_available}")
|
| 48 |
logger.info(f"Datasets available: {self.datasets_available}")
|
|
@@ -52,15 +52,15 @@ class AdvancedTTSClient:
|
|
| 52 |
Load advanced TTS models if dependencies are available
|
| 53 |
"""
|
| 54 |
if not self.transformers_available:
|
| 55 |
-
logger.warning("
|
| 56 |
return False
|
| 57 |
|
| 58 |
if not self.datasets_available:
|
| 59 |
-
logger.warning("
|
| 60 |
return False
|
| 61 |
|
| 62 |
try:
|
| 63 |
-
logger.info("
|
| 64 |
|
| 65 |
# Import here to avoid import errors if not available
|
| 66 |
from transformers import AutoProcessor, AutoModel
|
|
@@ -76,11 +76,11 @@ class AdvancedTTSClient:
|
|
| 76 |
}
|
| 77 |
|
| 78 |
self.models_loaded = True
|
| 79 |
-
logger.info("
|
| 80 |
return True
|
| 81 |
|
| 82 |
except Exception as e:
|
| 83 |
-
logger.error(f"
|
| 84 |
return False
|
| 85 |
|
| 86 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
|
|
@@ -88,7 +88,7 @@ class AdvancedTTSClient:
|
|
| 88 |
Generate speech from text using advanced TTS
|
| 89 |
"""
|
| 90 |
if not self.models_loaded:
|
| 91 |
-
logger.warning("
|
| 92 |
success = await self.load_models()
|
| 93 |
if not success:
|
| 94 |
raise RuntimeError("Advanced TTS models not available")
|
|
@@ -113,11 +113,11 @@ class AdvancedTTSClient:
|
|
| 113 |
sf.write(temp_file.name, audio, sample_rate)
|
| 114 |
temp_file.close()
|
| 115 |
|
| 116 |
-
logger.info(f"
|
| 117 |
return temp_file.name
|
| 118 |
|
| 119 |
except Exception as e:
|
| 120 |
-
logger.error(f"
|
| 121 |
raise
|
| 122 |
|
| 123 |
async def get_available_voices(self) -> Dict[str, str]:
|
|
@@ -146,3 +146,4 @@ class AdvancedTTSClient:
|
|
| 146 |
|
| 147 |
# Export for backwards compatibility
|
| 148 |
__all__ = ['AdvancedTTSClient']
|
|
|
|
|
|
| 33 |
try:
|
| 34 |
import transformers
|
| 35 |
self.transformers_available = True
|
| 36 |
+
logger.info("SUCCESS: Transformers library available")
|
| 37 |
except ImportError:
|
| 38 |
+
logger.warning("WARNING: Transformers library not available")
|
| 39 |
|
| 40 |
try:
|
| 41 |
import datasets
|
| 42 |
self.datasets_available = True
|
| 43 |
+
logger.info("SUCCESS: Datasets library available")
|
| 44 |
except ImportError:
|
| 45 |
+
logger.warning("WARNING: Datasets library not available")
|
| 46 |
|
| 47 |
logger.info(f"Transformers available: {self.transformers_available}")
|
| 48 |
logger.info(f"Datasets available: {self.datasets_available}")
|
|
|
|
| 52 |
Load advanced TTS models if dependencies are available
|
| 53 |
"""
|
| 54 |
if not self.transformers_available:
|
| 55 |
+
logger.warning("ERROR: Transformers not available - cannot load advanced TTS models")
|
| 56 |
return False
|
| 57 |
|
| 58 |
if not self.datasets_available:
|
| 59 |
+
logger.warning("ERROR: Datasets not available - cannot load advanced TTS models")
|
| 60 |
return False
|
| 61 |
|
| 62 |
try:
|
| 63 |
+
logger.info("[PROCESS] Loading advanced TTS models...")
|
| 64 |
|
| 65 |
# Import here to avoid import errors if not available
|
| 66 |
from transformers import AutoProcessor, AutoModel
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
self.models_loaded = True
|
| 79 |
+
logger.info("SUCCESS: Advanced TTS models loaded successfully")
|
| 80 |
return True
|
| 81 |
|
| 82 |
except Exception as e:
|
| 83 |
+
logger.error(f"ERROR: Failed to load advanced TTS models: {e}")
|
| 84 |
return False
|
| 85 |
|
| 86 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
|
|
|
|
| 88 |
Generate speech from text using advanced TTS
|
| 89 |
"""
|
| 90 |
if not self.models_loaded:
|
| 91 |
+
logger.warning("WARNING: Advanced TTS models not loaded, attempting to load...")
|
| 92 |
success = await self.load_models()
|
| 93 |
if not success:
|
| 94 |
raise RuntimeError("Advanced TTS models not available")
|
|
|
|
| 113 |
sf.write(temp_file.name, audio, sample_rate)
|
| 114 |
temp_file.close()
|
| 115 |
|
| 116 |
+
logger.info(f"SUCCESS: Advanced TTS audio generated: {temp_file.name}")
|
| 117 |
return temp_file.name
|
| 118 |
|
| 119 |
except Exception as e:
|
| 120 |
+
logger.error(f"ERROR: Advanced TTS generation failed: {e}")
|
| 121 |
raise
|
| 122 |
|
| 123 |
async def get_available_voices(self) -> Dict[str, str]:
|
|
|
|
| 146 |
|
| 147 |
# Export for backwards compatibility
|
| 148 |
__all__ = ['AdvancedTTSClient']
|
| 149 |
+
|
|
@@ -88,19 +88,19 @@ class GenerateResponse(BaseModel):
|
|
| 88 |
try:
|
| 89 |
from advanced_tts_client import AdvancedTTSClient
|
| 90 |
ADVANCED_TTS_AVAILABLE = True
|
| 91 |
-
logger.info("
|
| 92 |
except ImportError as e:
|
| 93 |
ADVANCED_TTS_AVAILABLE = False
|
| 94 |
-
logger.warning(f"
|
| 95 |
|
| 96 |
# Always import the robust fallback
|
| 97 |
try:
|
| 98 |
from robust_tts_client import RobustTTSClient
|
| 99 |
ROBUST_TTS_AVAILABLE = True
|
| 100 |
-
logger.info("
|
| 101 |
except ImportError as e:
|
| 102 |
ROBUST_TTS_AVAILABLE = False
|
| 103 |
-
logger.error(f"
|
| 104 |
|
| 105 |
class TTSManager:
|
| 106 |
"""Manages multiple TTS clients with fallback chain"""
|
|
@@ -114,19 +114,19 @@ class TTSManager:
|
|
| 114 |
if ADVANCED_TTS_AVAILABLE:
|
| 115 |
try:
|
| 116 |
self.advanced_tts = AdvancedTTSClient()
|
| 117 |
-
logger.info("
|
| 118 |
except Exception as e:
|
| 119 |
-
logger.warning(f"
|
| 120 |
|
| 121 |
if ROBUST_TTS_AVAILABLE:
|
| 122 |
try:
|
| 123 |
self.robust_tts = RobustTTSClient()
|
| 124 |
-
logger.info("
|
| 125 |
except Exception as e:
|
| 126 |
-
logger.error(f"
|
| 127 |
|
| 128 |
if not self.advanced_tts and not self.robust_tts:
|
| 129 |
-
logger.error("
|
| 130 |
|
| 131 |
async def load_models(self):
|
| 132 |
"""Load TTS models"""
|
|
@@ -136,28 +136,28 @@ class TTSManager:
|
|
| 136 |
# Try to load advanced TTS first
|
| 137 |
if self.advanced_tts:
|
| 138 |
try:
|
| 139 |
-
logger.info("
|
| 140 |
success = await self.advanced_tts.load_models()
|
| 141 |
if success:
|
| 142 |
-
logger.info("
|
| 143 |
else:
|
| 144 |
-
logger.warning("
|
| 145 |
except Exception as e:
|
| 146 |
-
logger.warning(f"
|
| 147 |
|
| 148 |
# Always ensure robust TTS is available
|
| 149 |
if self.robust_tts:
|
| 150 |
try:
|
| 151 |
await self.robust_tts.load_model()
|
| 152 |
-
logger.info("
|
| 153 |
except Exception as e:
|
| 154 |
-
logger.error(f"
|
| 155 |
|
| 156 |
self.clients_loaded = True
|
| 157 |
return True
|
| 158 |
|
| 159 |
except Exception as e:
|
| 160 |
-
logger.error(f"
|
| 161 |
return False
|
| 162 |
|
| 163 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> tuple[str, str]:
|
|
@@ -244,10 +244,10 @@ class TTSManager:
|
|
| 244 |
try:
|
| 245 |
from omniavatar_video_engine import video_engine
|
| 246 |
VIDEO_ENGINE_AVAILABLE = True
|
| 247 |
-
logger.info("
|
| 248 |
except ImportError as e:
|
| 249 |
VIDEO_ENGINE_AVAILABLE = False
|
| 250 |
-
logger.error(f"
|
| 251 |
|
| 252 |
class OmniAvatarAPI:
|
| 253 |
def __init__(self):
|
|
@@ -273,23 +273,23 @@ class OmniAvatarAPI:
|
|
| 273 |
missing_models.append(path)
|
| 274 |
|
| 275 |
if missing_models:
|
| 276 |
-
logger.warning("
|
| 277 |
for model in missing_models:
|
| 278 |
logger.warning(f" - {model}")
|
| 279 |
-
logger.info("
|
| 280 |
-
logger.info("
|
| 281 |
|
| 282 |
# Set as loaded but in limited mode
|
| 283 |
self.model_loaded = False # Video generation disabled
|
| 284 |
return True # But app can still run
|
| 285 |
else:
|
| 286 |
self.model_loaded = True
|
| 287 |
-
logger.info("
|
| 288 |
return True
|
| 289 |
|
| 290 |
except Exception as e:
|
| 291 |
logger.error(f"Error checking models: {str(e)}")
|
| 292 |
-
logger.info("
|
| 293 |
self.model_loaded = False
|
| 294 |
return True # Continue running
|
| 295 |
|
|
@@ -345,18 +345,18 @@ class OmniAvatarAPI:
|
|
| 345 |
audio_generated = False
|
| 346 |
method_used = "Unknown"
|
| 347 |
|
| 348 |
-
logger.info("
|
| 349 |
-
logger.info(f"
|
| 350 |
|
| 351 |
if VIDEO_ENGINE_AVAILABLE:
|
| 352 |
try:
|
| 353 |
# PRIORITIZE VIDEO GENERATION
|
| 354 |
-
logger.info("
|
| 355 |
|
| 356 |
# Handle audio source
|
| 357 |
audio_path = None
|
| 358 |
if request.text_to_speech:
|
| 359 |
-
logger.info("
|
| 360 |
audio_path, method_used = await self.tts_manager.text_to_speech(
|
| 361 |
request.text_to_speech,
|
| 362 |
request.voice_id or "21m00Tcm4TlvDq8ikWAM"
|
|
@@ -372,13 +372,13 @@ class OmniAvatarAPI:
|
|
| 372 |
# Handle image if provided
|
| 373 |
image_path = None
|
| 374 |
if request.image_url:
|
| 375 |
-
logger.info("
|
| 376 |
parsed = urlparse(str(request.image_url))
|
| 377 |
ext = os.path.splitext(parsed.path)[1] or ".jpg"
|
| 378 |
image_path = await self.download_file(str(request.image_url), ext)
|
| 379 |
|
| 380 |
# GENERATE VIDEO using OmniAvatar engine
|
| 381 |
-
logger.info("
|
| 382 |
video_path, generation_time = video_engine.generate_avatar_video(
|
| 383 |
prompt=request.prompt,
|
| 384 |
audio_path=audio_path,
|
|
@@ -389,7 +389,7 @@ class OmniAvatarAPI:
|
|
| 389 |
)
|
| 390 |
|
| 391 |
processing_time = time.time() - start_time
|
| 392 |
-
logger.info(f"
|
| 393 |
|
| 394 |
# Cleanup temporary files
|
| 395 |
if audio_path and os.path.exists(audio_path):
|
|
@@ -400,7 +400,7 @@ class OmniAvatarAPI:
|
|
| 400 |
return video_path, processing_time, audio_generated, f"OmniAvatar Video Generation ({method_used})"
|
| 401 |
|
| 402 |
except Exception as e:
|
| 403 |
-
logger.error(f"
|
| 404 |
# For a VIDEO generation app, we should NOT fall back to audio-only
|
| 405 |
# Instead, provide clear guidance
|
| 406 |
if "models" in str(e).lower():
|
|
@@ -440,7 +440,7 @@ class OmniAvatarAPI:
|
|
| 440 |
|
| 441 |
# Return the audio file as the "output"
|
| 442 |
processing_time = time.time() - start_time
|
| 443 |
-
logger.info(f"
|
| 444 |
return audio_path, processing_time, True, f"{tts_method} (TTS-only mode)"
|
| 445 |
else:
|
| 446 |
raise HTTPException(
|
|
@@ -566,14 +566,14 @@ async def lifespan(app: FastAPI):
|
|
| 566 |
# Startup
|
| 567 |
success = omni_api.load_model()
|
| 568 |
if not success:
|
| 569 |
-
logger.warning("
|
| 570 |
|
| 571 |
# Load TTS models
|
| 572 |
try:
|
| 573 |
await omni_api.tts_manager.load_models()
|
| 574 |
-
logger.info("
|
| 575 |
except Exception as e:
|
| 576 |
-
logger.error(f"
|
| 577 |
|
| 578 |
yield
|
| 579 |
|
|
@@ -697,13 +697,13 @@ def gradio_generate(prompt, text_to_speech, audio_url, image_url, voice_id, guid
|
|
| 697 |
output_path, processing_time, audio_generated, tts_method = loop.run_until_complete(omni_api.generate_avatar(request))
|
| 698 |
loop.close()
|
| 699 |
|
| 700 |
-
success_message = f"
|
| 701 |
print(success_message)
|
| 702 |
|
| 703 |
if omni_api.model_loaded:
|
| 704 |
return output_path
|
| 705 |
else:
|
| 706 |
-
return f"ποΈ TTS Audio generated successfully using {tts_method}\nFile: {output_path}\n\
|
| 707 |
|
| 708 |
except Exception as e:
|
| 709 |
logger.error(f"Gradio generation error: {e}")
|
|
@@ -712,7 +712,7 @@ def gradio_generate(prompt, text_to_speech, audio_url, image_url, voice_id, guid
|
|
| 712 |
# Create Gradio interface
|
| 713 |
mode_info = " (TTS-Only Mode)" if not omni_api.model_loaded else ""
|
| 714 |
description_extra = """
|
| 715 |
-
|
| 716 |
To enable full video generation, the required model files need to be downloaded.
|
| 717 |
""" if not omni_api.model_loaded else ""
|
| 718 |
|
|
@@ -759,24 +759,24 @@ iface = gr.Interface(
|
|
| 759 |
gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Number of Steps", info="20-50 recommended")
|
| 760 |
],
|
| 761 |
outputs=gr.Video(label="Generated Avatar Video") if omni_api.model_loaded else gr.Textbox(label="TTS Output"),
|
| 762 |
-
title="
|
| 763 |
description=f"""
|
| 764 |
Generate avatar videos with lip-sync from text prompts and speech using robust TTS system.
|
| 765 |
|
| 766 |
{description_extra}
|
| 767 |
|
| 768 |
-
|
| 769 |
-
-
|
| 770 |
-
-
|
| 771 |
-
-
|
| 772 |
|
| 773 |
**Features:**
|
| 774 |
-
-
|
| 775 |
-
-
|
| 776 |
-
-
|
| 777 |
-
-
|
| 778 |
-
-
|
| 779 |
-
-
|
| 780 |
|
| 781 |
**Usage:**
|
| 782 |
1. Enter a character description in the prompt
|
|
@@ -823,3 +823,5 @@ if __name__ == "__main__":
|
|
| 823 |
|
| 824 |
|
| 825 |
|
|
|
|
|
|
|
|
|
| 88 |
try:
|
| 89 |
from advanced_tts_client import AdvancedTTSClient
|
| 90 |
ADVANCED_TTS_AVAILABLE = True
|
| 91 |
+
logger.info("SUCCESS: Advanced TTS client available")
|
| 92 |
except ImportError as e:
|
| 93 |
ADVANCED_TTS_AVAILABLE = False
|
| 94 |
+
logger.warning(f"WARNING: Advanced TTS client not available: {e}")
|
| 95 |
|
| 96 |
# Always import the robust fallback
|
| 97 |
try:
|
| 98 |
from robust_tts_client import RobustTTSClient
|
| 99 |
ROBUST_TTS_AVAILABLE = True
|
| 100 |
+
logger.info("SUCCESS: Robust TTS client available")
|
| 101 |
except ImportError as e:
|
| 102 |
ROBUST_TTS_AVAILABLE = False
|
| 103 |
+
logger.error(f"ERROR: Robust TTS client not available: {e}")
|
| 104 |
|
| 105 |
class TTSManager:
|
| 106 |
"""Manages multiple TTS clients with fallback chain"""
|
|
|
|
| 114 |
if ADVANCED_TTS_AVAILABLE:
|
| 115 |
try:
|
| 116 |
self.advanced_tts = AdvancedTTSClient()
|
| 117 |
+
logger.info("SUCCESS: Advanced TTS client initialized")
|
| 118 |
except Exception as e:
|
| 119 |
+
logger.warning(f"WARNING: Advanced TTS client initialization failed: {e}")
|
| 120 |
|
| 121 |
if ROBUST_TTS_AVAILABLE:
|
| 122 |
try:
|
| 123 |
self.robust_tts = RobustTTSClient()
|
| 124 |
+
logger.info("SUCCESS: Robust TTS client initialized")
|
| 125 |
except Exception as e:
|
| 126 |
+
logger.error(f"ERROR: Robust TTS client initialization failed: {e}")
|
| 127 |
|
| 128 |
if not self.advanced_tts and not self.robust_tts:
|
| 129 |
+
logger.error("ERROR: No TTS clients available!")
|
| 130 |
|
| 131 |
async def load_models(self):
|
| 132 |
"""Load TTS models"""
|
|
|
|
| 136 |
# Try to load advanced TTS first
|
| 137 |
if self.advanced_tts:
|
| 138 |
try:
|
| 139 |
+
logger.info("[PROCESS] Loading advanced TTS models (this may take a few minutes)...")
|
| 140 |
success = await self.advanced_tts.load_models()
|
| 141 |
if success:
|
| 142 |
+
logger.info("SUCCESS: Advanced TTS models loaded successfully")
|
| 143 |
else:
|
| 144 |
+
logger.warning("WARNING: Advanced TTS models failed to load")
|
| 145 |
except Exception as e:
|
| 146 |
+
logger.warning(f"WARNING: Advanced TTS loading error: {e}")
|
| 147 |
|
| 148 |
# Always ensure robust TTS is available
|
| 149 |
if self.robust_tts:
|
| 150 |
try:
|
| 151 |
await self.robust_tts.load_model()
|
| 152 |
+
logger.info("SUCCESS: Robust TTS fallback ready")
|
| 153 |
except Exception as e:
|
| 154 |
+
logger.error(f"ERROR: Robust TTS loading failed: {e}")
|
| 155 |
|
| 156 |
self.clients_loaded = True
|
| 157 |
return True
|
| 158 |
|
| 159 |
except Exception as e:
|
| 160 |
+
logger.error(f"ERROR: TTS manager initialization failed: {e}")
|
| 161 |
return False
|
| 162 |
|
| 163 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> tuple[str, str]:
|
|
|
|
| 244 |
try:
|
| 245 |
from omniavatar_video_engine import video_engine
|
| 246 |
VIDEO_ENGINE_AVAILABLE = True
|
| 247 |
+
logger.info("SUCCESS: OmniAvatar Video Engine available")
|
| 248 |
except ImportError as e:
|
| 249 |
VIDEO_ENGINE_AVAILABLE = False
|
| 250 |
+
logger.error(f"ERROR: OmniAvatar Video Engine not available: {e}")
|
| 251 |
|
| 252 |
class OmniAvatarAPI:
|
| 253 |
def __init__(self):
|
|
|
|
| 273 |
missing_models.append(path)
|
| 274 |
|
| 275 |
if missing_models:
|
| 276 |
+
logger.warning("WARNING: Some OmniAvatar models not found:")
|
| 277 |
for model in missing_models:
|
| 278 |
logger.warning(f" - {model}")
|
| 279 |
+
logger.info("TIP: App will run in TTS-only mode (no video generation)")
|
| 280 |
+
logger.info("TIP: To enable full avatar generation, download the required models")
|
| 281 |
|
| 282 |
# Set as loaded but in limited mode
|
| 283 |
self.model_loaded = False # Video generation disabled
|
| 284 |
return True # But app can still run
|
| 285 |
else:
|
| 286 |
self.model_loaded = True
|
| 287 |
+
logger.info("SUCCESS: All OmniAvatar models found - full functionality enabled")
|
| 288 |
return True
|
| 289 |
|
| 290 |
except Exception as e:
|
| 291 |
logger.error(f"Error checking models: {str(e)}")
|
| 292 |
+
logger.info("TIP: Continuing in TTS-only mode")
|
| 293 |
self.model_loaded = False
|
| 294 |
return True # Continue running
|
| 295 |
|
|
|
|
| 345 |
audio_generated = False
|
| 346 |
method_used = "Unknown"
|
| 347 |
|
| 348 |
+
logger.info("[VIDEO] STARTING AVATAR VIDEO GENERATION")
|
| 349 |
+
logger.info(f"[INFO] Prompt: {request.prompt}")
|
| 350 |
|
| 351 |
if VIDEO_ENGINE_AVAILABLE:
|
| 352 |
try:
|
| 353 |
# PRIORITIZE VIDEO GENERATION
|
| 354 |
+
logger.info("[TARGET] Using OmniAvatar Video Engine for FULL video generation")
|
| 355 |
|
| 356 |
# Handle audio source
|
| 357 |
audio_path = None
|
| 358 |
if request.text_to_speech:
|
| 359 |
+
logger.info("[MIC] Generating audio from text...")
|
| 360 |
audio_path, method_used = await self.tts_manager.text_to_speech(
|
| 361 |
request.text_to_speech,
|
| 362 |
request.voice_id or "21m00Tcm4TlvDq8ikWAM"
|
|
|
|
| 372 |
# Handle image if provided
|
| 373 |
image_path = None
|
| 374 |
if request.image_url:
|
| 375 |
+
logger.info("[IMAGE] Downloading reference image...")
|
| 376 |
parsed = urlparse(str(request.image_url))
|
| 377 |
ext = os.path.splitext(parsed.path)[1] or ".jpg"
|
| 378 |
image_path = await self.download_file(str(request.image_url), ext)
|
| 379 |
|
| 380 |
# GENERATE VIDEO using OmniAvatar engine
|
| 381 |
+
logger.info("[VIDEO] Generating avatar video with adaptive body animation...")
|
| 382 |
video_path, generation_time = video_engine.generate_avatar_video(
|
| 383 |
prompt=request.prompt,
|
| 384 |
audio_path=audio_path,
|
|
|
|
| 389 |
)
|
| 390 |
|
| 391 |
processing_time = time.time() - start_time
|
| 392 |
+
logger.info(f"SUCCESS: VIDEO GENERATED successfully in {processing_time:.1f}s")
|
| 393 |
|
| 394 |
# Cleanup temporary files
|
| 395 |
if audio_path and os.path.exists(audio_path):
|
|
|
|
| 400 |
return video_path, processing_time, audio_generated, f"OmniAvatar Video Generation ({method_used})"
|
| 401 |
|
| 402 |
except Exception as e:
|
| 403 |
+
logger.error(f"ERROR: Video generation failed: {e}")
|
| 404 |
# For a VIDEO generation app, we should NOT fall back to audio-only
|
| 405 |
# Instead, provide clear guidance
|
| 406 |
if "models" in str(e).lower():
|
|
|
|
| 440 |
|
| 441 |
# Return the audio file as the "output"
|
| 442 |
processing_time = time.time() - start_time
|
| 443 |
+
logger.info(f"SUCCESS: TTS completed in {processing_time:.1f}s using {tts_method}")
|
| 444 |
return audio_path, processing_time, True, f"{tts_method} (TTS-only mode)"
|
| 445 |
else:
|
| 446 |
raise HTTPException(
|
|
|
|
| 566 |
# Startup
|
| 567 |
success = omni_api.load_model()
|
| 568 |
if not success:
|
| 569 |
+
logger.warning("WARNING: OmniAvatar model loading failed - running in limited mode")
|
| 570 |
|
| 571 |
# Load TTS models
|
| 572 |
try:
|
| 573 |
await omni_api.tts_manager.load_models()
|
| 574 |
+
logger.info("SUCCESS: TTS models initialization completed")
|
| 575 |
except Exception as e:
|
| 576 |
+
logger.error(f"ERROR: TTS initialization failed: {e}")
|
| 577 |
|
| 578 |
yield
|
| 579 |
|
|
|
|
| 697 |
output_path, processing_time, audio_generated, tts_method = loop.run_until_complete(omni_api.generate_avatar(request))
|
| 698 |
loop.close()
|
| 699 |
|
| 700 |
+
success_message = f"SUCCESS: Generation completed in {processing_time:.1f}s using {tts_method}"
|
| 701 |
print(success_message)
|
| 702 |
|
| 703 |
if omni_api.model_loaded:
|
| 704 |
return output_path
|
| 705 |
else:
|
| 706 |
+
return f"ποΈ TTS Audio generated successfully using {tts_method}\nFile: {output_path}\n\nWARNING: Video generation unavailable (OmniAvatar models not found)"
|
| 707 |
|
| 708 |
except Exception as e:
|
| 709 |
logger.error(f"Gradio generation error: {e}")
|
|
|
|
| 712 |
# Create Gradio interface
|
| 713 |
mode_info = " (TTS-Only Mode)" if not omni_api.model_loaded else ""
|
| 714 |
description_extra = """
|
| 715 |
+
WARNING: Running in TTS-Only Mode - OmniAvatar models not found. Only text-to-speech generation is available.
|
| 716 |
To enable full video generation, the required model files need to be downloaded.
|
| 717 |
""" if not omni_api.model_loaded else ""
|
| 718 |
|
|
|
|
| 759 |
gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Number of Steps", info="20-50 recommended")
|
| 760 |
],
|
| 761 |
outputs=gr.Video(label="Generated Avatar Video") if omni_api.model_loaded else gr.Textbox(label="TTS Output"),
|
| 762 |
+
title="[VIDEO] OmniAvatar-14B - Avatar Video Generation with Adaptive Body Animation",
|
| 763 |
description=f"""
|
| 764 |
Generate avatar videos with lip-sync from text prompts and speech using robust TTS system.
|
| 765 |
|
| 766 |
{description_extra}
|
| 767 |
|
| 768 |
+
**Robust TTS Architecture**
|
| 769 |
+
- **Primary**: Advanced TTS (Facebook VITS & SpeechT5) if available
|
| 770 |
+
- **Fallback**: Robust tone generation for 100% reliability
|
| 771 |
+
- **Automatic**: Seamless switching between methods
|
| 772 |
|
| 773 |
**Features:**
|
| 774 |
+
- **Guaranteed Generation**: Always produces audio output
|
| 775 |
+
- **No Dependencies**: Works even without advanced models
|
| 776 |
+
- **High Availability**: Multiple fallback layers
|
| 777 |
+
- **Voice Profiles**: Multiple voice characteristics
|
| 778 |
+
- **Audio URL Support**: Use external audio files {"(full models required)" if not omni_api.model_loaded else ""}
|
| 779 |
+
- **Image URL Support**: Reference images for characters {"(full models required)" if not omni_api.model_loaded else ""}
|
| 780 |
|
| 781 |
**Usage:**
|
| 782 |
1. Enter a character description in the prompt
|
|
|
|
| 823 |
|
| 824 |
|
| 825 |
|
| 826 |
+
|
| 827 |
+
|
|
@@ -13,32 +13,32 @@ def test_imports():
|
|
| 13 |
import tempfile
|
| 14 |
import gradio as gr
|
| 15 |
from fastapi import FastAPI, HTTPException
|
| 16 |
-
print("
|
| 17 |
except ImportError as e:
|
| 18 |
-
print(f"
|
| 19 |
return False
|
| 20 |
|
| 21 |
try:
|
| 22 |
import logging
|
| 23 |
import asyncio
|
| 24 |
from typing import Optional
|
| 25 |
-
print("
|
| 26 |
except ImportError as e:
|
| 27 |
-
print(f"
|
| 28 |
return False
|
| 29 |
|
| 30 |
try:
|
| 31 |
from robust_tts_client import RobustTTSClient
|
| 32 |
-
print("
|
| 33 |
except ImportError as e:
|
| 34 |
-
print(f"
|
| 35 |
return False
|
| 36 |
|
| 37 |
try:
|
| 38 |
from advanced_tts_client import AdvancedTTSClient
|
| 39 |
-
print("
|
| 40 |
except ImportError as e:
|
| 41 |
-
print(f"
|
| 42 |
|
| 43 |
return True
|
| 44 |
|
|
@@ -49,27 +49,27 @@ def test_app_creation():
|
|
| 49 |
try:
|
| 50 |
# Import the main app components
|
| 51 |
from app import app, omni_api, TTSManager
|
| 52 |
-
print("
|
| 53 |
|
| 54 |
# Test TTS manager creation
|
| 55 |
tts_manager = TTSManager()
|
| 56 |
-
print("
|
| 57 |
|
| 58 |
# Test app instance
|
| 59 |
if app:
|
| 60 |
-
print("
|
| 61 |
|
| 62 |
return True
|
| 63 |
|
| 64 |
except Exception as e:
|
| 65 |
-
print(f"
|
| 66 |
import traceback
|
| 67 |
traceback.print_exc()
|
| 68 |
return False
|
| 69 |
|
| 70 |
def main():
|
| 71 |
"""Run all tests"""
|
| 72 |
-
print("
|
| 73 |
print("=" * 50)
|
| 74 |
|
| 75 |
tests = [
|
|
@@ -83,7 +83,7 @@ def main():
|
|
| 83 |
result = test_func()
|
| 84 |
results.append((name, result))
|
| 85 |
except Exception as e:
|
| 86 |
-
print(f"
|
| 87 |
results.append((name, False))
|
| 88 |
|
| 89 |
# Summary
|
|
@@ -92,7 +92,7 @@ def main():
|
|
| 92 |
print("=" * 50)
|
| 93 |
|
| 94 |
for name, result in results:
|
| 95 |
-
status = "
|
| 96 |
print(f"{name}: {status}")
|
| 97 |
|
| 98 |
passed = sum(1 for _, result in results if result)
|
|
@@ -110,3 +110,4 @@ def main():
|
|
| 110 |
if __name__ == "__main__":
|
| 111 |
success = main()
|
| 112 |
exit(0 if success else 1)
|
|
|
|
|
|
| 13 |
import tempfile
|
| 14 |
import gradio as gr
|
| 15 |
from fastapi import FastAPI, HTTPException
|
| 16 |
+
print("SUCCESS: Basic imports successful")
|
| 17 |
except ImportError as e:
|
| 18 |
+
print(f"ERROR: Basic import failed: {e}")
|
| 19 |
return False
|
| 20 |
|
| 21 |
try:
|
| 22 |
import logging
|
| 23 |
import asyncio
|
| 24 |
from typing import Optional
|
| 25 |
+
print("SUCCESS: Standard library imports successful")
|
| 26 |
except ImportError as e:
|
| 27 |
+
print(f"ERROR: Standard library import failed: {e}")
|
| 28 |
return False
|
| 29 |
|
| 30 |
try:
|
| 31 |
from robust_tts_client import RobustTTSClient
|
| 32 |
+
print("SUCCESS: Robust TTS client import successful")
|
| 33 |
except ImportError as e:
|
| 34 |
+
print(f"ERROR: Robust TTS client import failed: {e}")
|
| 35 |
return False
|
| 36 |
|
| 37 |
try:
|
| 38 |
from advanced_tts_client import AdvancedTTSClient
|
| 39 |
+
print("SUCCESS: Advanced TTS client import successful")
|
| 40 |
except ImportError as e:
|
| 41 |
+
print(f"WARNING: Advanced TTS client import failed (this is OK): {e}")
|
| 42 |
|
| 43 |
return True
|
| 44 |
|
|
|
|
| 49 |
try:
|
| 50 |
# Import the main app components
|
| 51 |
from app import app, omni_api, TTSManager
|
| 52 |
+
print("SUCCESS: App components imported successfully")
|
| 53 |
|
| 54 |
# Test TTS manager creation
|
| 55 |
tts_manager = TTSManager()
|
| 56 |
+
print("SUCCESS: TTS manager created successfully")
|
| 57 |
|
| 58 |
# Test app instance
|
| 59 |
if app:
|
| 60 |
+
print("SUCCESS: FastAPI app created successfully")
|
| 61 |
|
| 62 |
return True
|
| 63 |
|
| 64 |
except Exception as e:
|
| 65 |
+
print(f"ERROR: App creation failed: {e}")
|
| 66 |
import traceback
|
| 67 |
traceback.print_exc()
|
| 68 |
return False
|
| 69 |
|
| 70 |
def main():
|
| 71 |
"""Run all tests"""
|
| 72 |
+
print("[LAUNCH] BUILD TEST SUITE")
|
| 73 |
print("=" * 50)
|
| 74 |
|
| 75 |
tests = [
|
|
|
|
| 83 |
result = test_func()
|
| 84 |
results.append((name, result))
|
| 85 |
except Exception as e:
|
| 86 |
+
print(f"ERROR: {name} crashed: {e}")
|
| 87 |
results.append((name, False))
|
| 88 |
|
| 89 |
# Summary
|
|
|
|
| 92 |
print("=" * 50)
|
| 93 |
|
| 94 |
for name, result in results:
|
| 95 |
+
status = "SUCCESS: PASS" if result else "ERROR: FAIL"
|
| 96 |
print(f"{name}: {status}")
|
| 97 |
|
| 98 |
passed = sum(1 for _, result in results if result)
|
|
|
|
| 110 |
if __name__ == "__main__":
|
| 111 |
success = main()
|
| 112 |
exit(0 if success else 1)
|
| 113 |
+
|
|
@@ -53,10 +53,10 @@ class OmniAvatarModelDownloader:
|
|
| 53 |
try:
|
| 54 |
subprocess.run([sys.executable, "-m", "pip", "install", "huggingface_hub[cli]"],
|
| 55 |
check=True, capture_output=True)
|
| 56 |
-
logger.info("
|
| 57 |
return True
|
| 58 |
except subprocess.CalledProcessError as e:
|
| 59 |
-
logger.error(f"
|
| 60 |
return False
|
| 61 |
|
| 62 |
def check_huggingface_cli(self):
|
|
@@ -65,12 +65,12 @@ class OmniAvatarModelDownloader:
|
|
| 65 |
result = subprocess.run(["huggingface-cli", "--version"],
|
| 66 |
capture_output=True, text=True)
|
| 67 |
if result.returncode == 0:
|
| 68 |
-
logger.info("
|
| 69 |
return True
|
| 70 |
except FileNotFoundError:
|
| 71 |
pass
|
| 72 |
|
| 73 |
-
logger.info("
|
| 74 |
return self.install_huggingface_cli()
|
| 75 |
|
| 76 |
def create_model_directories(self):
|
|
@@ -80,7 +80,7 @@ class OmniAvatarModelDownloader:
|
|
| 80 |
for model_name in self.required_models.keys():
|
| 81 |
model_dir = self.models_dir / model_name
|
| 82 |
model_dir.mkdir(parents=True, exist_ok=True)
|
| 83 |
-
logger.info(f"
|
| 84 |
|
| 85 |
def download_model_with_cli(self, model_name: str, model_info: dict) -> bool:
|
| 86 |
"""Download model using HuggingFace CLI"""
|
|
@@ -88,11 +88,11 @@ class OmniAvatarModelDownloader:
|
|
| 88 |
|
| 89 |
# Skip if already downloaded
|
| 90 |
if local_dir.exists() and any(local_dir.iterdir()):
|
| 91 |
-
logger.info(f"
|
| 92 |
return True
|
| 93 |
|
| 94 |
logger.info(f"π₯ Downloading {model_name} ({model_info['size']})...")
|
| 95 |
-
logger.info(f"
|
| 96 |
|
| 97 |
cmd = [
|
| 98 |
"huggingface-cli", "download",
|
|
@@ -102,13 +102,13 @@ class OmniAvatarModelDownloader:
|
|
| 102 |
]
|
| 103 |
|
| 104 |
try:
|
| 105 |
-
logger.info(f"
|
| 106 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
| 107 |
-
logger.info(f"
|
| 108 |
return True
|
| 109 |
|
| 110 |
except subprocess.CalledProcessError as e:
|
| 111 |
-
logger.error(f"
|
| 112 |
return False
|
| 113 |
|
| 114 |
def download_model_with_git(self, model_name: str, model_info: dict) -> bool:
|
|
@@ -116,7 +116,7 @@ class OmniAvatarModelDownloader:
|
|
| 116 |
local_dir = self.models_dir / model_name
|
| 117 |
|
| 118 |
if local_dir.exists() and any(local_dir.iterdir()):
|
| 119 |
-
logger.info(f"
|
| 120 |
return True
|
| 121 |
|
| 122 |
logger.info(f"π₯ Downloading {model_name} with git clone...")
|
|
@@ -129,10 +129,10 @@ class OmniAvatarModelDownloader:
|
|
| 129 |
|
| 130 |
try:
|
| 131 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
| 132 |
-
logger.info(f"
|
| 133 |
return True
|
| 134 |
except subprocess.CalledProcessError as e:
|
| 135 |
-
logger.error(f"
|
| 136 |
return False
|
| 137 |
|
| 138 |
def verify_downloads(self) -> bool:
|
|
@@ -145,24 +145,24 @@ class OmniAvatarModelDownloader:
|
|
| 145 |
|
| 146 |
if model_dir.exists() and any(model_dir.iterdir()):
|
| 147 |
file_count = len(list(model_dir.rglob("*")))
|
| 148 |
-
logger.info(f"
|
| 149 |
else:
|
| 150 |
-
logger.error(f"
|
| 151 |
all_present = False
|
| 152 |
|
| 153 |
return all_present
|
| 154 |
|
| 155 |
def download_all_models(self) -> bool:
|
| 156 |
"""Download all required models for video generation"""
|
| 157 |
-
logger.info("
|
| 158 |
logger.info("=" * 60)
|
| 159 |
-
logger.info("
|
| 160 |
-
logger.info("
|
| 161 |
logger.info("")
|
| 162 |
|
| 163 |
# Check prerequisites
|
| 164 |
if not self.check_huggingface_cli():
|
| 165 |
-
logger.error("
|
| 166 |
return False
|
| 167 |
|
| 168 |
# Create directories
|
|
@@ -178,26 +178,26 @@ class OmniAvatarModelDownloader:
|
|
| 178 |
|
| 179 |
# Fallback to git if CLI fails
|
| 180 |
if not success:
|
| 181 |
-
logger.info("
|
| 182 |
success = self.download_model_with_git(model_name, model_info)
|
| 183 |
|
| 184 |
if success:
|
| 185 |
success_count += 1
|
| 186 |
-
logger.info(f"
|
| 187 |
else:
|
| 188 |
-
logger.error(f"
|
| 189 |
if model_info["essential"]:
|
| 190 |
logger.error("π¨ This model is ESSENTIAL for video generation!")
|
| 191 |
|
| 192 |
# Verify all downloads
|
| 193 |
if self.verify_downloads():
|
| 194 |
logger.info("\nπ ALL OMNIAVATAR MODELS DOWNLOADED SUCCESSFULLY!")
|
| 195 |
-
logger.info("
|
| 196 |
-
logger.info("
|
| 197 |
return True
|
| 198 |
else:
|
| 199 |
-
logger.error("\
|
| 200 |
-
logger.error("
|
| 201 |
return False
|
| 202 |
|
| 203 |
def main():
|
|
@@ -208,14 +208,14 @@ def main():
|
|
| 208 |
success = downloader.download_all_models()
|
| 209 |
|
| 210 |
if success:
|
| 211 |
-
print("\n
|
| 212 |
-
print("
|
| 213 |
-
print("
|
| 214 |
return 0
|
| 215 |
else:
|
| 216 |
-
print("\
|
| 217 |
-
print("
|
| 218 |
-
print("
|
| 219 |
return 1
|
| 220 |
|
| 221 |
except KeyboardInterrupt:
|
|
@@ -227,3 +227,4 @@ def main():
|
|
| 227 |
|
| 228 |
if __name__ == "__main__":
|
| 229 |
sys.exit(main())
|
|
|
|
|
|
| 53 |
try:
|
| 54 |
subprocess.run([sys.executable, "-m", "pip", "install", "huggingface_hub[cli]"],
|
| 55 |
check=True, capture_output=True)
|
| 56 |
+
logger.info("SUCCESS: HuggingFace CLI installed")
|
| 57 |
return True
|
| 58 |
except subprocess.CalledProcessError as e:
|
| 59 |
+
logger.error(f"ERROR: Failed to install HuggingFace CLI: {e}")
|
| 60 |
return False
|
| 61 |
|
| 62 |
def check_huggingface_cli(self):
|
|
|
|
| 65 |
result = subprocess.run(["huggingface-cli", "--version"],
|
| 66 |
capture_output=True, text=True)
|
| 67 |
if result.returncode == 0:
|
| 68 |
+
logger.info("SUCCESS: HuggingFace CLI available")
|
| 69 |
return True
|
| 70 |
except FileNotFoundError:
|
| 71 |
pass
|
| 72 |
|
| 73 |
+
logger.info("ERROR: HuggingFace CLI not found, installing...")
|
| 74 |
return self.install_huggingface_cli()
|
| 75 |
|
| 76 |
def create_model_directories(self):
|
|
|
|
| 80 |
for model_name in self.required_models.keys():
|
| 81 |
model_dir = self.models_dir / model_name
|
| 82 |
model_dir.mkdir(parents=True, exist_ok=True)
|
| 83 |
+
logger.info(f"SUCCESS: Created: {model_dir}")
|
| 84 |
|
| 85 |
def download_model_with_cli(self, model_name: str, model_info: dict) -> bool:
|
| 86 |
"""Download model using HuggingFace CLI"""
|
|
|
|
| 88 |
|
| 89 |
# Skip if already downloaded
|
| 90 |
if local_dir.exists() and any(local_dir.iterdir()):
|
| 91 |
+
logger.info(f"SUCCESS: {model_name} already exists, skipping...")
|
| 92 |
return True
|
| 93 |
|
| 94 |
logger.info(f"π₯ Downloading {model_name} ({model_info['size']})...")
|
| 95 |
+
logger.info(f"[INFO] {model_info['description']}")
|
| 96 |
|
| 97 |
cmd = [
|
| 98 |
"huggingface-cli", "download",
|
|
|
|
| 102 |
]
|
| 103 |
|
| 104 |
try:
|
| 105 |
+
logger.info(f"[LAUNCH] Running: {' '.join(cmd)}")
|
| 106 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
| 107 |
+
logger.info(f"SUCCESS: {model_name} downloaded successfully!")
|
| 108 |
return True
|
| 109 |
|
| 110 |
except subprocess.CalledProcessError as e:
|
| 111 |
+
logger.error(f"ERROR: Failed to download {model_name}: {e.stderr}")
|
| 112 |
return False
|
| 113 |
|
| 114 |
def download_model_with_git(self, model_name: str, model_info: dict) -> bool:
|
|
|
|
| 116 |
local_dir = self.models_dir / model_name
|
| 117 |
|
| 118 |
if local_dir.exists() and any(local_dir.iterdir()):
|
| 119 |
+
logger.info(f"SUCCESS: {model_name} already exists, skipping...")
|
| 120 |
return True
|
| 121 |
|
| 122 |
logger.info(f"π₯ Downloading {model_name} with git clone...")
|
|
|
|
| 129 |
|
| 130 |
try:
|
| 131 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
| 132 |
+
logger.info(f"SUCCESS: {model_name} downloaded with git!")
|
| 133 |
return True
|
| 134 |
except subprocess.CalledProcessError as e:
|
| 135 |
+
logger.error(f"ERROR: Git clone failed for {model_name}: {e.stderr}")
|
| 136 |
return False
|
| 137 |
|
| 138 |
def verify_downloads(self) -> bool:
|
|
|
|
| 145 |
|
| 146 |
if model_dir.exists() and any(model_dir.iterdir()):
|
| 147 |
file_count = len(list(model_dir.rglob("*")))
|
| 148 |
+
logger.info(f"SUCCESS: {model_name}: {file_count} files found")
|
| 149 |
else:
|
| 150 |
+
logger.error(f"ERROR: {model_name}: Missing or empty")
|
| 151 |
all_present = False
|
| 152 |
|
| 153 |
return all_present
|
| 154 |
|
| 155 |
def download_all_models(self) -> bool:
|
| 156 |
"""Download all required models for video generation"""
|
| 157 |
+
logger.info("[VIDEO] DOWNLOADING OMNIAVATAR MODELS FOR VIDEO GENERATION")
|
| 158 |
logger.info("=" * 60)
|
| 159 |
+
logger.info("WARNING: This will download approximately 30GB of models")
|
| 160 |
+
logger.info("[TARGET] These models are REQUIRED for avatar video generation")
|
| 161 |
logger.info("")
|
| 162 |
|
| 163 |
# Check prerequisites
|
| 164 |
if not self.check_huggingface_cli():
|
| 165 |
+
logger.error("ERROR: Cannot proceed without HuggingFace CLI")
|
| 166 |
return False
|
| 167 |
|
| 168 |
# Create directories
|
|
|
|
| 178 |
|
| 179 |
# Fallback to git if CLI fails
|
| 180 |
if not success:
|
| 181 |
+
logger.info("[PROCESS] Trying git clone fallback...")
|
| 182 |
success = self.download_model_with_git(model_name, model_info)
|
| 183 |
|
| 184 |
if success:
|
| 185 |
success_count += 1
|
| 186 |
+
logger.info(f"SUCCESS: {model_name} download completed")
|
| 187 |
else:
|
| 188 |
+
logger.error(f"ERROR: {model_name} download failed")
|
| 189 |
if model_info["essential"]:
|
| 190 |
logger.error("π¨ This model is ESSENTIAL for video generation!")
|
| 191 |
|
| 192 |
# Verify all downloads
|
| 193 |
if self.verify_downloads():
|
| 194 |
logger.info("\nπ ALL OMNIAVATAR MODELS DOWNLOADED SUCCESSFULLY!")
|
| 195 |
+
logger.info("[VIDEO] Avatar video generation is now FULLY ENABLED!")
|
| 196 |
+
logger.info("TIP: Restart your application to activate video generation")
|
| 197 |
return True
|
| 198 |
else:
|
| 199 |
+
logger.error("\nERROR: Model download incomplete")
|
| 200 |
+
logger.error("[TARGET] Video generation will not work without all required models")
|
| 201 |
return False
|
| 202 |
|
| 203 |
def main():
|
|
|
|
| 208 |
success = downloader.download_all_models()
|
| 209 |
|
| 210 |
if success:
|
| 211 |
+
print("\n[VIDEO] OMNIAVATAR VIDEO GENERATION READY!")
|
| 212 |
+
print("SUCCESS: All models downloaded successfully")
|
| 213 |
+
print("[LAUNCH] Your app can now generate avatar videos!")
|
| 214 |
return 0
|
| 215 |
else:
|
| 216 |
+
print("\nERROR: MODEL DOWNLOAD FAILED")
|
| 217 |
+
print("[TARGET] Video generation will not work")
|
| 218 |
+
print("TIP: Please check the error messages above")
|
| 219 |
return 1
|
| 220 |
|
| 221 |
except KeyboardInterrupt:
|
|
|
|
| 227 |
|
| 228 |
if __name__ == "__main__":
|
| 229 |
sys.exit(main())
|
| 230 |
+
|
|
@@ -94,7 +94,7 @@ class ElevenLabsOmniAvatarClient:
|
|
| 94 |
elevenlabs_audio_url = self.text_to_speech_url(speech_text, voice_id)
|
| 95 |
|
| 96 |
# Step 2: Generate avatar with OmniAvatar
|
| 97 |
-
print(f"
|
| 98 |
print(f"Prompt: {prompt}")
|
| 99 |
|
| 100 |
avatar_data = {
|
|
@@ -116,14 +116,14 @@ class ElevenLabsOmniAvatarClient:
|
|
| 116 |
|
| 117 |
result = response.json()
|
| 118 |
|
| 119 |
-
print(f"
|
| 120 |
print(f"Output: {result['output_path']}")
|
| 121 |
print(f"Processing time: {result['processing_time']:.2f}s")
|
| 122 |
|
| 123 |
return result
|
| 124 |
|
| 125 |
except Exception as e:
|
| 126 |
-
print(f"
|
| 127 |
raise
|
| 128 |
|
| 129 |
def main():
|
|
@@ -134,7 +134,7 @@ def main():
|
|
| 134 |
OMNI_AVATAR_URL = os.getenv("OMNI_AVATAR_URL", "http://localhost:7860")
|
| 135 |
|
| 136 |
if ELEVENLABS_API_KEY == "your-elevenlabs-api-key":
|
| 137 |
-
print("
|
| 138 |
print("Example: export ELEVENLABS_API_KEY='your-actual-api-key'")
|
| 139 |
return
|
| 140 |
|
|
@@ -180,3 +180,4 @@ def main():
|
|
| 180 |
|
| 181 |
if __name__ == "__main__":
|
| 182 |
main()
|
|
|
|
|
|
| 94 |
elevenlabs_audio_url = self.text_to_speech_url(speech_text, voice_id)
|
| 95 |
|
| 96 |
# Step 2: Generate avatar with OmniAvatar
|
| 97 |
+
print(f"[AVATAR] Generating avatar with OmniAvatar...")
|
| 98 |
print(f"Prompt: {prompt}")
|
| 99 |
|
| 100 |
avatar_data = {
|
|
|
|
| 116 |
|
| 117 |
result = response.json()
|
| 118 |
|
| 119 |
+
print(f"SUCCESS: Avatar generated successfully!")
|
| 120 |
print(f"Output: {result['output_path']}")
|
| 121 |
print(f"Processing time: {result['processing_time']:.2f}s")
|
| 122 |
|
| 123 |
return result
|
| 124 |
|
| 125 |
except Exception as e:
|
| 126 |
+
print(f"ERROR: Error generating avatar: {e}")
|
| 127 |
raise
|
| 128 |
|
| 129 |
def main():
|
|
|
|
| 134 |
OMNI_AVATAR_URL = os.getenv("OMNI_AVATAR_URL", "http://localhost:7860")
|
| 135 |
|
| 136 |
if ELEVENLABS_API_KEY == "your-elevenlabs-api-key":
|
| 137 |
+
print("WARNING: Please set your ELEVENLABS_API_KEY environment variable")
|
| 138 |
print("Example: export ELEVENLABS_API_KEY='your-actual-api-key'")
|
| 139 |
return
|
| 140 |
|
|
|
|
| 180 |
|
| 181 |
if __name__ == "__main__":
|
| 182 |
main()
|
| 183 |
+
|
|
@@ -14,14 +14,14 @@ async def lifespan(app: FastAPI):
|
|
| 14 |
# Startup
|
| 15 |
success = omni_api.load_model()
|
| 16 |
if not success:
|
| 17 |
-
logger.warning("
|
| 18 |
|
| 19 |
# Load TTS models
|
| 20 |
try:
|
| 21 |
await omni_api.tts_manager.load_models()
|
| 22 |
-
logger.info("
|
| 23 |
except Exception as e:
|
| 24 |
-
logger.error(f"
|
| 25 |
|
| 26 |
yield
|
| 27 |
|
|
@@ -36,3 +36,4 @@ app = FastAPI(
|
|
| 36 |
)
|
| 37 |
|
| 38 |
# Remove the problematic line: app.router.lifespan_context = lifespan
|
|
|
|
|
|
| 14 |
# Startup
|
| 15 |
success = omni_api.load_model()
|
| 16 |
if not success:
|
| 17 |
+
logger.warning("WARNING: OmniAvatar model loading failed - running in limited mode")
|
| 18 |
|
| 19 |
# Load TTS models
|
| 20 |
try:
|
| 21 |
await omni_api.tts_manager.load_models()
|
| 22 |
+
logger.info("SUCCESS: TTS models initialization completed")
|
| 23 |
except Exception as e:
|
| 24 |
+
logger.error(f"ERROR: TTS initialization failed: {e}")
|
| 25 |
|
| 26 |
yield
|
| 27 |
|
|
|
|
| 36 |
)
|
| 37 |
|
| 38 |
# Remove the problematic line: app.router.lifespan_context = lifespan
|
| 39 |
+
|
|
@@ -40,11 +40,11 @@ class HuggingFaceTTSClient:
|
|
| 40 |
self.speaker_embeddings = self._get_default_speaker_embedding()
|
| 41 |
|
| 42 |
self.model_loaded = True
|
| 43 |
-
logger.info("
|
| 44 |
return True
|
| 45 |
|
| 46 |
except Exception as e:
|
| 47 |
-
logger.error(f"
|
| 48 |
return False
|
| 49 |
|
| 50 |
def _get_default_speaker_embedding(self):
|
|
@@ -118,9 +118,10 @@ class HuggingFaceTTSClient:
|
|
| 118 |
sf.write(temp_file.name, audio_data, samplerate=16000)
|
| 119 |
temp_file.close()
|
| 120 |
|
| 121 |
-
logger.info(f"
|
| 122 |
return temp_file.name
|
| 123 |
|
| 124 |
except Exception as e:
|
| 125 |
-
logger.error(f"
|
| 126 |
raise Exception(f"TTS generation failed: {e}")
|
|
|
|
|
|
| 40 |
self.speaker_embeddings = self._get_default_speaker_embedding()
|
| 41 |
|
| 42 |
self.model_loaded = True
|
| 43 |
+
logger.info("SUCCESS: SpeechT5 TTS model loaded successfully")
|
| 44 |
return True
|
| 45 |
|
| 46 |
except Exception as e:
|
| 47 |
+
logger.error(f"ERROR: Failed to load TTS model: {e}")
|
| 48 |
return False
|
| 49 |
|
| 50 |
def _get_default_speaker_embedding(self):
|
|
|
|
| 118 |
sf.write(temp_file.name, audio_data, samplerate=16000)
|
| 119 |
temp_file.close()
|
| 120 |
|
| 121 |
+
logger.info(f"SUCCESS: Generated speech audio: {temp_file.name}")
|
| 122 |
return temp_file.name
|
| 123 |
|
| 124 |
except Exception as e:
|
| 125 |
+
logger.error(f"ERROR: Error generating speech: {e}")
|
| 126 |
raise Exception(f"TTS generation failed: {e}")
|
| 127 |
+
|
|
@@ -14,21 +14,21 @@ logger = logging.getLogger(__name__)
|
|
| 14 |
|
| 15 |
def run_pip_command(cmd, description="", optional=False):
|
| 16 |
"""Run a pip command with proper error handling"""
|
| 17 |
-
logger.info(f"
|
| 18 |
try:
|
| 19 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
| 20 |
-
logger.info(f"
|
| 21 |
return True
|
| 22 |
except subprocess.CalledProcessError as e:
|
| 23 |
if optional:
|
| 24 |
-
logger.warning(f"
|
| 25 |
return False
|
| 26 |
else:
|
| 27 |
-
logger.error(f"
|
| 28 |
raise
|
| 29 |
|
| 30 |
def main():
|
| 31 |
-
logger.info("
|
| 32 |
|
| 33 |
# Step 1: Upgrade pip and essential tools
|
| 34 |
run_pip_command([
|
|
@@ -46,7 +46,7 @@ def main():
|
|
| 46 |
"--index-url", "https://download.pytorch.org/whl/cu124"
|
| 47 |
], "Installing PyTorch with CUDA support")
|
| 48 |
except:
|
| 49 |
-
logger.warning("
|
| 50 |
run_pip_command([
|
| 51 |
sys.executable, "-m", "pip", "install",
|
| 52 |
"torch", "torchvision", "torchaudio"
|
|
@@ -58,7 +58,7 @@ def main():
|
|
| 58 |
], "Installing main requirements")
|
| 59 |
|
| 60 |
# Step 4: Try to install optional performance packages
|
| 61 |
-
logger.info("
|
| 62 |
|
| 63 |
# Try xformers (memory efficient attention)
|
| 64 |
run_pip_command([
|
|
@@ -73,8 +73,8 @@ def main():
|
|
| 73 |
sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"
|
| 74 |
], "Installing flash-attn from wheel", optional=True)
|
| 75 |
except:
|
| 76 |
-
logger.warning("
|
| 77 |
-
logger.info("
|
| 78 |
|
| 79 |
# Step 5: Verify installation
|
| 80 |
logger.info("π Verifying installation...")
|
|
@@ -84,34 +84,34 @@ def main():
|
|
| 84 |
import gradio
|
| 85 |
import fastapi
|
| 86 |
|
| 87 |
-
logger.info(f"
|
| 88 |
-
logger.info(f"
|
| 89 |
-
logger.info(f"
|
| 90 |
|
| 91 |
if torch.cuda.is_available():
|
| 92 |
-
logger.info(f"
|
| 93 |
-
logger.info(f"
|
| 94 |
else:
|
| 95 |
logger.info("βΉοΈ CUDA not available - will use CPU")
|
| 96 |
|
| 97 |
# Check optional packages
|
| 98 |
try:
|
| 99 |
import xformers
|
| 100 |
-
logger.info(f"
|
| 101 |
except ImportError:
|
| 102 |
logger.info("βΉοΈ xformers not available (optional)")
|
| 103 |
|
| 104 |
try:
|
| 105 |
import flash_attn
|
| 106 |
-
logger.info("
|
| 107 |
except ImportError:
|
| 108 |
logger.info("βΉοΈ flash_attn not available (optional)")
|
| 109 |
|
| 110 |
logger.info("π Installation completed successfully!")
|
| 111 |
-
logger.info("
|
| 112 |
|
| 113 |
except ImportError as e:
|
| 114 |
-
logger.error(f"
|
| 115 |
return False
|
| 116 |
|
| 117 |
return True
|
|
@@ -119,3 +119,4 @@ def main():
|
|
| 119 |
if __name__ == "__main__":
|
| 120 |
success = main()
|
| 121 |
sys.exit(0 if success else 1)
|
|
|
|
|
|
| 14 |
|
| 15 |
def run_pip_command(cmd, description="", optional=False):
|
| 16 |
"""Run a pip command with proper error handling"""
|
| 17 |
+
logger.info(f"[PROCESS] {description}")
|
| 18 |
try:
|
| 19 |
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
| 20 |
+
logger.info(f"SUCCESS: {description} - Success")
|
| 21 |
return True
|
| 22 |
except subprocess.CalledProcessError as e:
|
| 23 |
if optional:
|
| 24 |
+
logger.warning(f"WARNING: {description} - Failed (optional): {e.stderr}")
|
| 25 |
return False
|
| 26 |
else:
|
| 27 |
+
logger.error(f"ERROR: {description} - Failed: {e.stderr}")
|
| 28 |
raise
|
| 29 |
|
| 30 |
def main():
|
| 31 |
+
logger.info("[LAUNCH] Starting safe dependency installation for OmniAvatar")
|
| 32 |
|
| 33 |
# Step 1: Upgrade pip and essential tools
|
| 34 |
run_pip_command([
|
|
|
|
| 46 |
"--index-url", "https://download.pytorch.org/whl/cu124"
|
| 47 |
], "Installing PyTorch with CUDA support")
|
| 48 |
except:
|
| 49 |
+
logger.warning("WARNING: CUDA PyTorch failed, installing CPU version")
|
| 50 |
run_pip_command([
|
| 51 |
sys.executable, "-m", "pip", "install",
|
| 52 |
"torch", "torchvision", "torchaudio"
|
|
|
|
| 58 |
], "Installing main requirements")
|
| 59 |
|
| 60 |
# Step 4: Try to install optional performance packages
|
| 61 |
+
logger.info("[TARGET] Installing optional performance packages...")
|
| 62 |
|
| 63 |
# Try xformers (memory efficient attention)
|
| 64 |
run_pip_command([
|
|
|
|
| 73 |
sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"
|
| 74 |
], "Installing flash-attn from wheel", optional=True)
|
| 75 |
except:
|
| 76 |
+
logger.warning("WARNING: flash-attn installation failed - this is common and not critical")
|
| 77 |
+
logger.info("TIP: flash-attn can be installed later manually if needed")
|
| 78 |
|
| 79 |
# Step 5: Verify installation
|
| 80 |
logger.info("π Verifying installation...")
|
|
|
|
| 84 |
import gradio
|
| 85 |
import fastapi
|
| 86 |
|
| 87 |
+
logger.info(f"SUCCESS: PyTorch: {torch.__version__}")
|
| 88 |
+
logger.info(f"SUCCESS: Transformers: {transformers.__version__}")
|
| 89 |
+
logger.info(f"SUCCESS: Gradio: {gradio.__version__}")
|
| 90 |
|
| 91 |
if torch.cuda.is_available():
|
| 92 |
+
logger.info(f"SUCCESS: CUDA: {torch.version.cuda}")
|
| 93 |
+
logger.info(f"SUCCESS: GPU Count: {torch.cuda.device_count()}")
|
| 94 |
else:
|
| 95 |
logger.info("βΉοΈ CUDA not available - will use CPU")
|
| 96 |
|
| 97 |
# Check optional packages
|
| 98 |
try:
|
| 99 |
import xformers
|
| 100 |
+
logger.info(f"SUCCESS: xformers: {xformers.__version__}")
|
| 101 |
except ImportError:
|
| 102 |
logger.info("βΉοΈ xformers not available (optional)")
|
| 103 |
|
| 104 |
try:
|
| 105 |
import flash_attn
|
| 106 |
+
logger.info("SUCCESS: flash_attn: Available")
|
| 107 |
except ImportError:
|
| 108 |
logger.info("βΉοΈ flash_attn not available (optional)")
|
| 109 |
|
| 110 |
logger.info("π Installation completed successfully!")
|
| 111 |
+
logger.info("TIP: You can now run: python app.py")
|
| 112 |
|
| 113 |
except ImportError as e:
|
| 114 |
+
logger.error(f"ERROR: Installation verification failed: {e}")
|
| 115 |
return False
|
| 116 |
|
| 117 |
return True
|
|
|
|
| 119 |
if __name__ == "__main__":
|
| 120 |
success = main()
|
| 121 |
sys.exit(0 if success else 1)
|
| 122 |
+
|
|
@@ -29,11 +29,11 @@ class MinimalTTSClient:
|
|
| 29 |
# For now, we'll create a mock TTS that generates simple audio
|
| 30 |
# This avoids all the complex model loading issues
|
| 31 |
self.model_loaded = True
|
| 32 |
-
logger.info("
|
| 33 |
return True
|
| 34 |
|
| 35 |
except Exception as e:
|
| 36 |
-
logger.error(f"
|
| 37 |
return False
|
| 38 |
|
| 39 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
|
|
@@ -67,10 +67,11 @@ class MinimalTTSClient:
|
|
| 67 |
sf.write(temp_file.name, audio_data, samplerate=sample_rate)
|
| 68 |
temp_file.close()
|
| 69 |
|
| 70 |
-
logger.info(f"
|
| 71 |
logger.warning("π’ Using placeholder audio - TTS will be improved in next update")
|
| 72 |
return temp_file.name
|
| 73 |
|
| 74 |
except Exception as e:
|
| 75 |
-
logger.error(f"
|
| 76 |
raise Exception(f"Audio generation failed: {e}")
|
|
|
|
|
|
| 29 |
# For now, we'll create a mock TTS that generates simple audio
|
| 30 |
# This avoids all the complex model loading issues
|
| 31 |
self.model_loaded = True
|
| 32 |
+
logger.info("SUCCESS: Minimal TTS ready")
|
| 33 |
return True
|
| 34 |
|
| 35 |
except Exception as e:
|
| 36 |
+
logger.error(f"ERROR: Failed to load TTS: {e}")
|
| 37 |
return False
|
| 38 |
|
| 39 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
|
|
|
|
| 67 |
sf.write(temp_file.name, audio_data, samplerate=sample_rate)
|
| 68 |
temp_file.close()
|
| 69 |
|
| 70 |
+
logger.info(f"SUCCESS: Generated placeholder audio: {temp_file.name}")
|
| 71 |
logger.warning("π’ Using placeholder audio - TTS will be improved in next update")
|
| 72 |
return temp_file.name
|
| 73 |
|
| 74 |
except Exception as e:
|
| 75 |
+
logger.error(f"ERROR: Error generating audio: {e}")
|
| 76 |
raise Exception(f"Audio generation failed: {e}")
|
| 77 |
+
|
|
@@ -56,10 +56,10 @@ class OmniAvatarEngine:
|
|
| 56 |
model_path = Path(path)
|
| 57 |
if model_path.exists() and any(model_path.iterdir()):
|
| 58 |
status[name] = True
|
| 59 |
-
logger.info(f"
|
| 60 |
else:
|
| 61 |
status[name] = False
|
| 62 |
-
logger.warning(f"
|
| 63 |
|
| 64 |
self.models_loaded = all(status.values())
|
| 65 |
|
|
@@ -67,7 +67,7 @@ class OmniAvatarEngine:
|
|
| 67 |
logger.info("π All OmniAvatar-14B models available!")
|
| 68 |
else:
|
| 69 |
missing = [name for name, available in status.items() if not available]
|
| 70 |
-
logger.warning(f"
|
| 71 |
|
| 72 |
return status
|
| 73 |
|
|
@@ -84,8 +84,8 @@ class OmniAvatarEngine:
|
|
| 84 |
|
| 85 |
# TODO: Implement actual model loading
|
| 86 |
# This would require the full OmniAvatar implementation
|
| 87 |
-
logger.info("
|
| 88 |
-
logger.info("
|
| 89 |
|
| 90 |
self.models_loaded = True
|
| 91 |
return True
|
|
@@ -171,7 +171,7 @@ class OmniAvatarEngine:
|
|
| 171 |
|
| 172 |
cmd.extend(["--hp", ",".join(hp_params)])
|
| 173 |
|
| 174 |
-
logger.info(f"
|
| 175 |
logger.info(f"Command: {' '.join(cmd)}")
|
| 176 |
|
| 177 |
# Run inference
|
|
@@ -194,7 +194,7 @@ class OmniAvatarEngine:
|
|
| 194 |
latest_video = max(video_files, key=lambda x: x.stat().st_mtime)
|
| 195 |
processing_time = time.time() - start_time
|
| 196 |
|
| 197 |
-
logger.info(f"
|
| 198 |
logger.info(f"β±οΈ Processing time: {processing_time:.1f}s")
|
| 199 |
|
| 200 |
return str(latest_video), processing_time
|
|
@@ -334,3 +334,4 @@ class OmniAvatarEngine:
|
|
| 334 |
|
| 335 |
# Global instance
|
| 336 |
omni_engine = OmniAvatarEngine()
|
|
|
|
|
|
| 56 |
model_path = Path(path)
|
| 57 |
if model_path.exists() and any(model_path.iterdir()):
|
| 58 |
status[name] = True
|
| 59 |
+
logger.info(f"SUCCESS: {name} model found at {path}")
|
| 60 |
else:
|
| 61 |
status[name] = False
|
| 62 |
+
logger.warning(f"ERROR: {name} model not found at {path}")
|
| 63 |
|
| 64 |
self.models_loaded = all(status.values())
|
| 65 |
|
|
|
|
| 67 |
logger.info("π All OmniAvatar-14B models available!")
|
| 68 |
else:
|
| 69 |
missing = [name for name, available in status.items() if not available]
|
| 70 |
+
logger.warning(f"WARNING: Missing models: {', '.join(missing)}")
|
| 71 |
|
| 72 |
return status
|
| 73 |
|
|
|
|
| 84 |
|
| 85 |
# TODO: Implement actual model loading
|
| 86 |
# This would require the full OmniAvatar implementation
|
| 87 |
+
logger.info("[PROCESS] Model loading logic would be implemented here")
|
| 88 |
+
logger.info("TIP: For full implementation, integrate with official OmniAvatar codebase")
|
| 89 |
|
| 90 |
self.models_loaded = True
|
| 91 |
return True
|
|
|
|
| 171 |
|
| 172 |
cmd.extend(["--hp", ",".join(hp_params)])
|
| 173 |
|
| 174 |
+
logger.info(f"[LAUNCH] Running OmniAvatar inference:")
|
| 175 |
logger.info(f"Command: {' '.join(cmd)}")
|
| 176 |
|
| 177 |
# Run inference
|
|
|
|
| 194 |
latest_video = max(video_files, key=lambda x: x.stat().st_mtime)
|
| 195 |
processing_time = time.time() - start_time
|
| 196 |
|
| 197 |
+
logger.info(f"SUCCESS: Video generated successfully: {latest_video}")
|
| 198 |
logger.info(f"β±οΈ Processing time: {processing_time:.1f}s")
|
| 199 |
|
| 200 |
return str(latest_video), processing_time
|
|
|
|
| 334 |
|
| 335 |
# Global instance
|
| 336 |
omni_engine = OmniAvatarEngine()
|
| 337 |
+
|
|
@@ -2,7 +2,8 @@
|
|
| 2 |
try:
|
| 3 |
from omniavatar_engine import omni_engine
|
| 4 |
OMNIAVATAR_ENGINE_AVAILABLE = True
|
| 5 |
-
logger.info("
|
| 6 |
except ImportError as e:
|
| 7 |
OMNIAVATAR_ENGINE_AVAILABLE = False
|
| 8 |
-
logger.warning(f"
|
|
|
|
|
|
| 2 |
try:
|
| 3 |
from omniavatar_engine import omni_engine
|
| 4 |
OMNIAVATAR_ENGINE_AVAILABLE = True
|
| 5 |
+
logger.info("SUCCESS: OmniAvatar Engine available")
|
| 6 |
except ImportError as e:
|
| 7 |
OMNIAVATAR_ENGINE_AVAILABLE = False
|
| 8 |
+
logger.warning(f"WARNING: OmniAvatar Engine not available: {e}")
|
| 9 |
+
|
|
@@ -45,7 +45,7 @@ class OmniAvatarVideoEngine:
|
|
| 45 |
"max_duration": 30, # seconds
|
| 46 |
}
|
| 47 |
|
| 48 |
-
logger.info(f"
|
| 49 |
self._check_and_download_models()
|
| 50 |
|
| 51 |
def _check_and_download_models(self):
|
|
@@ -56,21 +56,21 @@ class OmniAvatarVideoEngine:
|
|
| 56 |
for name, path in self.model_paths.items():
|
| 57 |
if not os.path.exists(path) or not any(Path(path).iterdir() if Path(path).exists() else []):
|
| 58 |
missing_models.append(name)
|
| 59 |
-
logger.warning(f"
|
| 60 |
else:
|
| 61 |
-
logger.info(f"
|
| 62 |
|
| 63 |
if missing_models:
|
| 64 |
logger.error(f"π¨ CRITICAL: Missing video generation models: {missing_models}")
|
| 65 |
logger.info("π₯ Attempting to download models automatically...")
|
| 66 |
self._auto_download_models()
|
| 67 |
else:
|
| 68 |
-
logger.info("
|
| 69 |
self.base_models_available = True
|
| 70 |
|
| 71 |
def _auto_download_models(self):
|
| 72 |
"""Automatically download OmniAvatar models for video generation"""
|
| 73 |
-
logger.info("
|
| 74 |
|
| 75 |
models_to_download = {
|
| 76 |
"Wan2.1-T2V-14B": {
|
|
@@ -104,10 +104,10 @@ class OmniAvatarVideoEngine:
|
|
| 104 |
success = self._download_with_requests(models_to_download)
|
| 105 |
|
| 106 |
if success:
|
| 107 |
-
logger.info("
|
| 108 |
self.base_models_available = True
|
| 109 |
else:
|
| 110 |
-
logger.error("
|
| 111 |
self.base_models_available = False
|
| 112 |
|
| 113 |
def _download_with_git_lfs(self, models):
|
|
@@ -119,18 +119,18 @@ class OmniAvatarVideoEngine:
|
|
| 119 |
result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
|
| 120 |
|
| 121 |
if result.returncode == 0:
|
| 122 |
-
logger.info(f"
|
| 123 |
else:
|
| 124 |
-
logger.error(f"
|
| 125 |
return False
|
| 126 |
return True
|
| 127 |
except Exception as e:
|
| 128 |
-
logger.warning(f"
|
| 129 |
return False
|
| 130 |
|
| 131 |
def _download_with_requests(self, models):
|
| 132 |
"""Fallback download method using direct HTTP requests"""
|
| 133 |
-
logger.info("
|
| 134 |
|
| 135 |
# For now, create placeholder files to enable the video generation logic
|
| 136 |
# In production, this would download actual model files
|
|
@@ -138,9 +138,9 @@ class OmniAvatarVideoEngine:
|
|
| 138 |
placeholder_file = Path(info["local_dir"]) / "model_placeholder.txt"
|
| 139 |
with open(placeholder_file, 'w') as f:
|
| 140 |
f.write(f"Placeholder for {name} model\nRepo: {info['repo']}\nDescription: {info['description']}\n")
|
| 141 |
-
logger.info(f"
|
| 142 |
|
| 143 |
-
logger.warning("
|
| 144 |
return True
|
| 145 |
|
| 146 |
def generate_avatar_video(self, prompt: str, audio_path: str,
|
|
@@ -167,13 +167,13 @@ class OmniAvatarVideoEngine:
|
|
| 167 |
|
| 168 |
if not self.base_models_available:
|
| 169 |
raise RuntimeError(
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
"π This will download the required 30GB of models for video generation."
|
| 173 |
)
|
| 174 |
|
| 175 |
-
logger.info(f"
|
| 176 |
-
logger.info(f"
|
| 177 |
logger.info(f"π΅ Audio: {audio_path}")
|
| 178 |
if image_path:
|
| 179 |
logger.info(f"πΌοΈ Reference image: {image_path}")
|
|
@@ -190,13 +190,13 @@ class OmniAvatarVideoEngine:
|
|
| 190 |
|
| 191 |
generation_time = time.time() - start_time
|
| 192 |
|
| 193 |
-
logger.info(f"
|
| 194 |
logger.info(f"β±οΈ Generation time: {generation_time:.1f}s")
|
| 195 |
|
| 196 |
return video_path, generation_time
|
| 197 |
|
| 198 |
except Exception as e:
|
| 199 |
-
logger.error(f"
|
| 200 |
# Don't fall back to audio - this is a VIDEO generation system!
|
| 201 |
raise RuntimeError(f"Video generation failed: {e}")
|
| 202 |
|
|
@@ -217,7 +217,7 @@ class OmniAvatarVideoEngine:
|
|
| 217 |
|
| 218 |
def _run_omniavatar_inference(self, input_file: str, config: dict) -> str:
|
| 219 |
"""Run OmniAvatar inference for video generation"""
|
| 220 |
-
logger.info("
|
| 221 |
|
| 222 |
# OmniAvatar inference command
|
| 223 |
cmd = [
|
|
@@ -231,7 +231,7 @@ class OmniAvatarVideoEngine:
|
|
| 231 |
"--num_steps", str(config["num_steps"])
|
| 232 |
]
|
| 233 |
|
| 234 |
-
logger.info(f"
|
| 235 |
|
| 236 |
try:
|
| 237 |
# For now, simulate video generation (replace with actual inference)
|
|
@@ -252,7 +252,7 @@ class OmniAvatarVideoEngine:
|
|
| 252 |
|
| 253 |
def _simulate_video_generation(self, config: dict):
|
| 254 |
"""Simulate video generation (replace with actual OmniAvatar inference)"""
|
| 255 |
-
logger.info("
|
| 256 |
|
| 257 |
# Create a mock MP4 file
|
| 258 |
output_dir = Path("./outputs")
|
|
@@ -311,3 +311,4 @@ class OmniAvatarVideoEngine:
|
|
| 311 |
|
| 312 |
# Global video engine instance
|
| 313 |
video_engine = OmniAvatarVideoEngine()
|
|
|
|
|
|
| 45 |
"max_duration": 30, # seconds
|
| 46 |
}
|
| 47 |
|
| 48 |
+
logger.info(f"[VIDEO] OmniAvatar Video Engine initialized on {self.device}")
|
| 49 |
self._check_and_download_models()
|
| 50 |
|
| 51 |
def _check_and_download_models(self):
|
|
|
|
| 56 |
for name, path in self.model_paths.items():
|
| 57 |
if not os.path.exists(path) or not any(Path(path).iterdir() if Path(path).exists() else []):
|
| 58 |
missing_models.append(name)
|
| 59 |
+
logger.warning(f"ERROR: Missing model: {name} at {path}")
|
| 60 |
else:
|
| 61 |
+
logger.info(f"SUCCESS: Found model: {name}")
|
| 62 |
|
| 63 |
if missing_models:
|
| 64 |
logger.error(f"π¨ CRITICAL: Missing video generation models: {missing_models}")
|
| 65 |
logger.info("π₯ Attempting to download models automatically...")
|
| 66 |
self._auto_download_models()
|
| 67 |
else:
|
| 68 |
+
logger.info("SUCCESS: All OmniAvatar models found - VIDEO GENERATION READY!")
|
| 69 |
self.base_models_available = True
|
| 70 |
|
| 71 |
def _auto_download_models(self):
|
| 72 |
"""Automatically download OmniAvatar models for video generation"""
|
| 73 |
+
logger.info("[LAUNCH] Auto-downloading OmniAvatar models...")
|
| 74 |
|
| 75 |
models_to_download = {
|
| 76 |
"Wan2.1-T2V-14B": {
|
|
|
|
| 104 |
success = self._download_with_requests(models_to_download)
|
| 105 |
|
| 106 |
if success:
|
| 107 |
+
logger.info("SUCCESS: Model download completed - VIDEO GENERATION ENABLED!")
|
| 108 |
self.base_models_available = True
|
| 109 |
else:
|
| 110 |
+
logger.error("ERROR: Model download failed - running in LIMITED mode")
|
| 111 |
self.base_models_available = False
|
| 112 |
|
| 113 |
def _download_with_git_lfs(self, models):
|
|
|
|
| 119 |
result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
|
| 120 |
|
| 121 |
if result.returncode == 0:
|
| 122 |
+
logger.info(f"SUCCESS: Downloaded {name}")
|
| 123 |
else:
|
| 124 |
+
logger.error(f"ERROR: Git clone failed for {name}: {result.stderr}")
|
| 125 |
return False
|
| 126 |
return True
|
| 127 |
except Exception as e:
|
| 128 |
+
logger.warning(f"WARNING: Git LFS download failed: {e}")
|
| 129 |
return False
|
| 130 |
|
| 131 |
def _download_with_requests(self, models):
|
| 132 |
"""Fallback download method using direct HTTP requests"""
|
| 133 |
+
logger.info("[PROCESS] Trying direct HTTP download...")
|
| 134 |
|
| 135 |
# For now, create placeholder files to enable the video generation logic
|
| 136 |
# In production, this would download actual model files
|
|
|
|
| 138 |
placeholder_file = Path(info["local_dir"]) / "model_placeholder.txt"
|
| 139 |
with open(placeholder_file, 'w') as f:
|
| 140 |
f.write(f"Placeholder for {name} model\nRepo: {info['repo']}\nDescription: {info['description']}\n")
|
| 141 |
+
logger.info(f"[INFO] Created placeholder for {name}")
|
| 142 |
|
| 143 |
+
logger.warning("WARNING: Using model placeholders - implement actual download for production!")
|
| 144 |
return True
|
| 145 |
|
| 146 |
def generate_avatar_video(self, prompt: str, audio_path: str,
|
|
|
|
| 167 |
|
| 168 |
if not self.base_models_available:
|
| 169 |
raise RuntimeError(
|
| 170 |
+
"ERROR: CRITICAL: Cannot generate videos without OmniAvatar models!\n"
|
| 171 |
+
"TIP: Please run: python setup_omniavatar.py\n"
|
| 172 |
"π This will download the required 30GB of models for video generation."
|
| 173 |
)
|
| 174 |
|
| 175 |
+
logger.info(f"[VIDEO] Generating avatar video...")
|
| 176 |
+
logger.info(f"[INFO] Prompt: {prompt}")
|
| 177 |
logger.info(f"π΅ Audio: {audio_path}")
|
| 178 |
if image_path:
|
| 179 |
logger.info(f"πΌοΈ Reference image: {image_path}")
|
|
|
|
| 190 |
|
| 191 |
generation_time = time.time() - start_time
|
| 192 |
|
| 193 |
+
logger.info(f"SUCCESS: Avatar video generated: {video_path}")
|
| 194 |
logger.info(f"β±οΈ Generation time: {generation_time:.1f}s")
|
| 195 |
|
| 196 |
return video_path, generation_time
|
| 197 |
|
| 198 |
except Exception as e:
|
| 199 |
+
logger.error(f"ERROR: Video generation failed: {e}")
|
| 200 |
# Don't fall back to audio - this is a VIDEO generation system!
|
| 201 |
raise RuntimeError(f"Video generation failed: {e}")
|
| 202 |
|
|
|
|
| 217 |
|
| 218 |
def _run_omniavatar_inference(self, input_file: str, config: dict) -> str:
|
| 219 |
"""Run OmniAvatar inference for video generation"""
|
| 220 |
+
logger.info("[LAUNCH] Running OmniAvatar inference...")
|
| 221 |
|
| 222 |
# OmniAvatar inference command
|
| 223 |
cmd = [
|
|
|
|
| 231 |
"--num_steps", str(config["num_steps"])
|
| 232 |
]
|
| 233 |
|
| 234 |
+
logger.info(f"[TARGET] Command: {' '.join(cmd)}")
|
| 235 |
|
| 236 |
try:
|
| 237 |
# For now, simulate video generation (replace with actual inference)
|
|
|
|
| 252 |
|
| 253 |
def _simulate_video_generation(self, config: dict):
|
| 254 |
"""Simulate video generation (replace with actual OmniAvatar inference)"""
|
| 255 |
+
logger.info("[VIDEO] Simulating OmniAvatar video generation...")
|
| 256 |
|
| 257 |
# Create a mock MP4 file
|
| 258 |
output_dir = Path("./outputs")
|
|
|
|
| 311 |
|
| 312 |
# Global video engine instance
|
| 313 |
video_engine = OmniAvatarVideoEngine()
|
| 314 |
+
|
|
@@ -25,11 +25,11 @@ class RobustTTSClient:
|
|
| 25 |
try:
|
| 26 |
logger.info("Setting up robust placeholder TTS...")
|
| 27 |
self.model_loaded = True
|
| 28 |
-
logger.info("
|
| 29 |
return True
|
| 30 |
|
| 31 |
except Exception as e:
|
| 32 |
-
logger.error(f"
|
| 33 |
# Even if something goes wrong, we can still generate audio
|
| 34 |
self.model_loaded = True
|
| 35 |
return True
|
|
@@ -117,13 +117,13 @@ class RobustTTSClient:
|
|
| 117 |
sf.write(temp_file.name, audio_data, samplerate=sample_rate)
|
| 118 |
temp_file.close()
|
| 119 |
|
| 120 |
-
logger.info(f"
|
| 121 |
logger.info(f"π Audio details: {len(audio_data)/sample_rate:.1f}s, {sample_rate}Hz")
|
| 122 |
logger.warning("π Using placeholder audio - Real TTS coming in future update")
|
| 123 |
return temp_file.name
|
| 124 |
|
| 125 |
except Exception as e:
|
| 126 |
-
logger.error(f"
|
| 127 |
logger.error(f"Exception type: {type(e).__name__}")
|
| 128 |
|
| 129 |
# Last resort: create minimal audio file
|
|
@@ -137,9 +137,10 @@ class RobustTTSClient:
|
|
| 137 |
sf.write(temp_file.name, audio_data, samplerate=sample_rate)
|
| 138 |
temp_file.close()
|
| 139 |
|
| 140 |
-
logger.info(f"
|
| 141 |
return temp_file.name
|
| 142 |
|
| 143 |
except Exception as final_error:
|
| 144 |
-
logger.error(f"
|
| 145 |
raise Exception(f"Complete TTS failure: {final_error}")
|
|
|
|
|
|
| 25 |
try:
|
| 26 |
logger.info("Setting up robust placeholder TTS...")
|
| 27 |
self.model_loaded = True
|
| 28 |
+
logger.info("SUCCESS: Robust TTS ready (placeholder audio mode)")
|
| 29 |
return True
|
| 30 |
|
| 31 |
except Exception as e:
|
| 32 |
+
logger.error(f"ERROR: Unexpected error in TTS setup: {e}")
|
| 33 |
# Even if something goes wrong, we can still generate audio
|
| 34 |
self.model_loaded = True
|
| 35 |
return True
|
|
|
|
| 117 |
sf.write(temp_file.name, audio_data, samplerate=sample_rate)
|
| 118 |
temp_file.close()
|
| 119 |
|
| 120 |
+
logger.info(f"SUCCESS: Generated audio file: {temp_file.name}")
|
| 121 |
logger.info(f"π Audio details: {len(audio_data)/sample_rate:.1f}s, {sample_rate}Hz")
|
| 122 |
logger.warning("π Using placeholder audio - Real TTS coming in future update")
|
| 123 |
return temp_file.name
|
| 124 |
|
| 125 |
except Exception as e:
|
| 126 |
+
logger.error(f"ERROR: Critical error in audio generation: {str(e)}")
|
| 127 |
logger.error(f"Exception type: {type(e).__name__}")
|
| 128 |
|
| 129 |
# Last resort: create minimal audio file
|
|
|
|
| 137 |
sf.write(temp_file.name, audio_data, samplerate=sample_rate)
|
| 138 |
temp_file.close()
|
| 139 |
|
| 140 |
+
logger.info(f"SUCCESS: Created fallback audio: {temp_file.name}")
|
| 141 |
return temp_file.name
|
| 142 |
|
| 143 |
except Exception as final_error:
|
| 144 |
+
logger.error(f"ERROR: Even fallback audio failed: {final_error}")
|
| 145 |
raise Exception(f"Complete TTS failure: {final_error}")
|
| 146 |
+
|
|
@@ -44,16 +44,16 @@ class OmniAvatarSetup:
|
|
| 44 |
|
| 45 |
try:
|
| 46 |
import torch
|
| 47 |
-
logger.info(f"
|
| 48 |
|
| 49 |
if torch.cuda.is_available():
|
| 50 |
-
logger.info(f"
|
| 51 |
-
logger.info(f"
|
| 52 |
else:
|
| 53 |
-
logger.warning("
|
| 54 |
|
| 55 |
except ImportError:
|
| 56 |
-
logger.error("
|
| 57 |
return False
|
| 58 |
|
| 59 |
return True
|
|
@@ -64,7 +64,7 @@ class OmniAvatarSetup:
|
|
| 64 |
result = subprocess.run(['huggingface-cli', '--version'],
|
| 65 |
capture_output=True, text=True)
|
| 66 |
if result.returncode == 0:
|
| 67 |
-
logger.info("
|
| 68 |
return True
|
| 69 |
except FileNotFoundError:
|
| 70 |
pass
|
|
@@ -73,10 +73,10 @@ class OmniAvatarSetup:
|
|
| 73 |
try:
|
| 74 |
subprocess.run([sys.executable, '-m', 'pip', 'install',
|
| 75 |
'huggingface_hub[cli]'], check=True)
|
| 76 |
-
logger.info("
|
| 77 |
return True
|
| 78 |
except subprocess.CalledProcessError as e:
|
| 79 |
-
logger.error(f"
|
| 80 |
return False
|
| 81 |
|
| 82 |
def create_directory_structure(self):
|
|
@@ -96,27 +96,27 @@ class OmniAvatarSetup:
|
|
| 96 |
|
| 97 |
for directory in directories:
|
| 98 |
directory.mkdir(parents=True, exist_ok=True)
|
| 99 |
-
logger.info(f"
|
| 100 |
|
| 101 |
def download_models(self):
|
| 102 |
"""Download all required models"""
|
| 103 |
-
logger.info("
|
| 104 |
-
logger.info("
|
| 105 |
|
| 106 |
response = input("Continue with download? (y/N): ")
|
| 107 |
if response.lower() != 'y':
|
| 108 |
-
logger.info("
|
| 109 |
return False
|
| 110 |
|
| 111 |
for model_name, model_info in self.models.items():
|
| 112 |
logger.info(f"π₯ Downloading {model_name} ({model_info['size']})...")
|
| 113 |
-
logger.info(f"
|
| 114 |
|
| 115 |
local_dir = self.models_dir / model_name
|
| 116 |
|
| 117 |
# Skip if already exists and has content
|
| 118 |
if local_dir.exists() and any(local_dir.iterdir()):
|
| 119 |
-
logger.info(f"
|
| 120 |
continue
|
| 121 |
|
| 122 |
try:
|
|
@@ -126,37 +126,37 @@ class OmniAvatarSetup:
|
|
| 126 |
'--local-dir', str(local_dir)
|
| 127 |
]
|
| 128 |
|
| 129 |
-
logger.info(f"
|
| 130 |
result = subprocess.run(cmd, check=True)
|
| 131 |
-
logger.info(f"
|
| 132 |
|
| 133 |
except subprocess.CalledProcessError as e:
|
| 134 |
-
logger.error(f"
|
| 135 |
return False
|
| 136 |
|
| 137 |
-
logger.info("
|
| 138 |
return True
|
| 139 |
|
| 140 |
def run_setup(self):
|
| 141 |
"""Run the complete setup process"""
|
| 142 |
-
logger.info("
|
| 143 |
|
| 144 |
if not self.check_dependencies():
|
| 145 |
-
logger.error("
|
| 146 |
return False
|
| 147 |
|
| 148 |
if not self.install_huggingface_cli():
|
| 149 |
-
logger.error("
|
| 150 |
return False
|
| 151 |
|
| 152 |
self.create_directory_structure()
|
| 153 |
|
| 154 |
if not self.download_models():
|
| 155 |
-
logger.error("
|
| 156 |
return False
|
| 157 |
|
| 158 |
logger.info("π OmniAvatar-14B setup completed successfully!")
|
| 159 |
-
logger.info("
|
| 160 |
return True
|
| 161 |
|
| 162 |
def main():
|
|
@@ -165,3 +165,4 @@ def main():
|
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
| 167 |
main()
|
|
|
|
|
|
| 44 |
|
| 45 |
try:
|
| 46 |
import torch
|
| 47 |
+
logger.info(f"SUCCESS: PyTorch version: {torch.__version__}")
|
| 48 |
|
| 49 |
if torch.cuda.is_available():
|
| 50 |
+
logger.info(f"SUCCESS: CUDA available: {torch.version.cuda}")
|
| 51 |
+
logger.info(f"SUCCESS: GPU devices: {torch.cuda.device_count()}")
|
| 52 |
else:
|
| 53 |
+
logger.warning("WARNING: CUDA not available - will use CPU (slower)")
|
| 54 |
|
| 55 |
except ImportError:
|
| 56 |
+
logger.error("ERROR: PyTorch not installed!")
|
| 57 |
return False
|
| 58 |
|
| 59 |
return True
|
|
|
|
| 64 |
result = subprocess.run(['huggingface-cli', '--version'],
|
| 65 |
capture_output=True, text=True)
|
| 66 |
if result.returncode == 0:
|
| 67 |
+
logger.info("SUCCESS: Hugging Face CLI available")
|
| 68 |
return True
|
| 69 |
except FileNotFoundError:
|
| 70 |
pass
|
|
|
|
| 73 |
try:
|
| 74 |
subprocess.run([sys.executable, '-m', 'pip', 'install',
|
| 75 |
'huggingface_hub[cli]'], check=True)
|
| 76 |
+
logger.info("SUCCESS: Hugging Face CLI installed")
|
| 77 |
return True
|
| 78 |
except subprocess.CalledProcessError as e:
|
| 79 |
+
logger.error(f"ERROR: Failed to install Hugging Face CLI: {e}")
|
| 80 |
return False
|
| 81 |
|
| 82 |
def create_directory_structure(self):
|
|
|
|
| 96 |
|
| 97 |
for directory in directories:
|
| 98 |
directory.mkdir(parents=True, exist_ok=True)
|
| 99 |
+
logger.info(f"SUCCESS: Created: {directory}")
|
| 100 |
|
| 101 |
def download_models(self):
|
| 102 |
"""Download all required models"""
|
| 103 |
+
logger.info("[PROCESS] Starting model downloads...")
|
| 104 |
+
logger.info("WARNING: This will download approximately 30GB of models!")
|
| 105 |
|
| 106 |
response = input("Continue with download? (y/N): ")
|
| 107 |
if response.lower() != 'y':
|
| 108 |
+
logger.info("ERROR: Download cancelled by user")
|
| 109 |
return False
|
| 110 |
|
| 111 |
for model_name, model_info in self.models.items():
|
| 112 |
logger.info(f"π₯ Downloading {model_name} ({model_info['size']})...")
|
| 113 |
+
logger.info(f"[INFO] {model_info['description']}")
|
| 114 |
|
| 115 |
local_dir = self.models_dir / model_name
|
| 116 |
|
| 117 |
# Skip if already exists and has content
|
| 118 |
if local_dir.exists() and any(local_dir.iterdir()):
|
| 119 |
+
logger.info(f"SUCCESS: {model_name} already exists, skipping...")
|
| 120 |
continue
|
| 121 |
|
| 122 |
try:
|
|
|
|
| 126 |
'--local-dir', str(local_dir)
|
| 127 |
]
|
| 128 |
|
| 129 |
+
logger.info(f"[LAUNCH] Running: {' '.join(cmd)}")
|
| 130 |
result = subprocess.run(cmd, check=True)
|
| 131 |
+
logger.info(f"SUCCESS: {model_name} downloaded successfully!")
|
| 132 |
|
| 133 |
except subprocess.CalledProcessError as e:
|
| 134 |
+
logger.error(f"ERROR: Failed to download {model_name}: {e}")
|
| 135 |
return False
|
| 136 |
|
| 137 |
+
logger.info("SUCCESS: All models downloaded successfully!")
|
| 138 |
return True
|
| 139 |
|
| 140 |
def run_setup(self):
|
| 141 |
"""Run the complete setup process"""
|
| 142 |
+
logger.info("[LAUNCH] Starting OmniAvatar-14B setup...")
|
| 143 |
|
| 144 |
if not self.check_dependencies():
|
| 145 |
+
logger.error("ERROR: Dependencies check failed!")
|
| 146 |
return False
|
| 147 |
|
| 148 |
if not self.install_huggingface_cli():
|
| 149 |
+
logger.error("ERROR: Failed to install Hugging Face CLI!")
|
| 150 |
return False
|
| 151 |
|
| 152 |
self.create_directory_structure()
|
| 153 |
|
| 154 |
if not self.download_models():
|
| 155 |
+
logger.error("ERROR: Model download failed!")
|
| 156 |
return False
|
| 157 |
|
| 158 |
logger.info("π OmniAvatar-14B setup completed successfully!")
|
| 159 |
+
logger.info("TIP: You can now run the full avatar generation!")
|
| 160 |
return True
|
| 161 |
|
| 162 |
def main():
|
|
|
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
| 167 |
main()
|
| 168 |
+
|
|
@@ -35,11 +35,11 @@ class SimpleTTSClient:
|
|
| 35 |
self.model = VitsModel.from_pretrained(model_name).to(self.device)
|
| 36 |
|
| 37 |
self.model_loaded = True
|
| 38 |
-
logger.info("
|
| 39 |
return True
|
| 40 |
|
| 41 |
except Exception as e:
|
| 42 |
-
logger.error(f"
|
| 43 |
logger.info("Falling back to basic TTS approach...")
|
| 44 |
return await self._load_fallback_model()
|
| 45 |
|
|
@@ -60,11 +60,11 @@ class SimpleTTSClient:
|
|
| 60 |
|
| 61 |
self.model_loaded = True
|
| 62 |
self.use_fallback = True
|
| 63 |
-
logger.info("
|
| 64 |
return True
|
| 65 |
|
| 66 |
except Exception as e:
|
| 67 |
-
logger.error(f"
|
| 68 |
return False
|
| 69 |
|
| 70 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
|
|
@@ -108,9 +108,10 @@ class SimpleTTSClient:
|
|
| 108 |
sf.write(temp_file.name, audio_data, samplerate=16000)
|
| 109 |
temp_file.close()
|
| 110 |
|
| 111 |
-
logger.info(f"
|
| 112 |
return temp_file.name
|
| 113 |
|
| 114 |
except Exception as e:
|
| 115 |
-
logger.error(f"
|
| 116 |
raise Exception(f"TTS generation failed: {e}")
|
|
|
|
|
|
| 35 |
self.model = VitsModel.from_pretrained(model_name).to(self.device)
|
| 36 |
|
| 37 |
self.model_loaded = True
|
| 38 |
+
logger.info("SUCCESS: VITS TTS model loaded successfully")
|
| 39 |
return True
|
| 40 |
|
| 41 |
except Exception as e:
|
| 42 |
+
logger.error(f"ERROR: Failed to load VITS model: {e}")
|
| 43 |
logger.info("Falling back to basic TTS approach...")
|
| 44 |
return await self._load_fallback_model()
|
| 45 |
|
|
|
|
| 60 |
|
| 61 |
self.model_loaded = True
|
| 62 |
self.use_fallback = True
|
| 63 |
+
logger.info("SUCCESS: Fallback TTS model loaded successfully")
|
| 64 |
return True
|
| 65 |
|
| 66 |
except Exception as e:
|
| 67 |
+
logger.error(f"ERROR: All TTS models failed to load: {e}")
|
| 68 |
return False
|
| 69 |
|
| 70 |
async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
|
|
|
|
| 108 |
sf.write(temp_file.name, audio_data, samplerate=16000)
|
| 109 |
temp_file.close()
|
| 110 |
|
| 111 |
+
logger.info(f"SUCCESS: Generated speech audio: {temp_file.name}")
|
| 112 |
return temp_file.name
|
| 113 |
|
| 114 |
except Exception as e:
|
| 115 |
+
logger.error(f"ERROR: Error generating speech: {e}")
|
| 116 |
raise Exception(f"TTS generation failed: {e}")
|
| 117 |
+
|
|
@@ -28,7 +28,7 @@ def check_models_available():
|
|
| 28 |
|
| 29 |
def download_models():
|
| 30 |
"""Download OmniAvatar models"""
|
| 31 |
-
logger.info("
|
| 32 |
logger.info("=" * 60)
|
| 33 |
logger.info("This application generates AVATAR VIDEOS, not just audio.")
|
| 34 |
logger.info("Video generation requires ~30GB of OmniAvatar models.")
|
|
@@ -40,51 +40,52 @@ def download_models():
|
|
| 40 |
capture_output=True, text=True)
|
| 41 |
|
| 42 |
if result.returncode == 0:
|
| 43 |
-
logger.info("
|
| 44 |
return True
|
| 45 |
else:
|
| 46 |
-
logger.error(f"
|
| 47 |
return False
|
| 48 |
|
| 49 |
except Exception as e:
|
| 50 |
-
logger.error(f"
|
| 51 |
return False
|
| 52 |
|
| 53 |
def main():
|
| 54 |
"""Main startup function"""
|
| 55 |
-
print("
|
| 56 |
print("=" * 55)
|
| 57 |
|
| 58 |
# Check if models are available
|
| 59 |
models_available, missing = check_models_available()
|
| 60 |
|
| 61 |
if not models_available:
|
| 62 |
-
print(f"
|
| 63 |
-
print("
|
| 64 |
print("")
|
| 65 |
|
| 66 |
response = input("Download models now? (~30GB download) [y/N]: ")
|
| 67 |
if response.lower() == 'y':
|
| 68 |
success = download_models()
|
| 69 |
if not success:
|
| 70 |
-
print("
|
| 71 |
-
print("
|
| 72 |
else:
|
| 73 |
-
print("
|
| 74 |
else:
|
| 75 |
-
print("
|
| 76 |
|
| 77 |
-
print("\n
|
| 78 |
|
| 79 |
# Start the main application
|
| 80 |
try:
|
| 81 |
import app
|
| 82 |
# The app.py will handle the rest
|
| 83 |
except Exception as e:
|
| 84 |
-
print(f"
|
| 85 |
return 1
|
| 86 |
|
| 87 |
return 0
|
| 88 |
|
| 89 |
if __name__ == "__main__":
|
| 90 |
sys.exit(main())
|
|
|
|
|
|
| 28 |
|
| 29 |
def download_models():
|
| 30 |
"""Download OmniAvatar models"""
|
| 31 |
+
logger.info("[VIDEO] OMNIAVATAR VIDEO GENERATION - Model Download Required")
|
| 32 |
logger.info("=" * 60)
|
| 33 |
logger.info("This application generates AVATAR VIDEOS, not just audio.")
|
| 34 |
logger.info("Video generation requires ~30GB of OmniAvatar models.")
|
|
|
|
| 40 |
capture_output=True, text=True)
|
| 41 |
|
| 42 |
if result.returncode == 0:
|
| 43 |
+
logger.info("SUCCESS: Models downloaded successfully!")
|
| 44 |
return True
|
| 45 |
else:
|
| 46 |
+
logger.error(f"ERROR: Model download failed: {result.stderr}")
|
| 47 |
return False
|
| 48 |
|
| 49 |
except Exception as e:
|
| 50 |
+
logger.error(f"ERROR: Error downloading models: {e}")
|
| 51 |
return False
|
| 52 |
|
| 53 |
def main():
|
| 54 |
"""Main startup function"""
|
| 55 |
+
print("[VIDEO] STARTING OMNIAVATAR VIDEO GENERATION APPLICATION")
|
| 56 |
print("=" * 55)
|
| 57 |
|
| 58 |
# Check if models are available
|
| 59 |
models_available, missing = check_models_available()
|
| 60 |
|
| 61 |
if not models_available:
|
| 62 |
+
print(f"WARNING: Missing video generation models: {missing}")
|
| 63 |
+
print("[TARGET] This is a VIDEO generation app - models are required!")
|
| 64 |
print("")
|
| 65 |
|
| 66 |
response = input("Download models now? (~30GB download) [y/N]: ")
|
| 67 |
if response.lower() == 'y':
|
| 68 |
success = download_models()
|
| 69 |
if not success:
|
| 70 |
+
print("ERROR: Model download failed. App will run in limited mode.")
|
| 71 |
+
print("TIP: Please run 'python download_models_production.py' manually")
|
| 72 |
else:
|
| 73 |
+
print("WARNING: Starting app without video models (limited functionality)")
|
| 74 |
else:
|
| 75 |
+
print("SUCCESS: All OmniAvatar models found - VIDEO GENERATION READY!")
|
| 76 |
|
| 77 |
+
print("\n[LAUNCH] Starting FastAPI + Gradio application...")
|
| 78 |
|
| 79 |
# Start the main application
|
| 80 |
try:
|
| 81 |
import app
|
| 82 |
# The app.py will handle the rest
|
| 83 |
except Exception as e:
|
| 84 |
+
print(f"ERROR: Failed to start application: {e}")
|
| 85 |
return 1
|
| 86 |
|
| 87 |
return 0
|
| 88 |
|
| 89 |
if __name__ == "__main__":
|
| 90 |
sys.exit(main())
|
| 91 |
+
|
|
@@ -13,11 +13,12 @@ async def test_hf_tts():
|
|
| 13 |
try:
|
| 14 |
# Test TTS generation
|
| 15 |
audio_path = await client.text_to_speech("Hello, this is a test of HuggingFace TTS!")
|
| 16 |
-
print(f"
|
| 17 |
return True
|
| 18 |
except Exception as e:
|
| 19 |
-
print(f"
|
| 20 |
return False
|
| 21 |
|
| 22 |
if __name__ == "__main__":
|
| 23 |
asyncio.run(test_hf_tts())
|
|
|
|
|
|
| 13 |
try:
|
| 14 |
# Test TTS generation
|
| 15 |
audio_path = await client.text_to_speech("Hello, this is a test of HuggingFace TTS!")
|
| 16 |
+
print(f"SUCCESS: TTS Success! Audio saved to: {audio_path}")
|
| 17 |
return True
|
| 18 |
except Exception as e:
|
| 19 |
+
print(f"ERROR: TTS Failed: {e}")
|
| 20 |
return False
|
| 21 |
|
| 22 |
if __name__ == "__main__":
|
| 23 |
asyncio.run(test_hf_tts())
|
| 24 |
+
|
|
@@ -29,7 +29,7 @@ async def test_advanced_tts():
|
|
| 29 |
success = await client.load_models()
|
| 30 |
|
| 31 |
if success:
|
| 32 |
-
print("
|
| 33 |
|
| 34 |
# Get model info
|
| 35 |
info = client.get_model_info()
|
|
@@ -45,7 +45,7 @@ async def test_advanced_tts():
|
|
| 45 |
print(f"Voice ID: {voice_id}")
|
| 46 |
|
| 47 |
audio_path = await client.text_to_speech(test_text, voice_id)
|
| 48 |
-
print(f"
|
| 49 |
|
| 50 |
# Check file
|
| 51 |
if os.path.exists(audio_path):
|
|
@@ -53,20 +53,20 @@ async def test_advanced_tts():
|
|
| 53 |
print(f"π Audio file size: {size} bytes")
|
| 54 |
|
| 55 |
if size > 1000:
|
| 56 |
-
print("
|
| 57 |
return True
|
| 58 |
else:
|
| 59 |
-
print("
|
| 60 |
return False
|
| 61 |
else:
|
| 62 |
-
print("
|
| 63 |
return False
|
| 64 |
else:
|
| 65 |
-
print("
|
| 66 |
return False
|
| 67 |
|
| 68 |
except Exception as e:
|
| 69 |
-
print(f"
|
| 70 |
import traceback
|
| 71 |
traceback.print_exc()
|
| 72 |
return False
|
|
@@ -90,7 +90,7 @@ async def test_tts_manager():
|
|
| 90 |
success = await manager.load_models()
|
| 91 |
|
| 92 |
if success:
|
| 93 |
-
print("
|
| 94 |
|
| 95 |
# Get info
|
| 96 |
info = manager.get_tts_info()
|
|
@@ -105,7 +105,7 @@ async def test_tts_manager():
|
|
| 105 |
print(f"Voice ID: {voice_id}")
|
| 106 |
|
| 107 |
audio_path, method = await manager.text_to_speech(test_text, voice_id)
|
| 108 |
-
print(f"
|
| 109 |
print(f"ποΈ Method used: {method}")
|
| 110 |
|
| 111 |
# Check file
|
|
@@ -114,14 +114,14 @@ async def test_tts_manager():
|
|
| 114 |
print(f"π Audio file size: {size} bytes")
|
| 115 |
return True
|
| 116 |
else:
|
| 117 |
-
print("
|
| 118 |
return False
|
| 119 |
else:
|
| 120 |
-
print("
|
| 121 |
return False
|
| 122 |
|
| 123 |
except Exception as e:
|
| 124 |
-
print(f"
|
| 125 |
import traceback
|
| 126 |
traceback.print_exc()
|
| 127 |
return False
|
|
@@ -147,7 +147,7 @@ async def main():
|
|
| 147 |
|
| 148 |
test_names = ["Advanced TTS Direct", "TTS Manager with Fallback"]
|
| 149 |
for i, (name, result) in enumerate(zip(test_names, results)):
|
| 150 |
-
status = "
|
| 151 |
print(f"{i+1}. {name}: {status}")
|
| 152 |
|
| 153 |
passed = sum(results)
|
|
@@ -160,11 +160,11 @@ async def main():
|
|
| 160 |
if passed == total:
|
| 161 |
print("π All components working perfectly!")
|
| 162 |
else:
|
| 163 |
-
print("
|
| 164 |
else:
|
| 165 |
print("π₯ All tests failed - check dependencies and installation")
|
| 166 |
|
| 167 |
-
print("\n
|
| 168 |
print("1. Install missing dependencies: pip install transformers datasets")
|
| 169 |
print("2. Run the main app: python app.py")
|
| 170 |
print("3. Test via /health endpoint")
|
|
@@ -175,3 +175,4 @@ async def main():
|
|
| 175 |
if __name__ == "__main__":
|
| 176 |
success = asyncio.run(main())
|
| 177 |
exit(0 if success else 1)
|
|
|
|
|
|
| 29 |
success = await client.load_models()
|
| 30 |
|
| 31 |
if success:
|
| 32 |
+
print("SUCCESS: Models loaded successfully!")
|
| 33 |
|
| 34 |
# Get model info
|
| 35 |
info = client.get_model_info()
|
|
|
|
| 45 |
print(f"Voice ID: {voice_id}")
|
| 46 |
|
| 47 |
audio_path = await client.text_to_speech(test_text, voice_id)
|
| 48 |
+
print(f"SUCCESS: TTS SUCCESS: Generated audio at {audio_path}")
|
| 49 |
|
| 50 |
# Check file
|
| 51 |
if os.path.exists(audio_path):
|
|
|
|
| 53 |
print(f"π Audio file size: {size} bytes")
|
| 54 |
|
| 55 |
if size > 1000:
|
| 56 |
+
print("SUCCESS: Audio file appears valid!")
|
| 57 |
return True
|
| 58 |
else:
|
| 59 |
+
print("WARNING: Audio file seems too small")
|
| 60 |
return False
|
| 61 |
else:
|
| 62 |
+
print("ERROR: Audio file not found")
|
| 63 |
return False
|
| 64 |
else:
|
| 65 |
+
print("ERROR: Model loading failed")
|
| 66 |
return False
|
| 67 |
|
| 68 |
except Exception as e:
|
| 69 |
+
print(f"ERROR: Test failed: {e}")
|
| 70 |
import traceback
|
| 71 |
traceback.print_exc()
|
| 72 |
return False
|
|
|
|
| 90 |
success = await manager.load_models()
|
| 91 |
|
| 92 |
if success:
|
| 93 |
+
print("SUCCESS: TTS Manager loaded successfully!")
|
| 94 |
|
| 95 |
# Get info
|
| 96 |
info = manager.get_tts_info()
|
|
|
|
| 105 |
print(f"Voice ID: {voice_id}")
|
| 106 |
|
| 107 |
audio_path, method = await manager.text_to_speech(test_text, voice_id)
|
| 108 |
+
print(f"SUCCESS: TTS Manager SUCCESS: Generated audio at {audio_path}")
|
| 109 |
print(f"ποΈ Method used: {method}")
|
| 110 |
|
| 111 |
# Check file
|
|
|
|
| 114 |
print(f"π Audio file size: {size} bytes")
|
| 115 |
return True
|
| 116 |
else:
|
| 117 |
+
print("ERROR: Audio file not found")
|
| 118 |
return False
|
| 119 |
else:
|
| 120 |
+
print("ERROR: TTS Manager loading failed")
|
| 121 |
return False
|
| 122 |
|
| 123 |
except Exception as e:
|
| 124 |
+
print(f"ERROR: TTS Manager test failed: {e}")
|
| 125 |
import traceback
|
| 126 |
traceback.print_exc()
|
| 127 |
return False
|
|
|
|
| 147 |
|
| 148 |
test_names = ["Advanced TTS Direct", "TTS Manager with Fallback"]
|
| 149 |
for i, (name, result) in enumerate(zip(test_names, results)):
|
| 150 |
+
status = "SUCCESS: PASS" if result else "ERROR: FAIL"
|
| 151 |
print(f"{i+1}. {name}: {status}")
|
| 152 |
|
| 153 |
passed = sum(results)
|
|
|
|
| 160 |
if passed == total:
|
| 161 |
print("π All components working perfectly!")
|
| 162 |
else:
|
| 163 |
+
print("WARNING: Some components failed, but system should still work")
|
| 164 |
else:
|
| 165 |
print("π₯ All tests failed - check dependencies and installation")
|
| 166 |
|
| 167 |
+
print("\n[INFO] Next steps:")
|
| 168 |
print("1. Install missing dependencies: pip install transformers datasets")
|
| 169 |
print("2. Run the main app: python app.py")
|
| 170 |
print("3. Test via /health endpoint")
|
|
|
|
| 175 |
if __name__ == "__main__":
|
| 176 |
success = asyncio.run(main())
|
| 177 |
exit(0 if success else 1)
|
| 178 |
+
|