|
|
|
""" |
|
Memory-Optimized Main Pipeline for AdvisorAI Data Enhanced |
|
Addresses critical memory issues causing instance failures (512MB limit) |
|
""" |
|
|
|
import sys |
|
import os |
|
import gc |
|
import psutil |
|
from datetime import datetime |
|
from contextlib import contextmanager |
|
|
|
|
|
# Prepend the script's parent dir (presumably the repo root — confirm),
# the script's own dir, and its "fetchers" subdir to sys.path so the lazy
# `from fetchers.main import ...` / `from merge import ...` imports below
# resolve regardless of the current working directory.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "fetchers")))
|
|
|
class MemoryMonitor:
    """Track this process's RSS against a soft ceiling and trigger GC.

    Built for memory-constrained hosts (e.g. a 512MB instance): callers
    either poll ``check_and_cleanup`` or wrap a pipeline stage in
    ``memory_context`` so garbage collection runs before the limit is hit.
    """

    def __init__(self, max_memory_mb=450):
        # Soft ceiling in MB; cleanup/warning thresholds are fractions of it.
        self.max_memory_mb = max_memory_mb
        self.process = psutil.Process()

    def get_memory_usage(self):
        """Return the current resident set size of this process, in MB."""
        rss_bytes = self.process.memory_info().rss
        return rss_bytes / (1024 * 1024)

    def check_and_cleanup(self, operation_name=""):
        """Collect garbage when usage exceeds 80% of the ceiling.

        Returns the usage (MB) measured *before* any collection ran, so
        callers can log the pre-cleanup figure.
        """
        usage_mb = self.get_memory_usage()
        if not usage_mb > self.max_memory_mb * 0.8:
            # Comfortably under the soft threshold — nothing to do.
            return usage_mb

        print(f"[MemOpt] High memory usage during {operation_name}: {usage_mb:.1f}MB")
        freed_count = gc.collect()
        post_gc_mb = self.get_memory_usage()
        print(f"[MemOpt] Memory after GC: {post_gc_mb:.1f}MB (freed {freed_count} objects)")

        # Still within 10% of the ceiling after a full collection: warn,
        # since GC alone evidently cannot bring usage back down.
        if post_gc_mb > self.max_memory_mb * 0.9:
            print(f"[MemOpt] WARNING: Memory still high after cleanup")

        return usage_mb

    @contextmanager
    def memory_context(self, operation_name):
        """Log memory before/after the wrapped work and clean up if high.

        The "finally" reporting runs even when the wrapped code raises.
        """
        before_mb = self.get_memory_usage()
        print(f"[MemOpt] Starting {operation_name} - Memory: {before_mb:.1f}MB")
        try:
            yield
        finally:
            after_mb = self.get_memory_usage()
            delta_mb = after_mb - before_mb
            print(f"[MemOpt] Finished {operation_name} - Memory: {after_mb:.1f}MB (Δ{delta_mb:+.1f}MB)")

            if after_mb > self.max_memory_mb * 0.8:
                print(f"[MemOpt] Memory high after {operation_name}, forcing cleanup...")
                gc.collect()
                settled_mb = self.get_memory_usage()
                print(f"[MemOpt] Memory after cleanup: {settled_mb:.1f}MB")
|
|
|
def run_fetchers_optimized(memory_monitor):
    """Run the fetcher stage inside a memory-monitoring context.

    Returns whatever ``fetchers.main.main()`` returns, or False when any
    exception occurs (including a failed import of the fetchers module).
    """
    try:
        with memory_monitor.memory_context("Fetchers"):
            # Imported lazily so the module can be evicted once finished.
            from fetchers.main import main as fetchers_main

            print("[Pipeline] Starting data fetchers (memory optimized)...")
            outcome = fetchers_main()

            # Drop the cached module so its globals become collectable.
            sys.modules.pop('fetchers.main', None)

            memory_monitor.check_and_cleanup("Fetchers")
            return outcome
    except Exception as e:
        print(f"[Pipeline] Error in fetchers: {e}")
        # Best-effort cleanup even on the failure path.
        memory_monitor.check_and_cleanup("Fetchers (error)")
        return False
|
|
|
def run_merge_optimized(memory_monitor):
    """Run the merge stage inside a memory-monitoring context.

    Returns whatever ``merge.main.main()`` returns, or False when any
    exception occurs (including a failed import of the merge module).
    """
    try:
        with memory_monitor.memory_context("Merge"):
            # Imported lazily so the module can be evicted once finished.
            from merge import main as merge_main

            print("[Pipeline] Starting data merge (memory optimized)...")
            outcome = merge_main.main()

            # Drop the cached module so its globals become collectable.
            sys.modules.pop('merge.main', None)

            memory_monitor.check_and_cleanup("Merge")
            return outcome
    except Exception as e:
        print(f"[Pipeline] Error in merge: {e}")
        # Best-effort cleanup even on the failure path.
        memory_monitor.check_and_cleanup("Merge (error)")
        return False
|
|
|
def main():
    """Run the full pipeline: fetchers, then merge, with memory guards.

    Returns True when every step completes; False when the merge step
    fails or an unhandled exception escapes. A fetcher failure alone is
    tolerated so existing data can still be merged.
    """
    print("AdvisorAI Data Pipeline - Memory Optimized")
    print("=" * 50)
    print(f"Pipeline started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    monitor = MemoryMonitor(max_memory_mb=450)

    startup_mb = monitor.get_memory_usage()
    print(f"[Pipeline] Initial memory usage: {startup_mb:.1f}MB")

    # A fresh process should sit well under 200MB; anything above hints at
    # leftover state worth collecting before the heavy stages start.
    if startup_mb > 200:
        print(f"[Pipeline] WARNING: High initial memory usage: {startup_mb:.1f}MB")
        monitor.check_and_cleanup("Initial")

    try:
        print("\n" + "=" * 30)
        print("STEP 1: DATA FETCHERS")
        print("=" * 30)

        if not run_fetchers_optimized(monitor):
            # Stale-but-present data beats none: the merge still proceeds.
            print("[Pipeline] Fetchers failed, but continuing to merge existing data...")

        between_mb = monitor.get_memory_usage()
        print(f"\n[Pipeline] Memory after fetchers: {between_mb:.1f}MB")

        if between_mb > 400:
            print("[Pipeline] Memory high after fetchers, forcing cleanup...")
            gc.collect()
            between_mb = monitor.get_memory_usage()
            print(f"[Pipeline] Memory after cleanup: {between_mb:.1f}MB")

        print("\n" + "=" * 30)
        print("STEP 2: DATA MERGE")
        print("=" * 30)

        if not run_merge_optimized(monitor):
            # Merge is mandatory — a failure here fails the whole run.
            print("[Pipeline] Merge failed")
            return False

        closing_mb = monitor.get_memory_usage()
        print(f"\n[Pipeline] Final memory usage: {closing_mb:.1f}MB")

        if closing_mb > 450:
            print("⚠️ WARNING: Memory usage approaching limit - optimization needed")

        print(f"Pipeline ended at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("[OK] All steps completed successfully!")
        return True

    except Exception as e:
        import traceback
        print(f"[ERROR] Pipeline execution failed: {e}")
        print(traceback.format_exc())

        # Last-ditch GC so a crashed run does not linger near the limit.
        print("[Pipeline] Emergency memory cleanup...")
        monitor.check_and_cleanup("Emergency")
        return False
|
|
|
if __name__ == "__main__":
    # Propagate failure to the shell so schedulers can detect a bad run.
    if not main():
        sys.exit(1)
|
|