|
import os |
|
import psutil |
|
import time |
|
from datetime import datetime |
|
from src.config import DATA_DIR, LAST_RUN_PATH |
|
|
|
# Staleness threshold for the scheduler's last-run timestamp: 2700 s = 45 min.
_SCHEDULER_MAX_AGE_SECONDS = 2700

# Cached psutil handle for the current process; see _current_process().
_HEALTH_PROCESS = None


def _current_process():
    """Return a cached ``psutil.Process`` handle for the running interpreter.

    ``Process.cpu_percent()`` called without an interval reports usage since
    the previous call on the *same* object and returns 0.0 the first time it
    is called on a given object. Creating a fresh ``Process`` per health check
    therefore always yields 0.0; reusing one handle makes the reading real.
    """
    global _HEALTH_PROCESS
    # Rebuild the handle if the PID changed (e.g. after a fork) so we never
    # report statistics that belong to the parent process.
    if _HEALTH_PROCESS is None or _HEALTH_PROCESS.pid != os.getpid():
        _HEALTH_PROCESS = psutil.Process()
    return _HEALTH_PROCESS


def _scheduler_recently_ran():
    """Return True if the timestamp in LAST_RUN_PATH is under 45 minutes old.

    Any failure to read or parse the file (missing file, bad format, ...) is
    treated as "scheduler not running" rather than as an error.
    """
    try:
        with open(LAST_RUN_PATH, 'r', encoding='utf-8') as f:
            last_run_str = f.read().strip()
        last_run = datetime.strptime(last_run_str, '%Y-%m-%d %H:%M:%S')
        # NOTE(review): both values are naive datetimes; this assumes the
        # writer of LAST_RUN_PATH uses the same local clock -- confirm.
        time_since_last_run = (datetime.now() - last_run).total_seconds()
        return time_since_last_run < _SCHEDULER_MAX_AGE_SECONDS
    except Exception:
        # Deliberate best-effort: an unreadable marker means "stale".
        return False


def health_status():
    """Enhanced health check that monitors actual service health.

    Collects memory and CPU usage of the current process, free disk space
    for DATA_DIR (falling back to '/' when DATA_DIR does not exist) and the
    freshness of the scheduler's last-run marker.

    Returns:
        dict: On success, a mapping with
            * ``status`` -- "healthy" when no issue was found, else "degraded";
            * ``timestamp`` -- ``datetime.now().isoformat()``;
            * ``metrics`` -- ``memory_mb``, ``cpu_percent``, ``disk_free_gb``
              (each rounded to one decimal) and ``scheduler_running``;
            * ``issues`` -- list of human-readable problem descriptions.
        If gathering the metrics raises, a mapping with ``status`` set to
        "error", ``timestamp`` and ``error`` (the exception text) instead.

    Note:
        ``cpu_percent`` is measured since the previous call made in this
        process, so the very first call reports 0.0.
    """
    try:
        # Process-level metrics.
        process = _current_process()
        memory_mb = process.memory_info().rss / 1024 / 1024
        cpu_percent = process.cpu_percent()

        # Scheduler liveness, derived from the last-run marker file.
        scheduler_running = _scheduler_recently_ran()

        # Disk space on the data volume (or root if DATA_DIR is absent).
        disk_usage = psutil.disk_usage(
            DATA_DIR if os.path.exists(DATA_DIR) else '/'
        )
        disk_free_gb = disk_usage.free / (1024**3)

        health_issues = []

        if memory_mb > 1024:
            health_issues.append(
                f"High memory usage: {memory_mb:.1f}MB (over 1GB)"
            )
        elif memory_mb > 512:
            health_issues.append(
                f"High memory usage: {memory_mb:.1f}MB (over 512MB)"
            )

        if cpu_percent > 80:
            health_issues.append(f"High CPU usage: {cpu_percent:.1f}%")

        if disk_free_gb < 1:
            health_issues.append(f"Low disk space: {disk_free_gb:.1f}GB free")

        if not scheduler_running:
            health_issues.append("Scheduler not running or stale")

        status = "healthy" if not health_issues else "degraded"

        return {
            "status": status,
            "timestamp": datetime.now().isoformat(),
            "metrics": {
                "memory_mb": round(memory_mb, 1),
                "cpu_percent": round(cpu_percent, 1),
                "disk_free_gb": round(disk_free_gb, 1),
                "scheduler_running": scheduler_running
            },
            "issues": health_issues
        }

    except Exception as e:
        # Top-level boundary: a health probe must answer, not raise.
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "error": str(e)
        }