File size: 2,464 Bytes
c49b21b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import psutil
import time
from datetime import datetime
from src.config import DATA_DIR, LAST_RUN_PATH

# A scheduler heartbeat older than this many seconds is reported as stale.
_SCHEDULER_STALE_AFTER_SECONDS = 45 * 60  # 45 minutes

# (threshold in MB, label used in the message), checked highest first so that
# only the most severe memory issue is reported.
_MEMORY_LIMITS_MB = ((1024, "1GB"), (512, "512MB"))

_CPU_LIMIT_PERCENT = 80
_MIN_FREE_DISK_GB = 1

# Process handle reused across calls; see _current_process().
_health_process = None


def _current_process():
    """Return a cached psutil handle for the current process.

    A non-blocking ``cpu_percent()`` call measures CPU time since the previous
    call on the *same* handle and returns a meaningless 0.0 the first time, so
    creating a new handle per health check would always report 0.0.  The handle
    is recreated when the PID changes (e.g. after a fork) so a child never
    reports on its parent.
    """
    global _health_process
    if _health_process is None or _health_process.pid != os.getpid():
        _health_process = psutil.Process()
    return _health_process


def _scheduler_ran_recently():
    """Return True if LAST_RUN_PATH holds a recent '%Y-%m-%d %H:%M:%S' timestamp."""
    try:
        with open(LAST_RUN_PATH, 'r', encoding='utf-8') as f:
            last_run = datetime.strptime(f.read().strip(), '%Y-%m-%d %H:%M:%S')
    except Exception:
        # Best-effort probe: a missing, unreadable or malformed heartbeat file
        # means the scheduler cannot be confirmed as running.
        return False
    # Both values are naive datetimes, so they are compared as-is.
    seconds_since_last_run = (datetime.now() - last_run).total_seconds()
    return seconds_since_last_run < _SCHEDULER_STALE_AFTER_SECONDS


def health_status():
    """Report the health of this process and its scheduler heartbeat.

    Collects resident memory and CPU usage of the current process, free disk
    space (on DATA_DIR when it exists, otherwise on '/'), and whether the
    timestamp stored in LAST_RUN_PATH is less than 45 minutes old.

    The CPU figure is the usage since the previous call to this function; the
    first call in a process reports 0.0 because there is no earlier sample.

    Returns:
        On success, a dict with keys:
            "status": "healthy" when no issue was found, otherwise "degraded".
            "timestamp": ``datetime.now().isoformat()`` at the time of the check.
            "metrics": dict with "memory_mb", "cpu_percent", "disk_free_gb"
                (each rounded to one decimal) and "scheduler_running" (bool).
            "issues": list of human-readable issue descriptions (may be empty).
        If collecting the metrics raises, a dict with "status" set to "error",
        "timestamp", and "error" holding ``str(exception)``.  This function
        does not propagate exceptions.
    """
    try:
        process = _current_process()
        memory_mb = process.memory_info().rss / 1024 / 1024
        cpu_percent = process.cpu_percent()

        scheduler_running = _scheduler_ran_recently()

        # Check disk space (prefer DATA_DIR)
        disk_root = DATA_DIR if os.path.exists(DATA_DIR) else '/'
        disk_free_gb = psutil.disk_usage(disk_root).free / (1024**3)

        health_issues = []

        for limit_mb, label in _MEMORY_LIMITS_MB:
            if memory_mb > limit_mb:
                health_issues.append(
                    f"High memory usage: {memory_mb:.1f}MB (over {label})"
                )
                break

        if cpu_percent > _CPU_LIMIT_PERCENT:
            health_issues.append(f"High CPU usage: {cpu_percent:.1f}%")

        if disk_free_gb < _MIN_FREE_DISK_GB:
            health_issues.append(f"Low disk space: {disk_free_gb:.1f}GB free")

        if not scheduler_running:
            health_issues.append("Scheduler not running or stale")

        return {
            "status": "degraded" if health_issues else "healthy",
            "timestamp": datetime.now().isoformat(),
            "metrics": {
                "memory_mb": round(memory_mb, 1),
                "cpu_percent": round(cpu_percent, 1),
                "disk_free_gb": round(disk_free_gb, 1),
                "scheduler_running": scheduler_running
            },
            "issues": health_issues
        }

    except Exception as e:
        # The health check itself must never raise; report the failure instead.
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "error": str(e)
        }