Maaroufabousaleh
f
c49b21b
raw
history blame
2.82 kB
#!/usr/bin/env python3
"""
Simple monitoring script to track service health and resource usage
"""
import os
import time
import psutil
import json
from datetime import datetime
from src import config as app_config
def get_system_stats():
    """Return a snapshot of process and disk metrics as a JSON-friendly dict.

    On success the dict holds, in this order: ``timestamp`` (ISO-8601 string
    from ``datetime.now()``), ``memory_mb`` (resident set size), ``cpu_percent``,
    ``disk_free_gb``, ``disk_used_percent``, ``num_threads`` and ``pid``.
    Float values are rounded to two decimals.

    If anything raises while sampling, the dict contains only ``timestamp``
    and ``error`` (the exception text) instead.
    """
    bytes_per_mb = 1024 * 1024
    bytes_per_gb = 1024 ** 3

    try:
        # psutil.Process() without a pid is the handle used for all
        # per-process readings below.
        proc = psutil.Process()

        # Resident memory, converted from bytes to MB.
        rss_mb = proc.memory_info().rss / bytes_per_mb

        # Sampled over a one-second interval (interval=1).
        cpu_pct = proc.cpu_percent(interval=1)

        # Measure the volume holding DATA_DIR when that path exists;
        # otherwise fall back to the filesystem root.
        if os.path.exists(app_config.DATA_DIR):
            disk_target = app_config.DATA_DIR
        else:
            disk_target = '/'
        usage = psutil.disk_usage(disk_target)
        free_gb = usage.free / bytes_per_gb
        used_pct = (usage.used / usage.total) * 100

        thread_count = proc.num_threads()

        # Keys are inserted in a fixed order so the serialized form is stable.
        snapshot = {"timestamp": datetime.now().isoformat()}
        snapshot["memory_mb"] = round(rss_mb, 2)
        snapshot["cpu_percent"] = round(cpu_pct, 2)
        snapshot["disk_free_gb"] = round(free_gb, 2)
        snapshot["disk_used_percent"] = round(used_pct, 2)
        snapshot["num_threads"] = thread_count
        snapshot["pid"] = proc.pid
        return snapshot
    except Exception as exc:
        # Sampling is best-effort: report the failure rather than raising.
        failure = {"timestamp": datetime.now().isoformat()}
        failure["error"] = str(exc)
        return failure
def log_stats():
    """Collect one stats snapshot, persist it, echo it, and print alerts.

    The snapshot returned by ``get_system_stats()`` is:

    1. appended as a single JSON line to ``system_stats.jsonl`` inside
       ``app_config.LOG_DIR`` (the directory is created if missing);
    2. printed to stdout with a ``[Monitor]`` prefix;
    3. if it has no ``"error"`` key, checked against fixed thresholds
       (memory > 450 MB, CPU > 80 %, free disk < 0.5 GB); any breaches are
       printed together on one ``[Monitor] ALERTS:`` line.

    A failure to create the directory or write the file (``OSError``) is
    reported on stdout and does not abort the call, so the stdout line and the
    alerts are still emitted when the disk is full or the log directory is
    unwritable. Returns ``None``.
    """
    stats = get_system_stats()
    payload = json.dumps(stats)

    # Persisting is best-effort: a full or read-only disk is one of the
    # conditions this monitor exists to report, so it must not prevent the
    # stdout line and the alert checks below from running.
    log_dir = app_config.LOG_DIR
    try:
        os.makedirs(log_dir, exist_ok=True)
        log_file = os.path.join(log_dir, "system_stats.jsonl")
        with open(log_file, "a", encoding="utf-8") as f:
            f.write(payload + "\n")
    except OSError as e:
        print(f"[Monitor] Could not write stats log: {e}", flush=True)

    # Print to stdout for supervisord. Flush explicitly: stdout is
    # block-buffered when it is a pipe, which would otherwise delay output.
    print(f"[Monitor] {payload}", flush=True)

    # An error snapshot carries no metrics to compare.
    if "error" in stats:
        return

    issues = []
    # The original comment described 450 as 90% of a 512MB limit.
    if stats["memory_mb"] > 450:
        issues.append(f"HIGH MEMORY: {stats['memory_mb']:.1f}MB")
    if stats["cpu_percent"] > 80:
        issues.append(f"HIGH CPU: {stats['cpu_percent']:.1f}%")
    if stats["disk_free_gb"] < 0.5:
        issues.append(f"LOW DISK: {stats['disk_free_gb']:.1f}GB free")

    if issues:
        print(f"[Monitor] ALERTS: {', '.join(issues)}", flush=True)
if __name__ == "__main__":
    # Script entry point: call log_stats() forever, pausing 60 seconds
    # between calls, until interrupted with Ctrl-C / SIGINT.
    print("[Monitor] Starting system monitoring...", flush=True)
    try:
        while True:
            try:
                log_stats()
            except Exception as e:
                # Keep the monitor alive across unexpected failures; report
                # the error and try again after the normal pause.
                print(f"[Monitor] Error: {e}", flush=True)
            # Single shared pause for both the success and the error path.
            # It sits inside the outer try so that an interrupt arriving
            # while sleeping (including right after an error) still takes
            # the clean shutdown path below.
            time.sleep(60)  # Log every minute
    except KeyboardInterrupt:
        print("[Monitor] Monitoring stopped", flush=True)