#!/usr/bin/env python3
"""
Simple monitoring script to track service health and resource usage.

Once per interval the script samples process memory/CPU/thread usage and the
disk holding ``app_config.DATA_DIR``, appends the sample as one JSON line to
``system_stats.jsonl`` under ``app_config.LOG_DIR``, echoes it to stdout and
prints an alert line when a threshold is crossed.
"""

import json
import os
import time
from datetime import datetime

import psutil

from src import config as app_config

# Seconds between samples; also used as the back-off after a failed sample.
SAMPLE_INTERVAL_SECONDS = 60

# Alert thresholds. 450MB is roughly 88% of the 512MB limit mentioned by the
# original author (the original comment said 90%, which would be ~461MB).
MEMORY_ALERT_MB = 450
CPU_ALERT_PERCENT = 80
DISK_FREE_ALERT_GB = 0.5


def get_system_stats():
    """Get current system statistics.

    Returns:
        dict: On success, the keys ``timestamp``, ``memory_mb``,
        ``cpu_percent``, ``disk_free_gb``, ``disk_used_percent``,
        ``num_threads`` and ``pid``. If any step raises, a dict holding only
        ``timestamp`` and ``error`` (the exception message) is returned
        instead, so this function itself never raises ``Exception``.
    """
    try:
        # NOTE(review): Process() without a pid refers to the process running
        # this script, so the memory/CPU/thread figures describe the monitor
        # itself -- confirm that is what "service health" is meant to cover.
        process = psutil.Process()

        # Memory info: resident set size, converted from bytes to MB.
        memory_mb = process.memory_info().rss / 1024 / 1024

        # CPU info: blocks for one second to measure usage over that window.
        cpu_percent = process.cpu_percent(interval=1)

        # Disk info (prefer DATA_DIR, fall back to the root filesystem).
        if os.path.exists(app_config.DATA_DIR):
            disk_root = app_config.DATA_DIR
        else:
            disk_root = '/'
        disk_usage = psutil.disk_usage(disk_root)
        disk_free_gb = disk_usage.free / (1024**3)
        # A filesystem reporting a total of 0 would otherwise raise
        # ZeroDivisionError and discard the whole sample.
        if disk_usage.total:
            disk_used_percent = (disk_usage.used / disk_usage.total) * 100
        else:
            disk_used_percent = 0.0

        # Process info
        num_threads = process.num_threads()

        return {
            "timestamp": datetime.now().isoformat(),
            "memory_mb": round(memory_mb, 2),
            "cpu_percent": round(cpu_percent, 2),
            "disk_free_gb": round(disk_free_gb, 2),
            "disk_used_percent": round(disk_used_percent, 2),
            "num_threads": num_threads,
            "pid": process.pid,
        }
    except Exception as e:
        # Deliberate best effort: a failed sample is still logged, as an
        # error record, rather than stopping the monitor.
        return {
            "timestamp": datetime.now().isoformat(),
            "error": str(e),
        }


def _find_issues(stats):
    """Return one alert string per threshold that the sample crosses."""
    issues = []
    if stats["memory_mb"] > MEMORY_ALERT_MB:
        issues.append(f"HIGH MEMORY: {stats['memory_mb']:.1f}MB")
    if stats["cpu_percent"] > CPU_ALERT_PERCENT:
        issues.append(f"HIGH CPU: {stats['cpu_percent']:.1f}%")
    if stats["disk_free_gb"] < DISK_FREE_ALERT_GB:
        issues.append(f"LOW DISK: {stats['disk_free_gb']:.1f}GB free")
    return issues


def log_stats():
    """Log system statistics to file.

    Takes one sample via ``get_system_stats()``, appends it as a JSON line to
    ``system_stats.jsonl`` in ``app_config.LOG_DIR`` (creating the directory
    if needed), prints it to stdout, and prints an ``ALERTS`` line when the
    sample crosses a threshold. Error samples are logged but not checked.

    Raises:
        OSError: If the log directory or file cannot be created or written.
    """
    stats = get_system_stats()
    payload = json.dumps(stats)

    # Create logs directory if it doesn't exist
    log_dir = app_config.LOG_DIR
    os.makedirs(log_dir, exist_ok=True)

    # Write to log file
    log_file = os.path.join(log_dir, "system_stats.jsonl")
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(payload + "\n")

    # Print to stdout for supervisord. stdout is block-buffered when it is
    # not a terminal, so flush explicitly or lines show up long after the
    # sample was taken.
    print(f"[Monitor] {payload}", flush=True)

    # Check for issues
    if "error" not in stats:
        issues = _find_issues(stats)
        if issues:
            print(f"[Monitor] ALERTS: {', '.join(issues)}", flush=True)


def main():
    """Sample and log statistics once per interval until interrupted."""
    print("[Monitor] Starting system monitoring...", flush=True)
    # KeyboardInterrupt is handled around the whole loop so that Ctrl-C is
    # also caught while sleeping after a failed sample.
    try:
        while True:
            try:
                log_stats()
            except Exception as e:
                print(f"[Monitor] Error: {e}", flush=True)
            time.sleep(SAMPLE_INTERVAL_SECONDS)  # Log every minute
    except KeyboardInterrupt:
        print("[Monitor] Monitoring stopped", flush=True)


if __name__ == "__main__":
    main()