|
import os |
|
import time |
|
import subprocess |
|
import sys |
|
import threading |
|
import asyncio |
|
from dotenv import load_dotenv |
|
import httpx |
|
import os |
|
|
|
from src import config as app_config |
|
|
|
|
|
|
|
|
|
# Locate the pipeline entry point at <this file's directory>/../src/main.py
# and refuse to start when it is missing.
PIPELINE_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "src", "main.py")
)
if not os.path.exists(PIPELINE_PATH):
    raise FileNotFoundError("src/main.py not found!")
PIPELINE_DIR = os.path.dirname(PIPELINE_PATH)

# Load variables through python-dotenv before any os.getenv-based setting
# is read.
load_dotenv()
|
|
|
def _parse_int_env(name: str, default_val: int) -> int:
    """Read environment variable *name* as an int, with a safe fallback.

    Anything from the first ``#`` onwards is treated as an inline comment
    and dropped, then surrounding whitespace is stripped, so a value such
    as ``"300  # seconds"`` parses as ``300``.

    Args:
        name: Name of the environment variable to read.
        default_val: Value used when the variable is unset, empty (after
            comment stripping), or not a valid integer.

    Returns:
        The parsed integer, or ``int(default_val)`` on fallback. A warning
        is printed only when a non-empty value fails to parse.
    """
    raw = os.getenv(name)
    if raw is None:
        return int(default_val)

    # Drop an inline comment and surrounding whitespace.
    cleaned = raw.split('#', 1)[0].strip()
    if not cleaned:
        return int(default_val)

    try:
        return int(cleaned)
    except ValueError:
        # int() on a str can only fail with ValueError.
        print(f"[Scheduler] Warning: {name}='{raw}' is not a valid int. Using default {default_val}.")
        return int(default_val)
|
|
|
# Comma-separated list of health-check endpoints used when the
# TRIGGER_HEALTH_URL environment variable is not set.
_DEFAULT_HEALTH_URLS = (
    "https://advisor-trigger-ki3t.onrender.com/health, "
    "https://advisorai-data-1ew2.onrender.com/health"
)
TRIGGER_HEALTH_URL = os.environ.get("TRIGGER_HEALTH_URL", _DEFAULT_HEALTH_URLS)

# Seconds between health-check rounds (env: TRIGGER_PING_INTERVAL).
PING_INTERVAL = _parse_int_env("TRIGGER_PING_INTERVAL", 300)

# Seconds to sleep between pipeline runs (env: PIPELINE_INTERVAL).
PIPELINE_INTERVAL = _parse_int_env("PIPELINE_INTERVAL", 3600)
|
|
|
|
|
|
|
|
|
async def ping_remote():
    """
    Continuously GET each URL in TRIGGER_HEALTH_URL (comma-separated).

    One round sends a GET to every configured URL in turn. When no request
    in the round raised (including ``raise_for_status()``), the next round
    starts PING_INTERVAL seconds later. When at least one request failed,
    the next round starts after a shorter retry delay that doubles with
    each consecutive failed round, capped at 150 seconds (2.5 minutes).
    A successful round resets the retry delay to its initial value.

    The coroutine never returns; per-URL errors are printed and do not
    stop the loop.
    """
    urls = [u.strip() for u in TRIGGER_HEALTH_URL.split(",") if u.strip()]

    max_backoff = 150
    # Keep the first retry delay at >= 1 second so that doubling always
    # grows it and a zero/negative PING_INTERVAL cannot busy-loop on errors.
    initial_backoff = max(1, min(PING_INTERVAL, 5))
    backoff = initial_backoff

    async with httpx.AsyncClient(timeout=10.0) as client:
        while True:
            all_success = True
            for url in urls:
                try:
                    resp = await client.get(url)
                    resp.raise_for_status()
                except Exception as e:
                    # Best effort: one unreachable endpoint must not stop
                    # the pinger, so log and carry on with the next URL.
                    print(f"[Pinger] error pinging {url}: {e}")
                    all_success = False
                else:
                    print(f"[Pinger] {url} -> {resp.status_code}")

            if all_success:
                # Start the next failure streak from the short delay again
                # rather than from PING_INTERVAL, which may exceed the cap.
                backoff = initial_backoff
                await asyncio.sleep(PING_INTERVAL)
            else:
                await asyncio.sleep(backoff)
                backoff = min(backoff * 2, max_backoff)
|
|
|
def start_async_ping():
    """
    Run ping_remote() on a dedicated asyncio event loop in the calling thread.

    Meant to be used as the target of a daemon thread: the call blocks for
    as long as ping_remote() runs. If ping_remote() ever raises, the
    exception propagates out of this function so the failure is reported
    by the thread instead of leaving an idle loop running silently.
    """
    # asyncio.run() creates a fresh loop for this thread, keeps a strong
    # reference to the coroutine's task until it finishes, and closes the
    # loop afterwards. A bare loop.create_task() result that is discarded
    # is only weakly referenced by the loop.
    asyncio.run(ping_remote())
|
|
|
|
|
# Run the pinger on a daemon thread so it never keeps the process alive
# on its own.
_ping_thread = threading.Thread(target=start_async_ping, daemon=True)
_ping_thread.start()
print("[Scheduler] Started background ping thread")
|
|
|
|
|
|
|
|
|
import traceback
from datetime import datetime


def _record_last_run(timestamp: str) -> None:
    """Write *timestamp* to app_config.LAST_RUN_PATH; log and continue on failure."""
    try:
        with open(app_config.LAST_RUN_PATH, 'w', encoding="utf-8") as f:
            f.write(timestamp)
    except Exception as e:
        # Best effort: a failed bookkeeping write must not block the run.
        print(f"[Scheduler] Failed to write last_run.txt: {e}")


def _run_pipeline_once() -> None:
    """Run the pipeline script in a child interpreter and log its outcome."""
    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    print(f"[Scheduler] Project root: {project_root}")
    print(f"[Scheduler] Pipeline path: {PIPELINE_PATH}")

    # NOTE(review): project_root is only logged; the child process runs
    # with cwd='/'. Confirm this working directory is intentional.
    result = subprocess.run(
        [sys.executable, PIPELINE_PATH],
        cwd='/',
        capture_output=True,
        text=True,
        env=os.environ.copy(),
    )
    print(f"[Scheduler] Pipeline finished with code {result.returncode}")

    if result.stdout:
        print("[Scheduler] STDOUT:\n", result.stdout)
    if result.stderr:
        print("[Scheduler] STDERR:\n", result.stderr)

    # Report failure directly; the captured output was already logged
    # above, so it is not printed a second time.
    if result.returncode != 0:
        print(f"[Scheduler] Pipeline execution failed with return code {result.returncode}")


# Main scheduler loop: record the start time, run the pipeline once, then
# sleep PIPELINE_INTERVAL seconds. Runs until the process is stopped.
while True:
    last_run = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[Scheduler] Running pipeline... Last run: {last_run}")

    _record_last_run(last_run)

    try:
        _run_pipeline_once()
    except Exception as e:
        # Top-level boundary: a failure to launch the pipeline must not
        # stop the scheduler, so log the traceback and keep looping.
        print(f"[Scheduler] Exception running pipeline: {e}")
        print(traceback.format_exc())

    print(f"[Scheduler] Sleeping for {PIPELINE_INTERVAL // 60} minutes...")
    time.sleep(PIPELINE_INTERVAL)
|
|