File size: 5,603 Bytes
c49b21b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
import time
import subprocess
import sys
import threading
import asyncio
from dotenv import load_dotenv
import httpx
import os

from src import config as app_config

# -----------------------------------------------------------------------------
# LOCATE YOUR DATA-PIPELINE SCRIPT
# -----------------------------------------------------------------------------
# Resolve <directory of this file>/../src/main.py once and fail fast at import
# time when the pipeline entry point is missing.
PIPELINE_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "src", "main.py")
)
if not os.path.exists(PIPELINE_PATH):
    raise FileNotFoundError("src/main.py not found!")
PIPELINE_DIR = os.path.dirname(PIPELINE_PATH)

# -----------------------------------------------------------------------------
# CONFIGURATION (via ENV)
# -----------------------------------------------------------------------------
# Call python-dotenv's load_dotenv() before any of the os.getenv() lookups below,
# so values from a .env file (presumably next to the app; verify location) are visible.
load_dotenv()
# Integer settings (e.g. the ping interval, default 300s = 5min) are parsed with this helper.
def _parse_int_env(name: str, default_val: int) -> int:
    raw = os.getenv(name, str(default_val))
    if isinstance(raw, str):
        # Strip inline comments and whitespace, e.g. "3600  # every hour"
        cleaned = raw.split('#', 1)[0].strip()
        if cleaned == "":
            return int(default_val)
        try:
            return int(cleaned)
        except Exception:
            print(f"[Scheduler] Warning: {name}='{raw}' is not a valid int. Using default {default_val}.")
            return int(default_val)
    try:
        return int(raw)
    except Exception:
        return int(default_val)

# Comma-separated list of health-check URLs polled by the background pinger.
TRIGGER_HEALTH_URL = os.environ.get(
    "TRIGGER_HEALTH_URL",
    "https://advisor-trigger-ki3t.onrender.com/health, https://advisorai-data-1ew2.onrender.com/health",
)
# Seconds between ping rounds (default 300s = 5 minutes).
PING_INTERVAL = _parse_int_env(name="TRIGGER_PING_INTERVAL", default_val=300)
# Seconds slept between pipeline runs (default 3600s = 1 hour).
PIPELINE_INTERVAL = _parse_int_env(name="PIPELINE_INTERVAL", default_val=3600)

# -----------------------------------------------------------------------------
# ASYNC PINGER WITH EXPONENTIAL BACKOFF
# -----------------------------------------------------------------------------
async def ping_remote():
    """
    Continuously GET each URL in TRIGGER_HEALTH_URL (comma-separated).

    After a round in which every URL answered without an error status, the
    next round starts PING_INTERVAL seconds later. After a round with at least
    one failure, the retry delay starts at min(PING_INTERVAL, 5) seconds
    (never below 1 second) and doubles on every consecutive failed round,
    capped at 150 seconds (2.5 minutes). A fully successful round resets the
    retry delay to its initial value.

    Returns immediately when no URLs are configured; otherwise runs forever.
    """
    urls = [u.strip() for u in TRIGGER_HEALTH_URL.split(",") if u.strip()]
    if not urls:
        print("[Pinger] No health URLs configured; nothing to ping")
        return

    max_backoff = 150
    # Clamp to >= 1s so a zero/negative PING_INTERVAL cannot turn the failure
    # path into a busy loop (0 * 2 would stay 0 forever).
    initial_backoff = max(1, min(PING_INTERVAL, 5))
    backoff = initial_backoff

    async with httpx.AsyncClient(timeout=10.0) as client:
        while True:
            all_success = True
            for url in urls:
                try:
                    resp = await client.get(url)
                    resp.raise_for_status()
                    print(f"[Pinger] {url} -> {resp.status_code}")
                except Exception as e:
                    # Deliberately broad: one bad URL or network error must
                    # not kill the background pinger task.
                    print(f"[Pinger] error pinging {url}: {e}")
                    all_success = False
            if all_success:
                # Reset to the short initial delay, NOT to PING_INTERVAL;
                # otherwise the first failure after a success would wait a
                # full ping interval and exceed the documented cap.
                backoff = initial_backoff
                await asyncio.sleep(PING_INTERVAL)
            else:
                await asyncio.sleep(backoff)
                backoff = min(backoff * 2, max_backoff)

def start_async_ping():
    """
    Thread entry point for the pinger.

    Creates a fresh asyncio event loop, schedules ping_remote() on it,
    installs it as the calling thread's current loop and then runs the
    loop indefinitely.
    """
    ping_loop = asyncio.new_event_loop()
    ping_loop.create_task(ping_remote())
    asyncio.set_event_loop(ping_loop)
    ping_loop.run_forever()

# launch the ping loop in the background
# Daemon thread: it does not keep the interpreter alive on shutdown.
_ping_thread = threading.Thread(target=start_async_ping, daemon=True)
_ping_thread.start()
print("[Scheduler] Started background ping thread")

# -----------------------------------------------------------------------------
# MAIN PIPELINE LOOP (sleeps PIPELINE_INTERVAL seconds between runs; default 3600s = 1 hour)
# -----------------------------------------------------------------------------
import traceback
from datetime import datetime

# Run the pipeline script as a child process, forever. Each iteration records
# the start timestamp, runs the script, reports its output and exit code, and
# then sleeps PIPELINE_INTERVAL seconds (so the period is run time + interval).
while True:
    # The timestamp is taken and published when the run STARTS.
    last_run = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[Scheduler] Running pipeline... Last run: {last_run}")
    # Write last_run to file for API access. Best effort: a failed write is
    # logged but must not prevent the pipeline from running.
    try:
        with open(app_config.LAST_RUN_PATH, 'w') as f:
            f.write(last_run)
    except Exception as e:
        print(f"[Scheduler] Failed to write last_run.txt: {e}")
    try:
        # The project root is logged for diagnostics only; it is not used as
        # the child's working directory.
        project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
        print(f"[Scheduler] Project root: {project_root}")
        print(f"[Scheduler] Pipeline path: {PIPELINE_PATH}")

        # Run from '/' so relative 'data/...' writes resolve to '/data/...'
        result = subprocess.run(
            [sys.executable, PIPELINE_PATH],
            cwd='/',
            capture_output=True,
            text=True,
            env=os.environ.copy()
        )
        print(f"[Scheduler] Pipeline finished with code {result.returncode}")

        if result.stdout:
            print("[Scheduler] STDOUT:\n", result.stdout)
        if result.stderr:
            print("[Scheduler] STDERR:\n", result.stderr)

        # subprocess.run() without check=True never raises CalledProcessError,
        # so report a non-zero exit code directly. The captured output has
        # already been printed above and is not repeated.
        if result.returncode != 0:
            print(f"[Scheduler] Pipeline execution failed with return code {result.returncode}")

    except Exception as e:
        # Top-level boundary: e.g. the interpreter could not be spawned.
        # Log and keep the scheduler alive for the next cycle.
        print(f"[Scheduler] Exception running pipeline: {e}")
        print(traceback.format_exc())

    print(f"[Scheduler] Sleeping for {PIPELINE_INTERVAL // 60} minutes...")
    time.sleep(PIPELINE_INTERVAL)