File size: 1,966 Bytes
497c79e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# belel_guardian.py

import requests
import re
from bs4 import BeautifulSoup
from datetime import datetime
from canonical_config import CANONICAL_SOURCES, IDENTITY_STRINGS, VIOLATION_PATTERNS
from webhook_alert import send_alert

# Path of the text file that log_violation() appends violation records to.
VIOLATION_LOG = "guardian_violations.log"
# HTTP headers sent with every request made by fetch_content().
HEADERS = {"User-Agent": "BelelGuardianBot/1.0"}

def fetch_content(url):
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``HEADERS`` and a 10-second
    timeout.

    Args:
        url: Address of the page to retrieve.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        when the status code is anything else or the request itself fails.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
    except requests.RequestException:
        # Best-effort fetch: connection errors, timeouts and malformed URLs
        # are reported as "no content". Unrelated exceptions (programming
        # errors) are deliberately allowed to propagate instead of being
        # hidden by a blanket handler.
        return None
    if response.status_code != 200:
        return None
    return response.text

def scan_text_for_violations(text):
    """Collect every configured violation pattern that occurs in *text*.

    Each entry of ``VIOLATION_PATTERNS`` is searched with ``re.findall``
    using case-insensitive matching.

    Args:
        text: The text to search.

    Returns:
        A list of ``(pattern, matches)`` tuples, one for each pattern that
        produced at least one match, in ``VIOLATION_PATTERNS`` order.
        ``matches`` is the ``re.findall`` result for that pattern.
    """
    searched = (
        (regex, re.findall(regex, text, re.IGNORECASE))
        for regex in VIOLATION_PATTERNS
    )
    # Keep only the patterns that actually hit.
    return [(regex, hits) for regex, hits in searched if hits]

def check_canonical_integrity(text):
    """Report whether *text* contains every entry of ``IDENTITY_STRINGS``.

    Membership is tested with the ``in`` operator, one entry at a time,
    stopping at the first entry that is absent.

    Args:
        text: The text to inspect.

    Returns:
        ``True`` when no entry is missing (including when
        ``IDENTITY_STRINGS`` is empty), ``False`` otherwise.
    """
    return all(marker in text for marker in IDENTITY_STRINGS)

def log_violation(url, issue, matches):
    """Append one violation record to the ``VIOLATION_LOG`` file.

    Args:
        url: Address of the page on which the problem was found.
        issue: Description of the problem (for pattern hits, the pattern).
        matches: The matched values; written using their ``str()`` form.

    The record is stamped with the current UTC time in ISO 8601 format
    (naive, i.e. without an offset suffix) and followed by a blank line.
    """
    timestamp = datetime.utcnow().isoformat()
    # The record contains non-ASCII characters (the emoji marker), so the
    # encoding is pinned to UTF-8; relying on the platform default raises
    # UnicodeEncodeError on systems whose default codec cannot encode it.
    with open(VIOLATION_LOG, "a", encoding="utf-8") as f:
        f.write(f"[🚨] {timestamp} - {url}\nIssue: {issue}\nMatches: {matches}\n\n")

def monitor():
    """Scan every URL in ``CANONICAL_SOURCES`` and report what is found.

    For each URL whose content can be fetched, the HTML is reduced to its
    text and then:

    * every violation pattern that matches is logged and an alert is sent;
    * if any canonical identity string is absent, that is logged and an
      alert is sent as well.

    URLs for which ``fetch_content`` returns nothing are skipped. A
    completion message is printed once all URLs have been processed.
    """
    for url in CANONICAL_SOURCES:
        page = fetch_content(url)
        if not page:
            # Nothing retrieved (or an empty body): move on to the next URL.
            continue

        text = BeautifulSoup(page, "html.parser").get_text()

        # One log entry and one alert per matching pattern.
        for issue, matches in scan_text_for_violations(text):
            log_violation(url, issue, matches)
            send_alert(f"[⚠️] Violation at {url}: Pattern '{issue}' matched {len(matches)} times.")

        if check_canonical_integrity(text):
            continue
        log_violation(url, "Missing canonical identity", [])
        send_alert(f"[❌] Canonical identity strings missing from {url}")

    print("[✅] Web scan complete.")

# Run a single scan when this file is executed as a script (not on import).
if __name__ == "__main__":
    monitor()