victor HF Staff committed on
Commit 1d7f241 · 1 Parent(s): 1e6c032

Improve MCP server concurrency and safety

Files changed (3)
  1. analytics.py +7 -2
  2. app.py +286 -53
  3. requirements.txt +2 -2
analytics.py CHANGED
@@ -1,6 +1,7 @@
 # ─── analytics.py ──────────────────────────────────────────────────────────────
 import os
 import json
+import asyncio
 from datetime import datetime, timedelta, timezone
 from filelock import FileLock  # pip install filelock
 import pandas as pd  # already available in HF images
@@ -63,8 +64,7 @@ def _normalize_counts_schema(data: dict) -> dict:
 # ──────────────────────────────────────────────────────────────────────────────
 # Public API
 # ──────────────────────────────────────────────────────────────────────────────
-async def record_request(tool: str) -> None:
-    """Increment today's counter (UTC) for the given tool: 'search' or 'fetch'."""
+def _record_request_sync(tool: str) -> None:
     tool = (tool or "").strip().lower()
     if tool not in {"search", "fetch"}:
         # Ignore unknown tool buckets to keep charts clean
@@ -79,6 +79,11 @@ async def record_request(tool: str) -> None:
     _save_counts(data)
 
 
+async def record_request(tool: str) -> None:
+    """Increment today's counter (UTC) for the given tool: 'search' or 'fetch'."""
+    await asyncio.to_thread(_record_request_sync, tool)
+
+
 def last_n_days_count_df(tool: str, n: int = 30) -> pd.DataFrame:
     """Return DataFrame with a row for each of the past n days for the given tool."""
     tool = (tool or "").strip().lower()
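The analytics change keeps the existing FileLock-guarded, synchronous counter update and simply moves it off the event loop with asyncio.to_thread, so concurrent MCP requests are not serialized behind disk I/O. A minimal, self-contained sketch of that pattern — the file name, lock path, and simplified counter schema below are illustrative, not the actual analytics.py internals:

```python
import asyncio
import json
import os

from filelock import FileLock

COUNTS_PATH = "counts.json"              # illustrative path, not the real one
LOCK = FileLock(COUNTS_PATH + ".lock")   # cross-process lock around the JSON file


def _bump_sync(tool: str) -> None:
    # Blocking section: acquire the file lock, then read-modify-write the counters.
    with LOCK:
        data = {}
        if os.path.exists(COUNTS_PATH):
            with open(COUNTS_PATH) as f:
                data = json.load(f)
        data[tool] = data.get(tool, 0) + 1
        with open(COUNTS_PATH, "w") as f:
            json.dump(data, f)


async def bump(tool: str) -> None:
    # Thin async wrapper: the blocking work runs in a worker thread,
    # so awaiting callers never block the event loop.
    await asyncio.to_thread(_bump_sync, tool)


if __name__ == "__main__":
    asyncio.run(bump("search"))
```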
app.py CHANGED
@@ -2,7 +2,10 @@ import os
 import time
 import re
 import html
-from typing import Optional, Dict, Any, List
+import asyncio
+import ipaddress
+import socket
+from typing import Optional, Dict, Any, List, Tuple
 from urllib.parse import urlsplit
 from datetime import datetime, timezone
 
@@ -24,15 +27,179 @@ SERPER_SEARCH_ENDPOINT = "https://google.serper.dev/search"
 SERPER_NEWS_ENDPOINT = "https://google.serper.dev/news"
 HEADERS = {"X-API-KEY": SERPER_API_KEY or "", "Content-Type": "application/json"}
 
-# Rate limiting (shared by both tools)
+# HTTP clients with connection pooling
+SERPER_TIMEOUT = httpx.Timeout(connect=5.0, read=10.0, write=5.0, pool=5.0)
+WEB_TIMEOUT = httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=5.0)
+
+SERPER_LIMITS = httpx.Limits(
+    max_keepalive_connections=int(os.getenv("SERPER_KEEPALIVE", "32")),
+    max_connections=int(os.getenv("SERPER_MAX_CONNECTIONS", "128")),
+)
+WEB_LIMITS = httpx.Limits(
+    max_keepalive_connections=int(os.getenv("WEB_KEEPALIVE", "128")),
+    max_connections=int(os.getenv("WEB_MAX_CONNECTIONS", "512")),
+)
+
+serper_client = httpx.AsyncClient(
+    timeout=SERPER_TIMEOUT,
+    limits=SERPER_LIMITS,
+    http2=True,
+    headers=HEADERS,
+)
+
+DEFAULT_USER_AGENT = os.getenv(
+    "FETCH_USER_AGENT",
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
+    "(KHTML, like Gecko) Chrome/124.0 Safari/537.36",
+)
+
+web_client = httpx.AsyncClient(
+    timeout=WEB_TIMEOUT,
+    limits=WEB_LIMITS,
+    http2=True,
+    follow_redirects=True,
+    headers={"User-Agent": DEFAULT_USER_AGENT},
+)
+
+# Rate limiting (shared by both tools, process-local)
+GLOBAL_RATE = parse(os.getenv("GLOBAL_RATE", "3000/minute"))
+PER_IP_RATE = parse(os.getenv("PER_IP_RATE", "60/minute"))
 storage = MemoryStorage()
 limiter = MovingWindowRateLimiter(storage)
-rate_limit = parse("360/hour")  # shared global limit across search + fetch
+
+# Concurrency controls & resource caps
+FETCH_MAX_BYTES = max(1024, int(os.getenv("FETCH_MAX_BYTES", "1500000")))
+FETCH_CONCURRENCY = max(1, int(os.getenv("FETCH_CONCURRENCY", "64")))
+SEARCH_CONCURRENCY = max(1, int(os.getenv("SEARCH_CONCURRENCY", "64")))
+EXTRACT_CONCURRENCY = max(
+    1,
+    int(
+        os.getenv(
+            "EXTRACT_CONCURRENCY",
+            str(max(4, (os.cpu_count() or 2) * 2)),
+        )
+    ),
+)
+
+SEARCH_CACHE_TTL = max(0, int(os.getenv("SEARCH_CACHE_TTL", "30")))
+FETCH_CACHE_TTL = max(0, int(os.getenv("FETCH_CACHE_TTL", "300")))
+
+_search_cache: Dict[Tuple[str, str, int], Dict[str, Any]] = {}
+_fetch_cache: Dict[str, Dict[str, Any]] = {}
+_search_cache_lock: Optional[asyncio.Lock] = None
+_fetch_cache_lock: Optional[asyncio.Lock] = None
+_search_sema: Optional[asyncio.Semaphore] = None
+_fetch_sema: Optional[asyncio.Semaphore] = None
+_extract_sema: Optional[asyncio.Semaphore] = None
 
 
 # ──────────────────────────────────────────────────────────────────────────────
 # Helpers
 # ──────────────────────────────────────────────────────────────────────────────
+def _get_cache_lock(name: str) -> asyncio.Lock:
+    global _search_cache_lock, _fetch_cache_lock
+    if name == "search":
+        if _search_cache_lock is None:
+            _search_cache_lock = asyncio.Lock()
+        return _search_cache_lock
+    if name == "fetch":
+        if _fetch_cache_lock is None:
+            _fetch_cache_lock = asyncio.Lock()
+        return _fetch_cache_lock
+    raise ValueError(f"Unknown cache lock: {name}")
+
+
+def _get_semaphore(name: str) -> asyncio.Semaphore:
+    global _search_sema, _fetch_sema, _extract_sema
+    if name == "search":
+        if _search_sema is None:
+            _search_sema = asyncio.Semaphore(SEARCH_CONCURRENCY)
+        return _search_sema
+    if name == "fetch":
+        if _fetch_sema is None:
+            _fetch_sema = asyncio.Semaphore(FETCH_CONCURRENCY)
+        return _fetch_sema
+    if name == "extract":
+        if _extract_sema is None:
+            _extract_sema = asyncio.Semaphore(EXTRACT_CONCURRENCY)
+        return _extract_sema
+    raise ValueError(f"Unknown semaphore: {name}")
+
+
+async def _cache_get(name: str, cache: Dict[Any, Any], key: Any):
+    lock = _get_cache_lock(name)
+    async with lock:
+        entry = cache.get(key)
+        if not entry:
+            return None
+        if time.time() > entry["expires_at"]:
+            cache.pop(key, None)
+            return None
+        return entry["value"]
+
+
+async def _cache_set(name: str, cache: Dict[Any, Any], key: Any, value: Any, ttl: int):
+    if ttl <= 0:
+        return
+    lock = _get_cache_lock(name)
+    async with lock:
+        cache[key] = {"expires_at": time.time() + ttl, "value": value}
+
+
+def _client_ip(request: Optional[gr.Request]) -> str:
+    try:
+        if request is None:
+            return "unknown"
+        headers = getattr(request, "headers", None) or {}
+        xff = headers.get("x-forwarded-for")
+        if xff:
+            return xff.split(",")[0].strip()
+        client = getattr(request, "client", None)
+        if client and getattr(client, "host", None):
+            return client.host
+    except Exception:
+        pass
+    return "unknown"
+
+
+async def _host_is_public(host: str) -> bool:
+    if not host:
+        return False
+
+    def _resolve() -> List[str]:
+        try:
+            return list({ai[4][0] for ai in socket.getaddrinfo(host, None)})
+        except Exception:
+            return []
+
+    addresses = await asyncio.to_thread(_resolve)
+    if not addresses:
+        # If resolution fails we let the HTTP fetch decide.
+        return True
+
+    for addr in addresses:
+        ip_obj = ipaddress.ip_address(addr)
+        if (
+            ip_obj.is_private
+            or ip_obj.is_loopback
+            or ip_obj.is_link_local
+            or ip_obj.is_multicast
+            or ip_obj.is_reserved
+            or ip_obj.is_unspecified
+        ):
+            return False
+    return True
+
+
+async def _check_rate_limits(bucket: str, ip: str) -> Optional[str]:
+    if not await limiter.hit(GLOBAL_RATE, "global"):
+        return f"Global rate limit exceeded. Limit: {GLOBAL_RATE}."
+    if ip != "unknown":
+        if not await limiter.hit(PER_IP_RATE, f"{bucket}:{ip}"):
+            return f"Per-IP rate limit exceeded. Limit: {PER_IP_RATE}."
+    return None
+
+
 def _domain_from_url(url: str) -> str:
     try:
         netloc = urlsplit(url).netloc
@@ -62,37 +229,48 @@ def _extract_title_from_html(html_text: str) -> Optional[str]:
 # Tool: search (metadata only)
 # ──────────────────────────────────────────────────────────────────────────────
 async def search(
-    query: str, search_type: str = "search", num_results: Optional[int] = 4
+    query: str,
+    search_type: str = "search",
+    num_results: Optional[int] = 4,
+    request: Optional[gr.Request] = None,
 ) -> Dict[str, Any]:
-    """
-    Perform a web or news search via Serper and return metadata ONLY.
-    Does NOT fetch or extract content from result URLs.
-    """
+    """Perform a web or news search via Serper and return metadata only."""
     start_time = time.time()
 
-    # Validate inputs
     if not query or not query.strip():
        await record_request("search")
        return {"error": "Missing 'query'. Please provide a search query string."}
 
+    query = query.strip()
     if num_results is None:
         num_results = 4
-    num_results = max(1, min(20, int(num_results)))
+    try:
+        num_results = max(1, min(20, int(num_results)))
+    except (TypeError, ValueError):
+        num_results = 4
+
     if search_type not in ["search", "news"]:
         search_type = "search"
 
-    # Check API key
    if not SERPER_API_KEY:
        await record_request("search")
        return {
            "error": "SERPER_API_KEY is not set. Export SERPER_API_KEY and try again."
        }
 
+    ip = _client_ip(request)
+
    try:
-        # Rate limit
-        if not await limiter.hit(rate_limit, "global"):
+        rl_message = await _check_rate_limits("search", ip)
+        if rl_message:
            await record_request("search")
-            return {"error": "Rate limit exceeded. Limit: 360 requests/hour."}
+            return {"error": rl_message}
+
+        cache_key = (query, search_type, num_results)
+        cached = await _cache_get("search", _search_cache, cache_key)
+        if cached:
+            await record_request("search")
+            return cached
 
        endpoint = (
            SERPER_NEWS_ENDPOINT if search_type == "news" else SERPER_SEARCH_ENDPOINT
@@ -102,8 +280,12 @@ async def search(
         payload["type"] = "news"
         payload["page"] = 1
 
-        async with httpx.AsyncClient(timeout=15) as client:
-            resp = await client.post(endpoint, headers=HEADERS, json=payload)
+        semaphore = _get_semaphore("search")
+        await semaphore.acquire()
+        try:
+            resp = await serper_client.post(endpoint, json=payload)
+        finally:
+            semaphore.release()
 
        if resp.status_code != 200:
            await record_request("search")
@@ -115,32 +297,22 @@ async def search(
         raw_results: List[Dict[str, Any]] = (
             data.get("news", []) if search_type == "news" else data.get("organic", [])
         )
-        if not raw_results:
-            await record_request("search")
-            return {
-                "query": query,
-                "search_type": search_type,
-                "count": 0,
-                "results": [],
-                "message": f"No {search_type} results found.",
-            }
 
         formatted: List[Dict[str, Any]] = []
-        for idx, r in enumerate(raw_results[:num_results], start=1):
-            item = {
+        for idx, item in enumerate(raw_results[:num_results], start=1):
+            entry = {
                 "position": idx,
-                "title": r.get("title"),
-                "link": r.get("link"),
-                "domain": _domain_from_url(r.get("link", "")),
-                "snippet": r.get("snippet") or r.get("description"),
+                "title": item.get("title"),
+                "link": item.get("link"),
+                "domain": _domain_from_url(item.get("link", "")),
+                "snippet": item.get("snippet") or item.get("description"),
             }
             if search_type == "news":
-                item["source"] = r.get("source")
-                item["date"] = _iso_date_or_unknown(r.get("date"))
-            formatted.append(item)
+                entry["source"] = item.get("source")
+                entry["date"] = _iso_date_or_unknown(item.get("date"))
+            formatted.append(entry)
 
-        await record_request("search")
-        return {
+        result = {
             "query": query,
             "search_type": search_type,
             "count": len(formatted),
@@ -148,6 +320,14 @@ async def search(
             "duration_s": round(time.time() - start_time, 2),
         }
 
+        if not formatted:
+            result["message"] = f"No {search_type} results found."
+
+        await _cache_set("search", _search_cache, cache_key, result, SEARCH_CACHE_TTL)
+        await record_request("search")
+
+        return result
+
     except Exception as e:
         await record_request("search")
         return {"error": f"Search failed: {str(e)}"}
@@ -156,10 +336,12 @@ async def search(
 # ──────────────────────────────────────────────────────────────────────────────
 # Tool: fetch (single URL fetch + extraction)
 # ──────────────────────────────────────────────────────────────────────────────
-async def fetch(url: str, timeout: int = 20) -> Dict[str, Any]:
-    """
-    Fetch a single URL and extract the main readable content.
-    """
+async def fetch(
+    url: str,
+    timeout: int = 20,
+    request: Optional[gr.Request] = None,
+) -> Dict[str, Any]:
+    """Fetch a single URL and extract the main readable content."""
     start_time = time.time()
 
     if not url or not isinstance(url, str):
@@ -170,26 +352,67 @@ async def fetch(url: str, timeout: int = 20) -> Dict[str, Any]:
         return {"error": "URL must start with http:// or https://."}
 
     try:
-        # Rate limit
-        if not await limiter.hit(rate_limit, "global"):
+        timeout = max(5, min(60, int(timeout)))
+    except (TypeError, ValueError):
+        timeout = 20
+
+    ip = _client_ip(request)
+
+    try:
+        host = urlsplit(url).hostname or ""
+        if not host:
+            await record_request("fetch")
+            return {"error": "Invalid URL; unable to determine host."}
+        rl_message = await _check_rate_limits("fetch", ip)
+        if rl_message:
            await record_request("fetch")
-            return {"error": "Rate limit exceeded. Limit: 360 requests/hour."}
+            return {"error": rl_message}
 
-        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-            resp = await client.get(url)
+        cache_key = (url, timeout)
+        cached = await _cache_get("fetch", _fetch_cache, cache_key)
+        if cached:
+            await record_request("fetch")
+            return cached
 
-        text = resp.text or ""
-        content = (
-            trafilatura.extract(
+        if not await _host_is_public(host):
+            await record_request("fetch")
+            return {"error": "Refusing to fetch private or local addresses."}
+
+        fetch_sema = _get_semaphore("fetch")
+        await fetch_sema.acquire()
+        try:
+            async with web_client.stream("GET", url, timeout=timeout) as resp:
+                status_code = resp.status_code
+                total = 0
+                chunks: List[bytes] = []
+                async for chunk in resp.aiter_bytes():
+                    total += len(chunk)
+                    if total > FETCH_MAX_BYTES:
+                        break
+                    chunks.append(chunk)
+                body = b"".join(chunks)
+                final_url_str = str(resp.url)
+                encoding = resp.encoding or "utf-8"
+        finally:
+            fetch_sema.release()
+
+        truncated = total > FETCH_MAX_BYTES
+        text = body.decode(encoding, errors="ignore")
+
+        extract_sema = _get_semaphore("extract")
+        await extract_sema.acquire()
+        try:
+            content = await asyncio.to_thread(
+                trafilatura.extract,
                 text,
                 include_formatting=False,
                 include_comments=False,
             )
-            or ""
-        )
+        finally:
+            extract_sema.release()
 
+        content = (content or "").strip()
         title = _extract_title_from_html(text) or ""
-        final_url_str = str(resp.url) if hasattr(resp, "url") else url
         domain = _domain_from_url(final_url_str)
         word_count = len(content.split()) if content else 0
 
@@ -197,14 +420,18 @@ async def fetch(url: str, timeout: int = 20) -> Dict[str, Any]:
             "url": url,
             "final_url": final_url_str,
             "domain": domain,
-            "status_code": resp.status_code,
+            "status_code": status_code,
             "title": title,
             "fetched_at": datetime.now(timezone.utc).isoformat(),
             "word_count": word_count,
-            "content": content.strip(),
+            "content": content,
             "duration_s": round(time.time() - start_time, 2),
         }
 
+        if truncated:
+            result["truncated"] = True
+
+        await _cache_set("fetch", _fetch_cache, cache_key, result, FETCH_CACHE_TTL)
        await record_request("fetch")
        return result
 
@@ -366,6 +593,12 @@ with gr.Blocks(title="Web MCP Server") as demo:
     gr.api(fetch, api_name="fetch")
 
 
+demo.queue(
+    max_size=int(os.getenv("GRADIO_MAX_QUEUE", "256")),
+    default_concurrency_limit=int(os.getenv("GRADIO_CONCURRENCY", "32")),
+)
+
+
 if __name__ == "__main__":
     # Launch with MCP server enabled
     demo.launch(mcp_server=True, show_api=True)
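The heart of the new fetch() path is a bounded, streaming download: the body is read in chunks and reading stops once the byte budget is exceeded, with the result flagged as truncated, so one huge page cannot exhaust memory. Below is a minimal standalone sketch of that pattern, using a throwaway client instead of the pooled web_client from the diff; MAX_BYTES and capped_get are illustrative names, not app.py identifiers.

```python
import asyncio

import httpx

MAX_BYTES = 1_500_000  # mirrors the FETCH_MAX_BYTES default in the diff


async def capped_get(client: httpx.AsyncClient, url: str) -> tuple[str, bool]:
    """Return (decoded_text, truncated) without ever buffering more than MAX_BYTES."""
    total = 0
    chunks: list[bytes] = []
    async with client.stream("GET", url) as resp:
        async for chunk in resp.aiter_bytes():
            total += len(chunk)
            if total > MAX_BYTES:
                break  # stop reading; caller sees truncated=True
            chunks.append(chunk)
        encoding = resp.encoding or "utf-8"
    text = b"".join(chunks).decode(encoding, errors="ignore")
    return text, total > MAX_BYTES


async def main() -> None:
    async with httpx.AsyncClient(follow_redirects=True, timeout=20) as client:
        text, truncated = await capped_get(client, "https://example.com")
        print(len(text), "chars, truncated:", truncated)


asyncio.run(main())
```

In app.py the same loop additionally runs behind a fetch semaphore and only after the SSRF check (_host_is_public) and the per-IP rate limit have passed.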
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 gradio
-httpx
+httpx[http2]
 trafilatura
 python-dateutil
 limits
-filelock
+filelock
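The only functional change here is httpx → httpx[http2], which pulls in the optional h2 backend needed because the new shared clients are constructed with http2=True; without the extra, client construction fails. A quick, hedged sanity check that HTTP/2 is actually negotiated (the URL is just an example):

```python
import asyncio

import httpx


async def main() -> None:
    # http2=True needs the optional 'h2' package, hence the httpx[http2] extra.
    async with httpx.AsyncClient(http2=True) as client:
        resp = await client.get("https://www.example.com")
        print(resp.http_version)  # "HTTP/2" if the server negotiated it


asyncio.run(main())
```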