# Listing metadata shown by the file viewer: file size 3,166 bytes; revision hashes 8267d7b, 02c31e0.
import os
import json
import urllib.parse
import requests
import gradio as gr
from typing import Literal, List, Dict

# Base URL for Jina AI search requests; search() appends the URL-encoded query
# to it as a single path segment.
JINA_API_BASE = "https://s.jina.ai"  # Search endpoint
# For reading page content, you can also use r.jina.ai like: https://r.jina.ai/https://example.com

def _headers():
    """Build the HTTP headers sent with a Jina AI request.

    Returns:
        A dict that asks for a JSON response and sets "X-Engine" to "direct".
        When the JINA_API_KEY environment variable holds a non-empty value, a
        Bearer "Authorization" header carrying that value is included too.
    """
    base = {"Accept": "application/json", "X-Engine": "direct"}
    token = os.getenv("JINA_API_KEY")
    # An unset or empty key adds no Authorization header.
    if not token:
        return base
    return {**base, "Authorization": f"Bearer {token}"}

def _extract_results(data):
    """Return the list of raw result entries found in a decoded JSON response."""
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        # Prefer "results"; fall back to "data" when "results" is absent or not a list.
        for key in ("results", "data"):
            candidate = data.get(key)
            if isinstance(candidate, list):
                return candidate
    return []


def _normalize_result(entry):
    """Map one raw entry to a title/link/snippet dict, or None if it is unusable."""
    if not isinstance(entry, dict):
        # Entries that are not JSON objects have no fields to read; skip them.
        return None
    title = str(entry.get("title") or entry.get("headline") or "")
    link = str(entry.get("url") or entry.get("link") or "")
    # Prefer a concise snippet; otherwise fall back to description or content.
    snippet = str(
        entry.get("snippet") or entry.get("description") or entry.get("content") or ""
    )
    if not (title or link or snippet):
        return None
    return {"title": title, "link": link, "snippet": snippet}


def search(input_query: str, max_results: int = 5) -> List[Dict[Literal["snippet", "title", "link"], str]]:
    """
    Perform a web search using Jina AI (s.jina.ai).

    A blank query or a non-positive max_results returns an empty list without
    sending a request. HTTP error statuses are raised by raise_for_status(),
    and a response body that is not valid JSON makes the JSON decoding raise.

    Args:
        input_query: The query to search for. Surrounding whitespace is ignored.
        max_results: The maximum number of results to return. Defaults to 5.

    Returns:
        A list of dictionaries with "snippet", "title", and "link".
    """
    query = (input_query or "").strip()
    # Coerce to int: a float such as 5.0 would otherwise be rejected when used
    # as a bound. None is kept as "no cap", matching what slicing with None did.
    limit = None if max_results is None else int(max_results)
    if not query or (limit is not None and limit <= 0):
        return []

    # The query travels as a path segment, so encode every reserved character.
    encoded = urllib.parse.quote(query, safe="")
    url = f"{JINA_API_BASE}/{encoded}"

    # Accept: application/json (set in _headers) requests structured output.
    resp = requests.get(url, headers=_headers(), timeout=30)
    resp.raise_for_status()

    items = []
    for entry in _extract_results(resp.json()):
        # Apply the cap to usable entries, so skipped ones do not shrink the output.
        if limit is not None and len(items) >= limit:
            break
        normalized = _normalize_result(entry)
        if normalized is not None:
            items.append(normalized)
    return items

# Input widgets, listed in the same order as search()'s parameters.
_query_box = gr.Textbox(value="Ahly SC of Egypt matches.", label="Search query")
_limit_slider = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results")

# Gradio interface that calls search() and renders the returned list as JSON.
demo = gr.Interface(
    fn=search,
    inputs=[_query_box, _limit_slider],
    outputs=gr.JSON(label="Search results"),
    title="Web Searcher using Jina AI",
    description="Search the web using Jina AI Search Foundation API (s.jina.ai).",
)

if __name__ == "__main__":
    # Only launch when run as a script; launch() is called with mcp_server=True.
    demo.launch(mcp_server=True)