moneychatbot / config.py
hadadrjt's picture
SearchGPT: Initial.
408c946
raw
history blame
8.57 kB
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
# OPENAI_API_BASE_URL: API endpoint. Not defined here; set it in the Hugging Face Spaces secrets.
# OPENAI_API_KEY: API key. Not defined here; set it in the Hugging Face Spaces secrets.
# Name of the chat model to use.
MODEL = 'gpt-4.1-nano'

# SearXNG search URL. See the endpoint list at https://searx.space
SEARXNG_ENDPOINT = 'https://searx.stream/search'

# Baidu search URL.
BAIDU_ENDPOINT = 'https://www.baidu.com/s'

# Base URL of the reader service.
# NOTE(review): presumably the page URL to read is appended to this prefix;
# confirm against the caller.
READER_ENDPOINT = 'https://r.jina.ai/'

# Request timeout of 300, i.e. 5 minutes when interpreted as seconds.
REQUEST_TIMEOUT = 5 * 60
# Instruction prompt for the assistant. The text sets the assistant's role and
# lays out a nine-step search workflow: search, select up to 10 results, fetch
# each with read_url, extract key information, collect URLs, summarize in
# English, cite sources as [Source title](URL), flag ambiguity, and keep the
# answer free of emoji, dashes, and unnecessary symbols.
# NOTE(review): how this string is delivered to the model is not visible
# here; presumably as the system/instructions message. Confirm against caller.
# The text is runtime data: any edit changes model behavior.
INSTRUCTIONS = """
You are ChatGPT with advanced real-time web search, content extraction, and summarization capabilities.
Your objective is to provide the most accurate, comprehensive, and professionally structured responses to user queries.
Always use web search to gather relevant information before responding unless the question is purely factual and does not require external sources.
Search workflow :
1. Perform a web search using available engines (Google, Bing, Baidu) to retrieve highly relevant results
2. Select up to 10 top results based on relevance, credibility, and content depth
3. For each selected URL, fetch the full content using the read_url function
4. Extract key information, critical data, and insights
5. Collect all URLs encountered in search results and content extraction
6. Provide a structured summary in English, professional, concise, and precise
7. Include citations for each URL used, in the format [Source title](URL)
8. If information is ambiguous, incomplete, or contradictory, clearly state it
9. Ensure your response is readable, logically organized, and free of emoji, dashes, or unnecessary symbols
"""
# Prompt fragment, wrapped in <system> tags, listing directives for analyzing
# retrieved content: identify facts and data, collect URLs and citations,
# assess source credibility, and produce a structured summary that cites
# sources as [Source title](URL) and marks uncertainties.
# NOTE(review): the point at which this fragment is injected is not visible
# here; presumably alongside fetched page content. Confirm against caller.
CONTENT_EXTRACTION = """
<system>
- Analyze the retrieved content in detail
- Identify all critical facts, arguments, statistics, and relevant data
- Collect all URLs, hyperlinks, references, and citations mentioned in the content
- Evaluate credibility of sources, highlight potential biases or conflicts
- Produce a structured, professional, and comprehensive summary
- Emphasize clarity, accuracy, and logical flow
- Include all discovered URLs in the final summary as [Source title](URL)
- Mark any uncertainties, contradictions, or missing information clearly
</system>
"""
# Prompt fragment, wrapped in <system> tags, listing directives for handling
# search results: fetch each result with read_url, extract key information,
# capture URLs, summarize in English, and list sources at the end as
# [Source title](link).
# NOTE(review): the point at which this fragment is injected is not visible
# here; presumably alongside search results. Confirm against caller.
SEARCH_SELECTION = """
<system>
- For each search result, fetch the full content using read_url
- Extract key information, main arguments, data points, and statistics
- Capture every URL present in the content or references
- Create a professional, structured summary in English
- List each source at the end of the summary in the format [Source title](link)
- Identify ambiguities or gaps in information
- Ensure clarity, completeness, and high information density
</system>
"""
# HTML fragment describing the project: what SearchGPT is, the components it
# links to (Gradio, SearXNG, OpenWebUI Deep Research), a pointer to the terms
# of use, and a donation link. All links open in a new tab (target="_blank").
# NOTE(review): where this is rendered is not visible here; presumably in
# the Gradio UI. Confirm against caller.
DESCRIPTION = """
<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities and the ability to read content directly from a URL.
<br><br>
This Space implements an agent-based system with <b><a href="https://www.gradio.app" target="_blank">Gradio</a></b>. It is integrated with
<b><a href="https://docs.searxng.org" target="_blank">SearXNG</a></b>, which is then converted into a script tool or function for native execution.
<br><br>
The agent mode is inspired by the <b><a href="https://openwebui.com/t/hadad/deep_research" target="_blank">Deep Research</a></b> from
<b><a href="https://docs.openwebui.com" target="_blank">OpenWebUI</a></b> tools script.
<br><br>
The <b>Deep Research</b> feature is also available on the primary Spaces of <b><a href="https://umint-openwebui.hf.space"
target="_blank">UltimaX Intelligence</a></b>.
<br><br>
Please consider reading the <b><a href="https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c"
target="_blank">Terms of Use and Consequences of Violation</a></b> if you wish to proceed to the main Spaces.
<br><br>
<b>Like this project? Feel free to buy me a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
"""
# Platform tokens in the form browsers place inside the parentheses of a
# User-Agent header, e.g. "Mozilla/5.0 (<token>) ...".
# NOTE(review): the code that consumes this list is not visible here;
# presumably one entry is picked to build a User-Agent. Confirm against caller.
#
# There is deliberately no "Windows NT 11.0" entry: browsers on Windows 11
# still report "Windows NT 10.0", so an "NT 11.0" token does not occur in real
# traffic. The "WOW64" variant (32-bit browser on 64-bit Windows) takes its
# place so the list keeps five entries, two of them Windows.
OS = [
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
    "Windows NT 10.0; WOW64",
    "Macintosh; Intel Mac OS X 11_6_2",
]
# Integer values 1..223 in ascending order, minus these gaps:
# 6, 7, 9-11, 21, 22, 25-33, 127 and 169 (205 values in total).
# NOTE(review): presumably first octets for synthesized IPv4 addresses; the
# gaps include 10, 127 and 169, which look like the private, loopback and
# link-local first octets. Confirm the intent of the remaining gaps.
OCTETS = [
    *range(1, 6),      # 1-5
    8,
    *range(12, 21),    # 12-20
    23, 24,
    *range(34, 127),   # 34-126
    *range(128, 169),  # 128-168
    *range(170, 224),  # 170-223
]
# Browser family names.
# NOTE(review): the visible part of the file has CHROME_/FIREFOX_/SAFARI_/
# EDGE_VERSIONS lists but none for "Opera"; confirm how the consumer
# versions an Opera pick.
BROWSERS = ["Chrome", "Firefox", "Safari", "Edge", "Opera"]
# Chrome version strings, newest first (major versions 120 down to 116).
CHROME_VERSIONS = [
    "120.0.0.0", "119.0.0.0", "118.0.0.0",
    "117.0.0.0", "116.0.0.0",
]
# Firefox version strings, newest first (121.0 down to 117.0).
FIREFOX_VERSIONS = ["121.0", "120.0", "119.0", "118.0", "117.0"]
# Safari version strings, newest first (17.1 down to 16.4).
SAFARI_VERSIONS = ["17.1", "17.0", "16.6", "16.5", "16.4"]
# Edge version strings (full four-part build numbers), newest first.
EDGE_VERSIONS = [
    "120.0.2210.91", "119.0.2151.97", "118.0.2088.76",
    "117.0.2045.60", "116.0.1938.81",
]
# Twenty bare domain names (no scheme, no "www." prefix).
# NOTE(review): presumably combined with PROTOCOLS to form URLs such as a
# Referer or Origin value; confirm against caller.
DOMAINS = [
    "google.com", "bing.com", "yahoo.com", "duckduckgo.com",
    "baidu.com", "yandex.com", "facebook.com", "twitter.com",
    "linkedin.com", "reddit.com", "youtube.com", "wikipedia.org",
    "amazon.com", "github.com", "stackoverflow.com", "medium.com",
    "quora.com", "pinterest.com", "instagram.com", "tumblr.com",
]
# URL prefixes: plain HTTPS, and HTTPS with a leading "www." host label.
PROTOCOLS = ["https://", "https://www."]
# Search URL prefixes, each ending at its query parameter ("q=", "p=", "wd=",
# "text="). The six general engines come first, followed by ten regional
# Google hosts that all share the "/search?q=" path.
# NOTE(review): presumably the query text is appended to a chosen prefix;
# confirm against caller.
SEARCH_ENGINES = [
    "https://www.google.com/search?q=",
    "https://www.bing.com/search?q=",
    "https://search.yahoo.com/search?p=",
    "https://duckduckgo.com/?q=",
    "https://www.baidu.com/s?wd=",
    "https://yandex.com/search/?text=",
] + [
    f"https://www.google.{tld}/search?q="
    for tld in (
        "co.uk", "ca", "com.au", "de", "fr",
        "co.jp", "com.br", "co.in", "ru", "it",
    )
]
# Twenty generic single-word topics.
# NOTE(review): presumably used as filler search terms; confirm against caller.
KEYWORDS = [
    "news", "weather", "sports", "technology", "science",
    "health", "finance", "entertainment", "travel", "food",
    "education", "business", "politics", "culture", "history",
    "music", "movies", "games", "books", "art",
]
# Fifty two-letter uppercase country codes, kept in their original order.
# Written as whitespace-separated text and split into a list of strings.
COUNTRIES = (
    "US GB CA AU DE FR JP BR IN RU "
    "IT ES MX NL SE NO DK FI PL TR "
    "KR SG HK TW TH ID MY PH VN AR "
    "CL CO PE VE EG ZA NG KE MA DZ "
    "TN IL AE SA QA KW BH OM JO LB"
).split()
# Forty-nine language-region tags (e.g. "en-US"), kept in their original order.
# Written as whitespace-separated text and split into a list of strings.
# NOTE(review): presumably used for Accept-Language style values; confirm
# against caller.
LANGUAGES = """
    en-US en-GB en-CA en-AU de-DE fr-FR ja-JP
    pt-BR hi-IN ru-RU it-IT es-ES es-MX nl-NL
    sv-SE no-NO da-DK fi-FI pl-PL tr-TR ko-KR
    zh-CN zh-TW th-TH id-ID ms-MY fil-PH vi-VN
    es-AR es-CL es-CO es-PE es-VE ar-EG en-ZA
    en-NG sw-KE ar-MA ar-DZ ar-TN he-IL ar-AE
    ar-SA ar-QA ar-KW ar-BH ar-OM ar-JO ar-LB
""".split()
# Time zone names in IANA "Area/Location" form.
# NOTE(review): how the list is consumed is not visible here; presumably one
# entry is picked for a spoofed client header. Confirm against caller.
#
# India is listed as "Asia/Kolkata": "Asia/Mumbai" is not an IANA zone name
# and is rejected by tz-aware libraries.
# "America/Buenos_Aires" is the older alias of
# "America/Argentina/Buenos_Aires" and is left unchanged.
TIMEZONES = [
    "America/New_York",
    "America/Chicago",
    "America/Los_Angeles",
    "America/Denver",
    "Europe/London",
    "Europe/Paris",
    "Europe/Berlin",
    "Europe/Moscow",
    "Asia/Tokyo",
    "Asia/Shanghai",
    "Asia/Hong_Kong",
    "Asia/Singapore",
    "Asia/Seoul",
    "Asia/Kolkata",
    "Asia/Dubai",
    "Australia/Sydney",
    "Australia/Melbourne",
    "America/Toronto",
    "America/Vancouver",
    "America/Mexico_City",
    "America/Sao_Paulo",
    "America/Buenos_Aires",
    "Africa/Cairo",
    "Africa/Johannesburg",
    "Africa/Lagos",
    "Africa/Nairobi",
    "Pacific/Auckland",
    "Pacific/Honolulu",
]