Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	Update webscout.py
Browse files- webscout.py +175 -142
    	
        webscout.py
    CHANGED
    
    | @@ -1,3 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            import logging
         | 
| 2 | 
             
            import warnings
         | 
| 3 | 
             
            from concurrent.futures import ThreadPoolExecutor
         | 
| @@ -8,21 +13,11 @@ from itertools import cycle, islice | |
| 8 | 
             
            from random import choice
         | 
| 9 | 
             
            from threading import Event
         | 
| 10 | 
             
            from types import TracebackType
         | 
| 11 | 
            -
            from typing import  | 
| 12 | 
            -
            import asyncio
         | 
| 13 | 
            -
            import json
         | 
| 14 | 
            -
            import aiohttp
         | 
| 15 | 
            -
            import requests
         | 
| 16 | 
            -
            import http.cookiejar as cookiejar
         | 
| 17 | 
            -
            import json
         | 
| 18 | 
            -
            from xml.etree import ElementTree
         | 
| 19 | 
            -
            import re
         | 
| 20 | 
            -
            import html.parser
         | 
| 21 | 
            -
            from typing import List, Dict, Union, Optional
         | 
| 22 | 
            -
             | 
| 23 | 
            -
            import pyreqwest_impersonate as pri
         | 
| 24 |  | 
|  | |
| 25 |  | 
|  | |
| 26 |  | 
| 27 | 
             
            try:
         | 
| 28 | 
             
                from lxml.etree import _Element
         | 
| @@ -33,6 +28,34 @@ try: | |
| 33 | 
             
            except ImportError:
         | 
| 34 | 
             
                LXML_AVAILABLE = False
         | 
| 35 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 36 | 
             
            import re
         | 
| 37 | 
             
            from decimal import Decimal
         | 
| 38 | 
             
            from html import unescape
         | 
| @@ -117,24 +140,10 @@ def _calculate_distance(lat1: Decimal, lon1: Decimal, lat2: Decimal, lon2: Decim | |
| 117 | 
             
                c = 2 * atan2(sqrt(a), sqrt(1 - a))
         | 
| 118 | 
             
                return R * c
         | 
| 119 |  | 
| 120 | 
            -
             | 
| 121 | 
            -
                """ | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
            class RatelimitE(Exception):
         | 
| 125 | 
            -
                """Raised for rate limit exceeded errors during API requests."""
         | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
            class TimeoutE(Exception):
         | 
| 129 | 
            -
                """Raised for timeout errors during API requests."""
         | 
| 130 | 
            -
                
         | 
| 131 | 
            -
            class FailedToGenerateResponseError(Exception):
         | 
| 132 | 
            -
                
         | 
| 133 | 
            -
                """Provider failed to fetch response"""
         | 
| 134 | 
            -
            class AllProvidersFailure(Exception):
         | 
| 135 | 
            -
                """None of the providers generated response successfully"""
         | 
| 136 |  | 
| 137 | 
            -
                pass
         | 
| 138 | 
             
            logger = logging.getLogger("webscout.WEBS")
         | 
| 139 |  | 
| 140 |  | 
| @@ -143,21 +152,22 @@ class WEBS: | |
| 143 |  | 
| 144 | 
             
                _executor: ThreadPoolExecutor = ThreadPoolExecutor()
         | 
| 145 | 
             
                _impersonates = (
         | 
| 146 | 
            -
                    " | 
| 147 | 
            -
                    " | 
| 148 | 
            -
                     | 
| 149 | 
            -
                    " | 
| 150 | 
            -
                    " | 
| 151 | 
            -
                     | 
| 152 | 
            -
                    " | 
|  | |
| 153 | 
             
                )  # fmt: skip
         | 
| 154 |  | 
| 155 | 
             
                def __init__(
         | 
| 156 | 
             
                    self,
         | 
| 157 | 
            -
                    headers:  | 
| 158 | 
            -
                    proxy:  | 
| 159 | 
            -
                    proxies:  | 
| 160 | 
            -
                    timeout:  | 
| 161 | 
             
                ) -> None:
         | 
| 162 | 
             
                    """Initialize the WEBS object.
         | 
| 163 |  | 
| @@ -167,14 +177,14 @@ class WEBS: | |
| 167 | 
             
                            example: "http://user:[email protected]:3128". Defaults to None.
         | 
| 168 | 
             
                        timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
         | 
| 169 | 
             
                    """
         | 
| 170 | 
            -
                    self.proxy:  | 
| 171 | 
             
                    assert self.proxy is None or isinstance(self.proxy, str), "proxy must be a str"
         | 
| 172 | 
             
                    if not proxy and proxies:
         | 
| 173 | 
             
                        warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=1)
         | 
| 174 | 
             
                        self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies
         | 
| 175 | 
             
                    self.headers = headers if headers else {}
         | 
| 176 | 
             
                    self.headers["Referer"] = "https://duckduckgo.com/"
         | 
| 177 | 
            -
                    self.client =  | 
| 178 | 
             
                        headers=self.headers,
         | 
| 179 | 
             
                        proxy=self.proxy,
         | 
| 180 | 
             
                        timeout=timeout,
         | 
| @@ -185,22 +195,23 @@ class WEBS: | |
| 185 | 
             
                        verify=False,
         | 
| 186 | 
             
                    )
         | 
| 187 | 
             
                    self._exception_event = Event()
         | 
| 188 | 
            -
                    self._chat_messages:  | 
|  | |
| 189 | 
             
                    self._chat_vqd: str = ""
         | 
| 190 |  | 
| 191 | 
            -
                def __enter__(self) ->  | 
| 192 | 
             
                    return self
         | 
| 193 |  | 
| 194 | 
             
                def __exit__(
         | 
| 195 | 
             
                    self,
         | 
| 196 | 
            -
                    exc_type:  | 
| 197 | 
            -
                    exc_val:  | 
| 198 | 
            -
                    exc_tb:  | 
| 199 | 
             
                ) -> None:
         | 
| 200 | 
             
                    pass
         | 
| 201 |  | 
| 202 | 
             
                @cached_property
         | 
| 203 | 
            -
                def parser(self) ->  | 
| 204 | 
             
                    """Get HTML parser."""
         | 
| 205 | 
             
                    return LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False)
         | 
| 206 |  | 
| @@ -208,9 +219,9 @@ class WEBS: | |
| 208 | 
             
                    self,
         | 
| 209 | 
             
                    method: str,
         | 
| 210 | 
             
                    url: str,
         | 
| 211 | 
            -
                    params:  | 
| 212 | 
            -
                    content:  | 
| 213 | 
            -
                    data:  | 
| 214 | 
             
                ) -> bytes:
         | 
| 215 | 
             
                    if self._exception_event.is_set():
         | 
| 216 | 
             
                        raise WebscoutE("Exception occurred in previous call.")
         | 
| @@ -234,24 +245,30 @@ class WEBS: | |
| 234 | 
             
                    resp_content = self._get_url("POST", "https://duckduckgo.com", data={"q": keywords})
         | 
| 235 | 
             
                    return _extract_vqd(resp_content, keywords)
         | 
| 236 |  | 
| 237 | 
            -
                def chat(self, keywords: str, model: str = "gpt- | 
| 238 | 
            -
                    """Initiates a chat session with  | 
| 239 |  | 
| 240 | 
             
                    Args:
         | 
| 241 | 
             
                        keywords (str): The initial message or question to send to the AI.
         | 
| 242 | 
            -
                        model (str): The model to use: "gpt- | 
| 243 | 
            -
                            Defaults to "gpt- | 
| 244 | 
             
                        timeout (int): Timeout value for the HTTP client. Defaults to 20.
         | 
| 245 |  | 
| 246 | 
             
                    Returns:
         | 
| 247 | 
             
                        str: The response from the AI.
         | 
| 248 | 
             
                    """
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 249 | 
             
                    models = {
         | 
| 250 | 
             
                        "claude-3-haiku": "claude-3-haiku-20240307",
         | 
| 251 | 
            -
                        "gpt-3.5": "gpt-3.5-turbo-0125",
         | 
| 252 | 
            -
                        "llama-3-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
         | 
| 253 | 
            -
                        "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         | 
| 254 | 
             
                        "gpt-4o-mini": "gpt-4o-mini",
         | 
|  | |
|  | |
| 255 | 
             
                    }
         | 
| 256 | 
             
                    # vqd
         | 
| 257 | 
             
                    if not self._chat_vqd:
         | 
| @@ -259,6 +276,7 @@ class WEBS: | |
| 259 | 
             
                        self._chat_vqd = resp.headers.get("x-vqd-4", "")
         | 
| 260 |  | 
| 261 | 
             
                    self._chat_messages.append({"role": "user", "content": keywords})
         | 
|  | |
| 262 |  | 
| 263 | 
             
                    json_data = {
         | 
| 264 | 
             
                        "model": models[model],
         | 
| @@ -272,10 +290,26 @@ class WEBS: | |
| 272 | 
             
                    )
         | 
| 273 | 
             
                    self._chat_vqd = resp.headers.get("x-vqd-4", "")
         | 
| 274 |  | 
| 275 | 
            -
                    data = ",".join(x for line in resp.text.rstrip("[DONE]\n").split("data:") if (x := line.strip()))
         | 
| 276 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 277 |  | 
| 278 | 
             
                    self._chat_messages.append({"role": "assistant", "content": result})
         | 
|  | |
| 279 | 
             
                    return result
         | 
| 280 |  | 
| 281 | 
             
                def text(
         | 
| @@ -283,11 +317,11 @@ class WEBS: | |
| 283 | 
             
                    keywords: str,
         | 
| 284 | 
             
                    region: str = "wt-wt",
         | 
| 285 | 
             
                    safesearch: str = "moderate",
         | 
| 286 | 
            -
                    timelimit:  | 
| 287 | 
             
                    backend: str = "api",
         | 
| 288 | 
            -
                    max_results:  | 
| 289 | 
            -
                ) ->  | 
| 290 | 
            -
                    """ | 
| 291 |  | 
| 292 | 
             
                    Args:
         | 
| 293 | 
             
                        keywords: keywords for query.
         | 
| @@ -325,10 +359,10 @@ class WEBS: | |
| 325 | 
             
                    keywords: str,
         | 
| 326 | 
             
                    region: str = "wt-wt",
         | 
| 327 | 
             
                    safesearch: str = "moderate",
         | 
| 328 | 
            -
                    timelimit:  | 
| 329 | 
            -
                    max_results:  | 
| 330 | 
            -
                ) ->  | 
| 331 | 
            -
                    """ | 
| 332 |  | 
| 333 | 
             
                    Args:
         | 
| 334 | 
             
                        keywords: keywords for query.
         | 
| @@ -371,9 +405,9 @@ class WEBS: | |
| 371 | 
             
                        payload["df"] = timelimit
         | 
| 372 |  | 
| 373 | 
             
                    cache = set()
         | 
| 374 | 
            -
                    results:  | 
| 375 |  | 
| 376 | 
            -
                    def _text_api_page(s: int) ->  | 
| 377 | 
             
                        payload["s"] = f"{s}"
         | 
| 378 | 
             
                        resp_content = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
         | 
| 379 | 
             
                        page_data = _text_extract_json(resp_content, keywords)
         | 
| @@ -408,10 +442,10 @@ class WEBS: | |
| 408 | 
             
                    self,
         | 
| 409 | 
             
                    keywords: str,
         | 
| 410 | 
             
                    region: str = "wt-wt",
         | 
| 411 | 
            -
                    timelimit:  | 
| 412 | 
            -
                    max_results:  | 
| 413 | 
            -
                ) ->  | 
| 414 | 
            -
                    """ | 
| 415 |  | 
| 416 | 
             
                    Args:
         | 
| 417 | 
             
                        keywords: keywords for query.
         | 
| @@ -445,9 +479,9 @@ class WEBS: | |
| 445 | 
             
                        payload["vqd"] = vqd
         | 
| 446 |  | 
| 447 | 
             
                    cache = set()
         | 
| 448 | 
            -
                    results:  | 
| 449 |  | 
| 450 | 
            -
                    def _text_html_page(s: int) ->  | 
| 451 | 
             
                        payload["s"] = f"{s}"
         | 
| 452 | 
             
                        resp_content = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
         | 
| 453 | 
             
                        if b"No  results." in resp_content:
         | 
| @@ -456,12 +490,12 @@ class WEBS: | |
| 456 | 
             
                        page_results = []
         | 
| 457 | 
             
                        tree = document_fromstring(resp_content, self.parser)
         | 
| 458 | 
             
                        elements = tree.xpath("//div[h2]")
         | 
| 459 | 
            -
                        if not isinstance(elements,  | 
| 460 | 
             
                            return []
         | 
| 461 | 
             
                        for e in elements:
         | 
| 462 | 
             
                            if isinstance(e, _Element):
         | 
| 463 | 
             
                                hrefxpath = e.xpath("./a/@href")
         | 
| 464 | 
            -
                                href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath,  | 
| 465 | 
             
                                if (
         | 
| 466 | 
             
                                    href
         | 
| 467 | 
             
                                    and href not in cache
         | 
| @@ -471,9 +505,9 @@ class WEBS: | |
| 471 | 
             
                                ):
         | 
| 472 | 
             
                                    cache.add(href)
         | 
| 473 | 
             
                                    titlexpath = e.xpath("./h2/a/text()")
         | 
| 474 | 
            -
                                    title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath,  | 
| 475 | 
             
                                    bodyxpath = e.xpath("./a//text()")
         | 
| 476 | 
            -
                                    body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath,  | 
| 477 | 
             
                                    result = {
         | 
| 478 | 
             
                                        "title": _normalize(title),
         | 
| 479 | 
             
                                        "href": _normalize_url(href),
         | 
| @@ -498,10 +532,10 @@ class WEBS: | |
| 498 | 
             
                    self,
         | 
| 499 | 
             
                    keywords: str,
         | 
| 500 | 
             
                    region: str = "wt-wt",
         | 
| 501 | 
            -
                    timelimit:  | 
| 502 | 
            -
                    max_results:  | 
| 503 | 
            -
                ) ->  | 
| 504 | 
            -
                    """ | 
| 505 |  | 
| 506 | 
             
                    Args:
         | 
| 507 | 
             
                        keywords: keywords for query.
         | 
| @@ -532,9 +566,9 @@ class WEBS: | |
| 532 | 
             
                        payload["df"] = timelimit
         | 
| 533 |  | 
| 534 | 
             
                    cache = set()
         | 
| 535 | 
            -
                    results:  | 
| 536 |  | 
| 537 | 
            -
                    def _text_lite_page(s: int) ->  | 
| 538 | 
             
                        payload["s"] = f"{s}"
         | 
| 539 | 
             
                        resp_content = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
         | 
| 540 | 
             
                        if b"No more results." in resp_content:
         | 
| @@ -543,7 +577,7 @@ class WEBS: | |
| 543 | 
             
                        page_results = []
         | 
| 544 | 
             
                        tree = document_fromstring(resp_content, self.parser)
         | 
| 545 | 
             
                        elements = tree.xpath("//table[last()]//tr")
         | 
| 546 | 
            -
                        if not isinstance(elements,  | 
| 547 | 
             
                            return []
         | 
| 548 |  | 
| 549 | 
             
                        data = zip(cycle(range(1, 5)), elements)
         | 
| @@ -551,7 +585,7 @@ class WEBS: | |
| 551 | 
             
                            if isinstance(e, _Element):
         | 
| 552 | 
             
                                if i == 1:
         | 
| 553 | 
             
                                    hrefxpath = e.xpath(".//a//@href")
         | 
| 554 | 
            -
                                    href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath,  | 
| 555 | 
             
                                    if (
         | 
| 556 | 
             
                                        href is None
         | 
| 557 | 
             
                                        or href in cache
         | 
| @@ -563,12 +597,12 @@ class WEBS: | |
| 563 | 
             
                                    else:
         | 
| 564 | 
             
                                        cache.add(href)
         | 
| 565 | 
             
                                        titlexpath = e.xpath(".//a//text()")
         | 
| 566 | 
            -
                                        title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath,  | 
| 567 | 
             
                                elif i == 2:
         | 
| 568 | 
             
                                    bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
         | 
| 569 | 
             
                                    body = (
         | 
| 570 | 
             
                                        "".join(str(x) for x in bodyxpath).strip()
         | 
| 571 | 
            -
                                        if bodyxpath and isinstance(bodyxpath,  | 
| 572 | 
             
                                        else ""
         | 
| 573 | 
             
                                    )
         | 
| 574 | 
             
                                    if href:
         | 
| @@ -597,15 +631,15 @@ class WEBS: | |
| 597 | 
             
                    keywords: str,
         | 
| 598 | 
             
                    region: str = "wt-wt",
         | 
| 599 | 
             
                    safesearch: str = "moderate",
         | 
| 600 | 
            -
                    timelimit:  | 
| 601 | 
            -
                    size:  | 
| 602 | 
            -
                    color:  | 
| 603 | 
            -
                    type_image:  | 
| 604 | 
            -
                    layout:  | 
| 605 | 
            -
                    license_image:  | 
| 606 | 
            -
                    max_results:  | 
| 607 | 
            -
                ) ->  | 
| 608 | 
            -
                    """ | 
| 609 |  | 
| 610 | 
             
                    Args:
         | 
| 611 | 
             
                        keywords: keywords for query.
         | 
| @@ -653,9 +687,9 @@ class WEBS: | |
| 653 | 
             
                    }
         | 
| 654 |  | 
| 655 | 
             
                    cache = set()
         | 
| 656 | 
            -
                    results:  | 
| 657 |  | 
| 658 | 
            -
                    def _images_page(s: int) ->  | 
| 659 | 
             
                        payload["s"] = f"{s}"
         | 
| 660 | 
             
                        resp_content = self._get_url("GET", "https://duckduckgo.com/i.js", params=payload)
         | 
| 661 | 
             
                        resp_json = json_loads(resp_content)
         | 
| @@ -695,13 +729,13 @@ class WEBS: | |
| 695 | 
             
                    keywords: str,
         | 
| 696 | 
             
                    region: str = "wt-wt",
         | 
| 697 | 
             
                    safesearch: str = "moderate",
         | 
| 698 | 
            -
                    timelimit:  | 
| 699 | 
            -
                    resolution:  | 
| 700 | 
            -
                    duration:  | 
| 701 | 
            -
                    license_videos:  | 
| 702 | 
            -
                    max_results:  | 
| 703 | 
            -
                ) ->  | 
| 704 | 
            -
                    """ | 
| 705 |  | 
| 706 | 
             
                    Args:
         | 
| 707 | 
             
                        keywords: keywords for query.
         | 
| @@ -740,9 +774,9 @@ class WEBS: | |
| 740 | 
             
                    }
         | 
| 741 |  | 
| 742 | 
             
                    cache = set()
         | 
| 743 | 
            -
                    results:  | 
| 744 |  | 
| 745 | 
            -
                    def _videos_page(s: int) ->  | 
| 746 | 
             
                        payload["s"] = f"{s}"
         | 
| 747 | 
             
                        resp_content = self._get_url("GET", "https://duckduckgo.com/v.js", params=payload)
         | 
| 748 | 
             
                        resp_json = json_loads(resp_content)
         | 
| @@ -772,10 +806,10 @@ class WEBS: | |
| 772 | 
             
                    keywords: str,
         | 
| 773 | 
             
                    region: str = "wt-wt",
         | 
| 774 | 
             
                    safesearch: str = "moderate",
         | 
| 775 | 
            -
                    timelimit:  | 
| 776 | 
            -
                    max_results:  | 
| 777 | 
            -
                ) ->  | 
| 778 | 
            -
                    """ | 
| 779 |  | 
| 780 | 
             
                    Args:
         | 
| 781 | 
             
                        keywords: keywords for query.
         | 
| @@ -809,9 +843,9 @@ class WEBS: | |
| 809 | 
             
                        payload["df"] = timelimit
         | 
| 810 |  | 
| 811 | 
             
                    cache = set()
         | 
| 812 | 
            -
                    results:  | 
| 813 |  | 
| 814 | 
            -
                    def _news_page(s: int) ->  | 
| 815 | 
             
                        payload["s"] = f"{s}"
         | 
| 816 | 
             
                        resp_content = self._get_url("GET", "https://duckduckgo.com/news.js", params=payload)
         | 
| 817 | 
             
                        resp_json = json_loads(resp_content)
         | 
| @@ -844,8 +878,8 @@ class WEBS: | |
| 844 |  | 
| 845 | 
             
                    return list(islice(results, max_results))
         | 
| 846 |  | 
| 847 | 
            -
                def answers(self, keywords: str) ->  | 
| 848 | 
            -
                    """ | 
| 849 |  | 
| 850 | 
             
                    Args:
         | 
| 851 | 
             
                        keywords: keywords for query,
         | 
| @@ -915,8 +949,8 @@ class WEBS: | |
| 915 |  | 
| 916 | 
             
                    return results
         | 
| 917 |  | 
| 918 | 
            -
                def suggestions(self, keywords: str, region: str = "wt-wt") ->  | 
| 919 | 
            -
                    """ | 
| 920 |  | 
| 921 | 
             
                    Args:
         | 
| 922 | 
             
                        keywords: keywords for query.
         | 
| @@ -943,19 +977,19 @@ class WEBS: | |
| 943 | 
             
                def maps(
         | 
| 944 | 
             
                    self,
         | 
| 945 | 
             
                    keywords: str,
         | 
| 946 | 
            -
                    place:  | 
| 947 | 
            -
                    street:  | 
| 948 | 
            -
                    city:  | 
| 949 | 
            -
                    county:  | 
| 950 | 
            -
                    state:  | 
| 951 | 
            -
                    country:  | 
| 952 | 
            -
                    postalcode:  | 
| 953 | 
            -
                    latitude:  | 
| 954 | 
            -
                    longitude:  | 
| 955 | 
             
                    radius: int = 0,
         | 
| 956 | 
            -
                    max_results:  | 
| 957 | 
            -
                ) ->  | 
| 958 | 
            -
                    """ | 
| 959 |  | 
| 960 | 
             
                    Args:
         | 
| 961 | 
             
                        keywords: keywords for query
         | 
| @@ -1038,11 +1072,11 @@ class WEBS: | |
| 1038 | 
             
                    logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}")
         | 
| 1039 |  | 
| 1040 | 
             
                    cache = set()
         | 
| 1041 | 
            -
                    results:  | 
| 1042 |  | 
| 1043 | 
             
                    def _maps_page(
         | 
| 1044 | 
            -
                        bbox:  | 
| 1045 | 
            -
                    ) ->  | 
| 1046 | 
             
                        if max_results and len(results) >= max_results:
         | 
| 1047 | 
             
                            return None
         | 
| 1048 | 
             
                        lat_t, lon_l, lat_b, lon_r = bbox
         | 
| @@ -1129,10 +1163,8 @@ class WEBS: | |
| 1129 |  | 
| 1130 | 
             
                    return list(islice(results, max_results))
         | 
| 1131 |  | 
| 1132 | 
            -
                def translate(
         | 
| 1133 | 
            -
                     | 
| 1134 | 
            -
                ) -> List[Dict[str, str]]:
         | 
| 1135 | 
            -
                    """DuckDuckGo translate.
         | 
| 1136 |  | 
| 1137 | 
             
                    Args:
         | 
| 1138 | 
             
                        keywords: string or list of strings to translate.
         | 
| @@ -1159,14 +1191,14 @@ class WEBS: | |
| 1159 | 
             
                    if from_:
         | 
| 1160 | 
             
                        payload["from"] = from_
         | 
| 1161 |  | 
| 1162 | 
            -
                    def _translate_keyword(keyword: str) ->  | 
| 1163 | 
             
                        resp_content = self._get_url(
         | 
| 1164 | 
             
                            "POST",
         | 
| 1165 | 
             
                            "https://duckduckgo.com/translation.js",
         | 
| 1166 | 
             
                            params=payload,
         | 
| 1167 | 
             
                            content=keyword.encode(),
         | 
| 1168 | 
             
                        )
         | 
| 1169 | 
            -
                        page_data:  | 
| 1170 | 
             
                        page_data["original"] = keyword
         | 
| 1171 | 
             
                        return page_data
         | 
| 1172 |  | 
| @@ -1182,6 +1214,7 @@ class WEBS: | |
| 1182 |  | 
| 1183 | 
             
                    return results
         | 
| 1184 |  | 
|  | |
| 1185 | 
             
            html_parser = html.parser.HTMLParser()
         | 
| 1186 |  | 
| 1187 |  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import html
         | 
| 4 | 
            +
            import http.cookiejar as cookiejar
         | 
| 5 | 
            +
            from xml.etree import ElementTree
         | 
| 6 | 
             
            import logging
         | 
| 7 | 
             
            import warnings
         | 
| 8 | 
             
            from concurrent.futures import ThreadPoolExecutor
         | 
|  | |
| 13 | 
             
            from random import choice
         | 
| 14 | 
             
            from threading import Event
         | 
| 15 | 
             
            from types import TracebackType
         | 
| 16 | 
            +
            from typing import Optional, cast
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 17 |  | 
| 18 | 
            +
            import requests
         | 
| 19 |  | 
| 20 | 
            +
            import primp  # type: ignore
         | 
| 21 |  | 
| 22 | 
             
            try:
         | 
| 23 | 
             
                from lxml.etree import _Element
         | 
|  | |
| 28 | 
             
            except ImportError:
         | 
| 29 | 
             
                LXML_AVAILABLE = False
         | 
| 30 |  | 
| 31 | 
            +
            class WebscoutE(Exception):
                """Base exception class for search."""


            # The three request-level errors below derive from WebscoutE so that a
            # single ``except WebscoutE`` handler also sees them; they remain
            # ``Exception`` subclasses, so existing handlers keep working.
            class RatelimitE(WebscoutE):
                """Raised for rate limit exceeded errors during API requests."""


            class ConversationLimitException(WebscoutE):
                """Raised for conversation limit exceeded errors during API requests."""


            class TimeoutE(WebscoutE):
                """Raised for timeout errors during API requests."""


            class FailedToGenerateResponseError(Exception):
                """Provider failed to fetch response"""


            class AllProvidersFailure(Exception):
                """None of the providers generated response successfully"""


            class FacebookInvalidCredentialsException(Exception):
                """Signals invalid Facebook credentials (raise sites are outside this section)."""


            class FacebookRegionBlocked(Exception):
                """Signals a Facebook region block (raise sites are outside this section)."""
         | 
| 57 | 
            +
             | 
| 58 | 
            +
             | 
| 59 | 
             
            import re
         | 
| 60 | 
             
            from decimal import Decimal
         | 
| 61 | 
             
            from html import unescape
         | 
|  | |
| 140 | 
             
                c = 2 * atan2(sqrt(a), sqrt(1 - a))
         | 
| 141 | 
             
                return R * c
         | 
| 142 |  | 
| 143 | 
            +
            def _expand_proxy_tb_alias(proxy: str | None) -> str | None:
                """Translate the "tb" shorthand into its full proxy URL.

                Any other value, including ``None``, is handed back unchanged.
                """
                if proxy != "tb":
                    return proxy
                return "socks5://127.0.0.1:9150"
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 146 |  | 
|  | |
| 147 | 
             
            logger = logging.getLogger("webscout.WEBS")
         | 
| 148 |  | 
| 149 |  | 
|  | |
| 152 |  | 
| 153 | 
             
                _executor: ThreadPoolExecutor = ThreadPoolExecutor()
         | 
| 154 | 
             
                _impersonates = (
         | 
| 155 | 
            +
                    "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_107", "chrome_108", 
         | 
| 156 | 
            +
                    "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118", "chrome_119", "chrome_120", 
         | 
| 157 | 
            +
                    #"chrome_123", "chrome_124", "chrome_126",
         | 
| 158 | 
            +
                    "chrome_127", "chrome_128", "chrome_129",
         | 
| 159 | 
            +
                    "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_15.3", "safari_15.5", "safari_15.6.1", 
         | 
| 160 | 
            +
                    "safari_16", "safari_16.5", "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5", "safari_18", 
         | 
| 161 | 
            +
                    "safari_ipad_18",
         | 
| 162 | 
            +
                    "edge_101", "edge_122", "edge_127",
         | 
| 163 | 
             
                )  # fmt: skip
         | 
| 164 |  | 
| 165 | 
             
                def __init__(
         | 
| 166 | 
             
                    self,
         | 
| 167 | 
            +
                    headers: dict[str, str] | None = None,
         | 
| 168 | 
            +
                    proxy: str | None = None,
         | 
| 169 | 
            +
                    proxies: dict[str, str] | str | None = None,  # deprecated
         | 
| 170 | 
            +
                    timeout: int | None = 10,
         | 
| 171 | 
             
                ) -> None:
         | 
| 172 | 
             
                    """Initialize the WEBS object.
         | 
| 173 |  | 
|  | |
| 177 | 
             
                            example: "http://user:pass@example.com:3128". Defaults to None.
         | 
| 178 | 
             
                        timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
         | 
| 179 | 
             
                    """
         | 
| 180 | 
            +
                    self.proxy: str | None = _expand_proxy_tb_alias(proxy)  # replaces "tb" with "socks5://127.0.0.1:9150"
         | 
| 181 | 
             
                    assert self.proxy is None or isinstance(self.proxy, str), "proxy must be a str"
         | 
| 182 | 
             
                    if not proxy and proxies:
         | 
| 183 | 
             
                        warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=1)
         | 
| 184 | 
             
                        self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies
         | 
| 185 | 
             
                    self.headers = headers if headers else {}
         | 
| 186 | 
             
                    self.headers["Referer"] = "https://duckduckgo.com/"
         | 
| 187 | 
            +
                    self.client = primp.Client(
         | 
| 188 | 
             
                        headers=self.headers,
         | 
| 189 | 
             
                        proxy=self.proxy,
         | 
| 190 | 
             
                        timeout=timeout,
         | 
|  | |
| 195 | 
             
                        verify=False,
         | 
| 196 | 
             
                    )
         | 
| 197 | 
             
                    self._exception_event = Event()
         | 
| 198 | 
            +
                    self._chat_messages: list[dict[str, str]] = []
         | 
| 199 | 
            +
                    self._chat_tokens_count = 0
         | 
| 200 | 
             
                    self._chat_vqd: str = ""
         | 
| 201 |  | 
| 202 | 
            +
                def __enter__(self) -> WEBS:
                    """Enter the ``with`` block and hand back this same object."""
                    entered = self
                    return entered
         | 
| 204 |  | 
| 205 | 
             
                def __exit__(
                    self,
                    exc_type: type[BaseException] | None = None,
                    exc_val: BaseException | None = None,
                    exc_tb: TracebackType | None = None,
                ) -> None:
                    """Leave the ``with`` block.

                    Does nothing with the exception details and returns ``None``, so an
                    exception raised inside the block is not suppressed.
                    """
                    return None
         | 
| 212 |  | 
| 213 | 
             
                @cached_property
                def parser(self) -> LHTMLParser:
                    """Get HTML parser."""
                    # Options passed to the parser: blank text, comments and processing
                    # instructions are removed, and ID collection is turned off.
                    parser_options = {
                        "remove_blank_text": True,
                        "remove_comments": True,
                        "remove_pis": True,
                        "collect_ids": False,
                    }
                    return LHTMLParser(**parser_options)
         | 
| 217 |  | 
|  | |
| 219 | 
             
                    self,
         | 
| 220 | 
             
                    method: str,
         | 
| 221 | 
             
                    url: str,
         | 
| 222 | 
            +
                    params: dict[str, str] | None = None,
         | 
| 223 | 
            +
                    content: bytes | None = None,
         | 
| 224 | 
            +
                    data: dict[str, str] | bytes | None = None,
         | 
| 225 | 
             
                ) -> bytes:
         | 
| 226 | 
             
                    if self._exception_event.is_set():
         | 
| 227 | 
             
                        raise WebscoutE("Exception occurred in previous call.")
         | 
|  | |
| 245 | 
             
                    resp_content = self._get_url("POST", "https://duckduckgo.com", data={"q": keywords})
         | 
| 246 | 
             
                    return _extract_vqd(resp_content, keywords)
         | 
| 247 |  | 
| 248 | 
            +
                def chat(self, keywords: str, model: str = "gpt-4o-mini", timeout: int = 30) -> str:
         | 
| 249 | 
            +
                    """Initiates a chat session with webscout AI.
         | 
| 250 |  | 
| 251 | 
             
                    Args:
         | 
| 252 | 
             
                        keywords (str): The initial message or question to send to the AI.
         | 
| 253 | 
            +
                        model (str): The model to use: "gpt-4o-mini", "claude-3-haiku", "llama-3.1-70b", "mixtral-8x7b".
         | 
| 254 | 
            +
                            Defaults to "gpt-4o-mini".
         | 
| 255 | 
             
                        timeout (int): Timeout value for the HTTP client. Defaults to 30.
         | 
| 256 |  | 
| 257 | 
             
                    Returns:
         | 
| 258 | 
             
                        str: The response from the AI.
         | 
| 259 | 
             
                    """
         | 
| 260 | 
            +
                    models_deprecated = {
         | 
| 261 | 
            +
                        "gpt-3.5": "gpt-4o-mini",
         | 
| 262 | 
            +
                        "llama-3-70b": "llama-3.1-70b",
         | 
| 263 | 
            +
                    }
         | 
| 264 | 
            +
                    if model in models_deprecated:
         | 
| 265 | 
            +
                        logger.info(f"{model=} is deprecated, using {models_deprecated[model]}")
         | 
| 266 | 
            +
                        model = models_deprecated[model]
         | 
| 267 | 
             
                    models = {
         | 
| 268 | 
             
                        "claude-3-haiku": "claude-3-haiku-20240307",
         | 
|  | |
|  | |
|  | |
| 269 | 
             
                        "gpt-4o-mini": "gpt-4o-mini",
         | 
| 270 | 
            +
                        "llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
         | 
| 271 | 
            +
                        "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         | 
| 272 | 
             
                    }
         | 
| 273 | 
             
                    # vqd
         | 
| 274 | 
             
                    if not self._chat_vqd:
         | 
|  | |
| 276 | 
             
                        self._chat_vqd = resp.headers.get("x-vqd-4", "")
         | 
| 277 |  | 
| 278 | 
             
                    self._chat_messages.append({"role": "user", "content": keywords})
         | 
| 279 | 
            +
                    self._chat_tokens_count += len(keywords) // 4 if len(keywords) >= 4 else 1  # approximate number of tokens
         | 
| 280 |  | 
| 281 | 
             
                    json_data = {
         | 
| 282 | 
             
                        "model": models[model],
         | 
|  | |
| 290 | 
             
                    )
         | 
| 291 | 
             
                    self._chat_vqd = resp.headers.get("x-vqd-4", "")
         | 
| 292 |  | 
| 293 | 
            +
                    data = ",".join(x for line in resp.text.rstrip("[DONE]LIMT_CVRSA\n").split("data:") if (x := line.strip()))
         | 
| 294 | 
            +
                    data = json_loads("[" + data + "]")
         | 
| 295 | 
            +
             | 
| 296 | 
            +
                    results = []
         | 
| 297 | 
            +
                    for x in data:
         | 
| 298 | 
            +
                        if x.get("action") == "error":
         | 
| 299 | 
            +
                            err_message = x.get("type", "")
         | 
| 300 | 
            +
                            if x.get("status") == 429:
         | 
| 301 | 
            +
                                raise (
         | 
| 302 | 
            +
                                    ConversationLimitException(err_message)
         | 
| 303 | 
            +
                                    if err_message == "ERR_CONVERSATION_LIMIT"
         | 
| 304 | 
            +
                                    else RatelimitE(err_message)
         | 
| 305 | 
            +
                                )
         | 
| 306 | 
            +
                            raise WebscoutE(err_message)
         | 
| 307 | 
            +
                        elif message := x.get("message"):
         | 
| 308 | 
            +
                            results.append(message)
         | 
| 309 | 
            +
                    result = "".join(results)
         | 
| 310 |  | 
| 311 | 
             
                    self._chat_messages.append({"role": "assistant", "content": result})
         | 
| 312 | 
            +
                    self._chat_tokens_count += len(results)
         | 
| 313 | 
             
                    return result
         | 
| 314 |  | 
| 315 | 
             
                def text(
         | 
|  | |
| 317 | 
             
                    keywords: str,
         | 
| 318 | 
             
                    region: str = "wt-wt",
         | 
| 319 | 
             
                    safesearch: str = "moderate",
         | 
| 320 | 
            +
                    timelimit: str | None = None,
         | 
| 321 | 
             
                    backend: str = "api",
         | 
| 322 | 
            +
                    max_results: int | None = None,
         | 
| 323 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 324 | 
            +
                    """webscout text search. Query params: https://duckduckgo.com/params.
         | 
| 325 |  | 
| 326 | 
             
                    Args:
         | 
| 327 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 359 | 
             
                    keywords: str,
         | 
| 360 | 
             
                    region: str = "wt-wt",
         | 
| 361 | 
             
                    safesearch: str = "moderate",
         | 
| 362 | 
            +
                    timelimit: str | None = None,
         | 
| 363 | 
            +
                    max_results: int | None = None,
         | 
| 364 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 365 | 
            +
                    """webscout text search. Query params: https://duckduckgo.com/params.
         | 
| 366 |  | 
| 367 | 
             
                    Args:
         | 
| 368 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 405 | 
             
                        payload["df"] = timelimit
         | 
| 406 |  | 
| 407 | 
             
                    cache = set()
         | 
| 408 | 
            +
                    results: list[dict[str, str]] = []
         | 
| 409 |  | 
| 410 | 
            +
                    def _text_api_page(s: int) -> list[dict[str, str]]:
         | 
| 411 | 
             
                        payload["s"] = f"{s}"
         | 
| 412 | 
             
                        resp_content = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
         | 
| 413 | 
             
                        page_data = _text_extract_json(resp_content, keywords)
         | 
|  | |
| 442 | 
             
                    self,
         | 
| 443 | 
             
                    keywords: str,
         | 
| 444 | 
             
                    region: str = "wt-wt",
         | 
| 445 | 
            +
                    timelimit: str | None = None,
         | 
| 446 | 
            +
                    max_results: int | None = None,
         | 
| 447 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 448 | 
            +
                    """webscout text search. Query params: https://duckduckgo.com/params.
         | 
| 449 |  | 
| 450 | 
             
                    Args:
         | 
| 451 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 479 | 
             
                        payload["vqd"] = vqd
         | 
| 480 |  | 
| 481 | 
             
                    cache = set()
         | 
| 482 | 
            +
                    results: list[dict[str, str]] = []
         | 
| 483 |  | 
| 484 | 
            +
                    def _text_html_page(s: int) -> list[dict[str, str]]:
         | 
| 485 | 
             
                        payload["s"] = f"{s}"
         | 
| 486 | 
             
                        resp_content = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
         | 
| 487 | 
             
                        if b"No  results." in resp_content:
         | 
|  | |
| 490 | 
             
                        page_results = []
         | 
| 491 | 
             
                        tree = document_fromstring(resp_content, self.parser)
         | 
| 492 | 
             
                        elements = tree.xpath("//div[h2]")
         | 
| 493 | 
            +
                        if not isinstance(elements, list):
         | 
| 494 | 
             
                            return []
         | 
| 495 | 
             
                        for e in elements:
         | 
| 496 | 
             
                            if isinstance(e, _Element):
         | 
| 497 | 
             
                                hrefxpath = e.xpath("./a/@href")
         | 
| 498 | 
            +
                                href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
         | 
| 499 | 
             
                                if (
         | 
| 500 | 
             
                                    href
         | 
| 501 | 
             
                                    and href not in cache
         | 
|  | |
| 505 | 
             
                                ):
         | 
| 506 | 
             
                                    cache.add(href)
         | 
| 507 | 
             
                                    titlexpath = e.xpath("./h2/a/text()")
         | 
| 508 | 
            +
                                    title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
         | 
| 509 | 
             
                                    bodyxpath = e.xpath("./a//text()")
         | 
| 510 | 
            +
                                    body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, list) else ""
         | 
| 511 | 
             
                                    result = {
         | 
| 512 | 
             
                                        "title": _normalize(title),
         | 
| 513 | 
             
                                        "href": _normalize_url(href),
         | 
|  | |
| 532 | 
             
                    self,
         | 
| 533 | 
             
                    keywords: str,
         | 
| 534 | 
             
                    region: str = "wt-wt",
         | 
| 535 | 
            +
                    timelimit: str | None = None,
         | 
| 536 | 
            +
                    max_results: int | None = None,
         | 
| 537 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 538 | 
            +
                    """webscout text search. Query params: https://duckduckgo.com/params.
         | 
| 539 |  | 
| 540 | 
             
                    Args:
         | 
| 541 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 566 | 
             
                        payload["df"] = timelimit
         | 
| 567 |  | 
| 568 | 
             
                    cache = set()
         | 
| 569 | 
            +
                    results: list[dict[str, str]] = []
         | 
| 570 |  | 
| 571 | 
            +
                    def _text_lite_page(s: int) -> list[dict[str, str]]:
         | 
| 572 | 
             
                        payload["s"] = f"{s}"
         | 
| 573 | 
             
                        resp_content = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
         | 
| 574 | 
             
                        if b"No more results." in resp_content:
         | 
|  | |
| 577 | 
             
                        page_results = []
         | 
| 578 | 
             
                        tree = document_fromstring(resp_content, self.parser)
         | 
| 579 | 
             
                        elements = tree.xpath("//table[last()]//tr")
         | 
| 580 | 
            +
                        if not isinstance(elements, list):
         | 
| 581 | 
             
                            return []
         | 
| 582 |  | 
| 583 | 
             
                        data = zip(cycle(range(1, 5)), elements)
         | 
|  | |
| 585 | 
             
                            if isinstance(e, _Element):
         | 
| 586 | 
             
                                if i == 1:
         | 
| 587 | 
             
                                    hrefxpath = e.xpath(".//a//@href")
         | 
| 588 | 
            +
                                    href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
         | 
| 589 | 
             
                                    if (
         | 
| 590 | 
             
                                        href is None
         | 
| 591 | 
             
                                        or href in cache
         | 
|  | |
| 597 | 
             
                                    else:
         | 
| 598 | 
             
                                        cache.add(href)
         | 
| 599 | 
             
                                        titlexpath = e.xpath(".//a//text()")
         | 
| 600 | 
            +
                                        title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
         | 
| 601 | 
             
                                elif i == 2:
         | 
| 602 | 
             
                                    bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
         | 
| 603 | 
             
                                    body = (
         | 
| 604 | 
             
                                        "".join(str(x) for x in bodyxpath).strip()
         | 
| 605 | 
            +
                                        if bodyxpath and isinstance(bodyxpath, list)
         | 
| 606 | 
             
                                        else ""
         | 
| 607 | 
             
                                    )
         | 
| 608 | 
             
                                    if href:
         | 
|  | |
| 631 | 
             
                    keywords: str,
         | 
| 632 | 
             
                    region: str = "wt-wt",
         | 
| 633 | 
             
                    safesearch: str = "moderate",
         | 
| 634 | 
            +
                    timelimit: str | None = None,
         | 
| 635 | 
            +
                    size: str | None = None,
         | 
| 636 | 
            +
                    color: str | None = None,
         | 
| 637 | 
            +
                    type_image: str | None = None,
         | 
| 638 | 
            +
                    layout: str | None = None,
         | 
| 639 | 
            +
                    license_image: str | None = None,
         | 
| 640 | 
            +
                    max_results: int | None = None,
         | 
| 641 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 642 | 
            +
                    """webscout images search. Query params: https://duckduckgo.com/params.
         | 
| 643 |  | 
| 644 | 
             
                    Args:
         | 
| 645 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 687 | 
             
                    }
         | 
| 688 |  | 
| 689 | 
             
                    cache = set()
         | 
| 690 | 
            +
                    results: list[dict[str, str]] = []
         | 
| 691 |  | 
| 692 | 
            +
                    def _images_page(s: int) -> list[dict[str, str]]:
         | 
| 693 | 
             
                        payload["s"] = f"{s}"
         | 
| 694 | 
             
                        resp_content = self._get_url("GET", "https://duckduckgo.com/i.js", params=payload)
         | 
| 695 | 
             
                        resp_json = json_loads(resp_content)
         | 
|  | |
| 729 | 
             
                    keywords: str,
         | 
| 730 | 
             
                    region: str = "wt-wt",
         | 
| 731 | 
             
                    safesearch: str = "moderate",
         | 
| 732 | 
            +
                    timelimit: str | None = None,
         | 
| 733 | 
            +
                    resolution: str | None = None,
         | 
| 734 | 
            +
                    duration: str | None = None,
         | 
| 735 | 
            +
                    license_videos: str | None = None,
         | 
| 736 | 
            +
                    max_results: int | None = None,
         | 
| 737 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 738 | 
            +
                    """webscout videos search. Query params: https://duckduckgo.com/params.
         | 
| 739 |  | 
| 740 | 
             
                    Args:
         | 
| 741 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 774 | 
             
                    }
         | 
| 775 |  | 
| 776 | 
             
                    cache = set()
         | 
| 777 | 
            +
                    results: list[dict[str, str]] = []
         | 
| 778 |  | 
| 779 | 
            +
                    def _videos_page(s: int) -> list[dict[str, str]]:
         | 
| 780 | 
             
                        payload["s"] = f"{s}"
         | 
| 781 | 
             
                        resp_content = self._get_url("GET", "https://duckduckgo.com/v.js", params=payload)
         | 
| 782 | 
             
                        resp_json = json_loads(resp_content)
         | 
|  | |
| 806 | 
             
                    keywords: str,
         | 
| 807 | 
             
                    region: str = "wt-wt",
         | 
| 808 | 
             
                    safesearch: str = "moderate",
         | 
| 809 | 
            +
                    timelimit: str | None = None,
         | 
| 810 | 
            +
                    max_results: int | None = None,
         | 
| 811 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 812 | 
            +
                    """webscout news search. Query params: https://duckduckgo.com/params.
         | 
| 813 |  | 
| 814 | 
             
                    Args:
         | 
| 815 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 843 | 
             
                        payload["df"] = timelimit
         | 
| 844 |  | 
| 845 | 
             
                    cache = set()
         | 
| 846 | 
            +
                    results: list[dict[str, str]] = []
         | 
| 847 |  | 
| 848 | 
            +
                    def _news_page(s: int) -> list[dict[str, str]]:
         | 
| 849 | 
             
                        payload["s"] = f"{s}"
         | 
| 850 | 
             
                        resp_content = self._get_url("GET", "https://duckduckgo.com/news.js", params=payload)
         | 
| 851 | 
             
                        resp_json = json_loads(resp_content)
         | 
|  | |
| 878 |  | 
| 879 | 
             
                    return list(islice(results, max_results))
         | 
| 880 |  | 
| 881 | 
            +
                def answers(self, keywords: str) -> list[dict[str, str]]:
         | 
| 882 | 
            +
                    """webscout instant answers. Query params: https://duckduckgo.com/params.
         | 
| 883 |  | 
| 884 | 
             
                    Args:
         | 
| 885 | 
             
                        keywords: keywords for query,
         | 
|  | |
| 949 |  | 
| 950 | 
             
                    return results
         | 
| 951 |  | 
| 952 | 
            +
                def suggestions(self, keywords: str, region: str = "wt-wt") -> list[dict[str, str]]:
         | 
| 953 | 
            +
                    """webscout suggestions. Query params: https://duckduckgo.com/params.
         | 
| 954 |  | 
| 955 | 
             
                    Args:
         | 
| 956 | 
             
                        keywords: keywords for query.
         | 
|  | |
| 977 | 
             
                def maps(
         | 
| 978 | 
             
                    self,
         | 
| 979 | 
             
                    keywords: str,
         | 
| 980 | 
            +
                    place: str | None = None,
         | 
| 981 | 
            +
                    street: str | None = None,
         | 
| 982 | 
            +
                    city: str | None = None,
         | 
| 983 | 
            +
                    county: str | None = None,
         | 
| 984 | 
            +
                    state: str | None = None,
         | 
| 985 | 
            +
                    country: str | None = None,
         | 
| 986 | 
            +
                    postalcode: str | None = None,
         | 
| 987 | 
            +
                    latitude: str | None = None,
         | 
| 988 | 
            +
                    longitude: str | None = None,
         | 
| 989 | 
             
                    radius: int = 0,
         | 
| 990 | 
            +
                    max_results: int | None = None,
         | 
| 991 | 
            +
                ) -> list[dict[str, str]]:
         | 
| 992 | 
            +
                    """webscout maps search. Query params: https://duckduckgo.com/params.
         | 
| 993 |  | 
| 994 | 
             
                    Args:
         | 
| 995 | 
             
                        keywords: keywords for query
         | 
|  | |
| 1072 | 
             
                    logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}")
         | 
| 1073 |  | 
| 1074 | 
             
                    cache = set()
         | 
| 1075 | 
            +
                    results: list[dict[str, str]] = []
         | 
| 1076 |  | 
| 1077 | 
             
                    def _maps_page(
         | 
| 1078 | 
            +
                        bbox: tuple[Decimal, Decimal, Decimal, Decimal],
         | 
| 1079 | 
            +
                    ) -> list[dict[str, str]] | None:
         | 
| 1080 | 
             
                        if max_results and len(results) >= max_results:
         | 
| 1081 | 
             
                            return None
         | 
| 1082 | 
             
                        lat_t, lon_l, lat_b, lon_r = bbox
         | 
|  | |
| 1163 |  | 
| 1164 | 
             
                    return list(islice(results, max_results))
         | 
| 1165 |  | 
| 1166 | 
            +
                def translate(self, keywords: list[str] | str, from_: str | None = None, to: str = "en") -> list[dict[str, str]]:
         | 
| 1167 | 
            +
                    """webscout translate.
         | 
|  | |
|  | |
| 1168 |  | 
| 1169 | 
             
                    Args:
         | 
| 1170 | 
             
                        keywords: string or list of strings to translate.
         | 
|  | |
| 1191 | 
             
                    if from_:
         | 
| 1192 | 
             
                        payload["from"] = from_
         | 
| 1193 |  | 
| 1194 | 
            +
                    def _translate_keyword(keyword: str) -> dict[str, str]:
         | 
| 1195 | 
             
                        resp_content = self._get_url(
         | 
| 1196 | 
             
                            "POST",
         | 
| 1197 | 
             
                            "https://duckduckgo.com/translation.js",
         | 
| 1198 | 
             
                            params=payload,
         | 
| 1199 | 
             
                            content=keyword.encode(),
         | 
| 1200 | 
             
                        )
         | 
| 1201 | 
            +
                        page_data: dict[str, str] = json_loads(resp_content)
         | 
| 1202 | 
             
                        page_data["original"] = keyword
         | 
| 1203 | 
             
                        return page_data
         | 
| 1204 |  | 
|  | |
| 1214 |  | 
| 1215 | 
             
                    return results
         | 
| 1216 |  | 
| 1217 | 
            +
             | 
| 1218 | 
             
            # `import html` does not load the `html.parser` submodule by itself, so
            # import it explicitly here before the attribute access below.
            import html.parser

            # Module-level HTMLParser instance.
            html_parser = html.parser.HTMLParser()
         | 
| 1219 |  | 
| 1220 |  | 

