"""Gradio app that searches European Patent Office (OPS) abstracts."""

import os
import urllib.parse
import xml.etree.ElementTree as ET
from typing import List, Dict

import gradio as gr
import requests

# NOTE(review): an access token and client credentials are committed in this
# file. They are kept as fallbacks so existing deployments keep working, but
# they should be rotated and supplied through the environment instead.
athtk = os.environ.get("EPO_OPS_ACCESS_TOKEN", "0dWGjl7XuVq54v012KjGLEhRSLjj")

# Full value of the ``authorization`` header (including the "Basic " prefix)
# sent to the token endpoint.
_BASIC_AUTH = os.environ.get(
    "EPO_OPS_BASIC_AUTH",
    'Basic RU45NzZ3RE5UM09lZ3ZCUURwMHE0NlJXN0xwZE5CNjNFZTNxRGJ6UnJwNGFWQVBmOnhZR05uQzk1N0dKV0lvcnM2ZWV4TmVrcUFybUVtdzU1ZkxjbUZlcDdDR0w5ZzZCdGsyY0hMeVBHZTkwQTBXOEE=',
)

# Without a timeout ``requests`` waits forever on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30


def update_tk():
    """Request a new access token and store it in the module-level ``athtk``.

    Raises:
        requests.HTTPError: If the token endpoint answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    global athtk

    url = "https://ops.epo.org/3.2/auth/accesstoken"
    payload = 'grant_type=client_credentials'
    headers = {
        'authorization': _BASIC_AUTH,
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    response = requests.post(
        url, headers=headers, data=payload, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    # Fail with the real HTTP error instead of a KeyError on the JSON body.
    response.raise_for_status()
    athtk = response.json()['access_token']


def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
    """Extract English abstracts, country, document number and date from patent XML.

    Documents without an English abstract are skipped.

    Args:
        xml_content: XML content as a string.

    Returns:
        One dictionary per document with the keys ``country``,
        ``doc_number``, ``date`` and ``abstract``. ``'N/A'`` is used for a
        missing country, document number or date.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not valid XML.
    """
    root = ET.fromstring(xml_content)
    namespaces = {'default': 'http://www.epo.org/exchange'}

    # Every step of the path needs the namespace prefix. The predicate limits
    # the match to the 'docdb' document-id so its date is the one returned.
    date_path = (
        './default:bibliographic-data/default:publication-reference'
        '/default:document-id[@document-id-type="docdb"]/default:date'
    )

    extracted_patents = []
    for doc in root.findall('.//default:exchange-document', namespaces):
        # No English abstract: nothing to report for this document.
        if (en_abstract := doc.find('.//default:abstract[@lang="en"]', namespaces)) is None:
            continue

        # itertext() also collects text inside and after inline child
        # elements, which ``p.text`` alone would drop.
        paragraphs = (
            ''.join(p.itertext()).strip()
            for p in en_abstract.findall('default:p', namespaces)
        )
        abstract_text = ' '.join(text for text in paragraphs if text)

        date_elem = doc.find(date_path, namespaces)
        # An empty <date/> element has text None; report it as missing too.
        date = date_elem.text if date_elem is not None and date_elem.text else 'N/A'

        extracted_patents.append({
            'country': doc.get('country', 'N/A'),
            'doc_number': doc.get('doc-number', 'N/A'),
            'date': date,
            'abstract': abstract_text,
        })

    return extracted_patents


def search_from_abstract(query: str, retried: bool = False):
    """Search published patent data by abstract and return the English abstracts.

    Requests results 1-100 from the abstract search endpoint. On a 400
    response the access token is refreshed once and the search is repeated.

    Args:
        query: Query string sent as the ``q`` parameter of the search.
        retried: True when this call is already the retry after a token
            refresh; prevents an endless refresh loop.

    Returns:
        The list produced by ``extract_patent_abstracts`` when the service
        answers with status 200.

    Raises:
        requests.HTTPError: If the service answers with an error status
            (after the single retry in the 400 case).
        requests.RequestException: On connection problems or timeout.
    """
    base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"
    endpoint = "abstract"
    start_range = 1
    end_range = 100

    # The headers carry the bearer token, so they are deliberately not logged.
    headers = {
        'accept': 'application/xml',
        'Authorization': f'Bearer {athtk}',
    }

    url = f"{base_url}/{endpoint}?q={urllib.parse.quote_plus(query)}&Range={start_range}-{end_range}"
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)

    if response.status_code == 400 and not retried:
        # Treated as a stale token: refresh it and try exactly once more.
        update_tk()
        return search_from_abstract(query, retried=True)
    elif response.status_code == 200:
        return extract_patent_abstracts(response.text)
    else:
        print(f"Error: {response.status_code} - {response.text}")
        response.raise_for_status()


app = gr.Interface(
    fn=search_from_abstract,
    inputs="text",
    outputs="text",
    title="Patent Abstract Search",
    description="Search patents by abstract using the European Patent Office API.",
)

app.launch(mcp_server=True, share=True)