# patent-mcp / app.py
# bbfizp, commit 8d3d168 (verified): "Update app.py"
import gradio as gr
import requests
import urllib.parse
import xml.etree.ElementTree as ET
from typing import List, Dict
# Current OAuth bearer token for the EPO OPS API. It is read by
# search_from_abstract() when building the Authorization header and is
# overwritten by update_tk().
# NOTE(review): a literal token is committed here; presumably it only seeds the
# first request and gets refreshed once the API rejects it - confirm, and
# consider not keeping a token value in source control.
athtk = "0dWGjl7XuVq54v012KjGLEhRSLjj"
def update_tk():
    """Fetch a fresh EPO OPS access token and store it in the global ``athtk``.

    Sends a client-credentials grant to the OPS token endpoint and replaces the
    module-level bearer token with the ``access_token`` field of the JSON reply.

    The Basic credential can be overridden with the ``EPO_OPS_BASIC_AUTH``
    environment variable (full header value, e.g. ``"Basic <base64>"``); when
    it is unset the value embedded below is used, as before.

    Raises:
        requests.HTTPError: If the token endpoint answers with a 4xx/5xx status.
        RuntimeError: If the reply does not contain an ``access_token``.
    """
    import os  # local import so this block does not depend on file-level changes

    global athtk

    url = "https://ops.epo.org/3.2/auth/accesstoken"
    payload = 'grant_type=client_credentials'
    # NOTE(review): a client credential is committed in source. Prefer supplying
    # it through EPO_OPS_BASIC_AUTH and rotating the embedded value.
    basic_auth = os.environ.get(
        "EPO_OPS_BASIC_AUTH",
        'Basic RU45NzZ3RE5UM09lZ3ZCUURwMHE0NlJXN0xwZE5CNjNFZTNxRGJ6UnJwNGFWQVBmOnhZR05uQzk1N0dKV0lvcnM2ZWV4TmVrcUFybUVtdzU1ZkxjbUZlcDdDR0w5ZzZCdGsyY0hMeVBHZTkwQTBXOEE=',
    )
    headers = {
        'authorization': basic_auth,
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    # A timeout keeps a stalled token endpoint from hanging the caller forever.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Surface HTTP failures directly instead of as a KeyError on the body.
    response.raise_for_status()

    token = response.json().get('access_token')
    if not token:
        raise RuntimeError("EPO OPS token response did not contain an access_token")
    athtk = token
def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
    """
    Extract English abstracts, country, document number, and date from patent XML data.

    Documents without an English abstract are skipped.

    Args:
        xml_content (str): XML content as a string.

    Returns:
        List[Dict[str, str]]: One dictionary per document with the keys
        'country', 'doc_number', 'date' and 'abstract'. Missing attributes or
        a missing/empty docdb publication date are reported as 'N/A'.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not well-formed XML.
    """
    root = ET.fromstring(xml_content)
    namespaces = {'default': 'http://www.epo.org/exchange'}
    # Publication date taken from the 'docdb' document-id specifically, since a
    # publication-reference can carry several document-id variants.
    date_path = (
        './default:bibliographic-data/default:publication-reference'
        '/default:document-id[@document-id-type="docdb"]/default:date'
    )

    extracted_patents = []
    for doc in root.findall('.//default:exchange-document', namespaces):
        # Skip documents that have no English abstract.
        if (en_abstract := doc.find('.//default:abstract[@lang="en"]', namespaces)) is None:
            continue

        # itertext() also collects text inside and after inline child elements,
        # which a plain `.text` lookup would silently drop.
        paragraphs = (
            ''.join(p.itertext()).strip()
            for p in en_abstract.findall('default:p', namespaces)
        )
        # Drop paragraphs that are empty after stripping so they do not add
        # stray spaces to the joined abstract.
        abstract_text = ' '.join(text for text in paragraphs if text)

        date_elem = doc.find(date_path, namespaces)
        # An empty <date/> element has text None; report it as 'N/A' too.
        date = (date_elem.text or 'N/A') if date_elem is not None else 'N/A'

        extracted_patents.append({
            'country': doc.get('country', 'N/A'),
            'doc_number': doc.get('doc-number', 'N/A'),
            'date': date,
            'abstract': abstract_text,
        })
    return extracted_patents
def search_from_abstract(query: str, retried: bool = False):
    """Search EPO OPS published data by abstract and return the English abstracts.

    Requests results 1-100 from the OPS ``published-data/search/abstract``
    endpoint using the module-level bearer token. If the API rejects the
    request with 400 or 401 the token is refreshed once via ``update_tk()``
    and the search is repeated.

    Args:
        query: Search expression, URL-encoded here and sent as the ``q`` parameter.
        retried: Internal flag; True when this call is the single retry made
            after a token refresh. Callers normally leave it at the default.

    Returns:
        The list produced by ``extract_patent_abstracts`` for the response
        body: one dict per document with the keys 'country', 'doc_number',
        'date' and 'abstract'.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status.
    """
    base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"
    endpoint = "abstract"
    start_range = 1
    end_range = 100
    headers = {
        'accept': 'application/xml',
        'Authorization': f'Bearer {athtk}',
    }
    # The headers are deliberately not printed: they contain the bearer token.
    url = f"{base_url}/{endpoint}?q={urllib.parse.quote_plus(query)}&Range={start_range}-{end_range}"
    # A timeout keeps a stalled API call from blocking the UI indefinitely.
    response = requests.get(url, headers=headers, timeout=30)

    # A rejected token is refreshed and the request retried exactly once;
    # `retried` prevents an endless loop when the failure has another cause
    # (for example a malformed query, which also yields 400).
    if response.status_code in (400, 401) and not retried:
        update_tk()
        return search_from_abstract(query, retried=True)

    if response.status_code == 200:
        return extract_patent_abstracts(response.text)

    print(f"Error: {response.status_code} - {response.text}")
    # Raises for 4xx/5xx; any other non-200 status falls through and returns None.
    response.raise_for_status()
# Gradio front end: a single text input wired to search_from_abstract, with
# the function's result rendered as text.
app = gr.Interface(
    title="Patent Abstract Search",
    description="Search patents by abstract using the European Patent Office API.",
    fn=search_from_abstract,
    inputs="text",
    outputs="text",
)

# Start serving: request a public share link and enable Gradio's MCP server.
app.launch(
    share=True,
    mcp_server=True,
)