Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -45,8 +45,10 @@ def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
|
|
45 |
# Extract abstract text from child <p> elements
|
46 |
abstract_text = ' '.join(p.text.strip() for p in en_abstract.findall('default:p', namespaces) if p.text)
|
47 |
|
48 |
-
#
|
49 |
-
|
|
|
|
|
50 |
date = date_elem.text if date_elem is not None else 'N/A'
|
51 |
|
52 |
# Build the dictionary and add it to the list
|
@@ -60,6 +62,7 @@ def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
|
|
60 |
return extracted_patents
|
61 |
|
62 |
|
|
|
63 |
def search_from_abstract(query,retried=False):
|
64 |
|
65 |
base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"
|
|
|
45 |
# Extract abstract text from child <p> elements
|
46 |
abstract_text = ' '.join(p.text.strip() for p in en_abstract.findall('default:p', namespaces) if p.text)
|
47 |
|
48 |
+
# **FIX:** The XPath has been corrected to include the namespace prefix for each element in the path.
|
49 |
+
# This path is also made more specific to target the date within the 'docdb' document-id,
|
50 |
+
# ensuring the correct date is retrieved.
|
51 |
+
date_elem = doc.find('./default:bibliographic-data/default:publication-reference/default:document-id[@document-id-type="docdb"]/default:date', namespaces)
|
52 |
date = date_elem.text if date_elem is not None else 'N/A'
|
53 |
|
54 |
# Build the dictionary and add it to the list
|
|
|
62 |
return extracted_patents
|
63 |
|
64 |
|
65 |
+
|
66 |
def search_from_abstract(query,retried=False):
|
67 |
|
68 |
base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"
|