Spaces:

bbfizp
/

patent-mcp

Running

bbfizp committed on Jun 7

Commit

83f44f2

verified ·

1 Parent(s): 9880381

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,8 +45,10 @@ def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
         # Extract abstract text from child <p> elements
         abstract_text = ' '.join(p.text.strip() for p in en_abstract.findall('default:p', namespaces) if p.text)
-        # Directly find the date element and get its text, defaulting to 'N/A'
-        date_elem = doc.find('.//publication-reference/document-id/date', namespaces)
         date = date_elem.text if date_elem is not None else 'N/A'
         # Build the dictionary and add it to the list
@@ -60,6 +62,7 @@ def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
     return extracted_patents
 def search_from_abstract(query,retried=False):
     base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"

         # Extract abstract text from child <p> elements
         abstract_text = ' '.join(p.text.strip() for p in en_abstract.findall('default:p', namespaces) if p.text)
+        # **FIX:** The XPath has been corrected to include the namespace prefix for each element in the path.
+        # This path is also made more specific to target the date within the 'docdb' document-id,
+        # ensuring the correct date is retrieved.
+        date_elem = doc.find('./default:bibliographic-data/default:publication-reference/default:document-id[@document-id-type="docdb"]/default:date', namespaces)
         date = date_elem.text if date_elem is not None else 'N/A'
         # Build the dictionary and add it to the list
     return extracted_patents
 def search_from_abstract(query,retried=False):
     base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"