bbfizp committed on
Commit
83f44f2
·
verified ·
1 Parent(s): 9880381

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -45,8 +45,10 @@ def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
45
  # Extract abstract text from child <p> elements
46
  abstract_text = ' '.join(p.text.strip() for p in en_abstract.findall('default:p', namespaces) if p.text)
47
 
48
- # Directly find the date element and get its text, defaulting to 'N/A'
49
- date_elem = doc.find('.//publication-reference/document-id/date', namespaces)
 
 
50
  date = date_elem.text if date_elem is not None else 'N/A'
51
 
52
  # Build the dictionary and add it to the list
@@ -60,6 +62,7 @@ def extract_patent_abstracts(xml_content: str) -> List[Dict[str, str]]:
60
  return extracted_patents
61
 
62
 
 
63
  def search_from_abstract(query,retried=False):
64
 
65
  base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"
 
45
  # Extract abstract text from child <p> elements
46
  abstract_text = ' '.join(p.text.strip() for p in en_abstract.findall('default:p', namespaces) if p.text)
47
 
48
+ # **FIX:** The XPath has been corrected to include the namespace prefix for each element in the path.
49
+ # This path is also made more specific to target the date within the 'docdb' document-id,
50
+ # ensuring the correct date is retrieved.
51
+ date_elem = doc.find('./default:bibliographic-data/default:publication-reference/default:document-id[@document-id-type="docdb"]/default:date', namespaces)
52
  date = date_elem.text if date_elem is not None else 'N/A'
53
 
54
  # Build the dictionary and add it to the list
 
62
  return extracted_patents
63
 
64
 
65
+
66
  def search_from_abstract(query,retried=False):
67
 
68
  base_url = "https://ops.epo.org/3.2/rest-services/published-data/search"