PapersChat / toolsFunctions.py
as-cle-bert's picture
Upload 4 files
6ca31d3 verified
import urllib, urllib.request
from pydantic import Field
from datetime import datetime
from markitdown import MarkItDown
from Bio import Entrez
import xml.etree.ElementTree as ET
# Shared MarkItDown converter instance; arxiv_tool uses it to turn the saved
# arXiv XML response file into plain text.
md = MarkItDown()
def format_today():
    """Return the current local time and the same moment two years earlier.

    Both values are 12-digit strings of the form ``YYYYMMDDHHMM`` with
    zero-padded month, day, hour and minute.

    Returns:
        A ``(today, two_years_ago)`` tuple of strings. ``two_years_ago`` is
        built by subtracting 2 from the year and reusing the remaining
        fields unchanged, so on Feb 29 it names a calendar date that does
        not exist.
    """
    now = datetime.now()
    # strftime zero-pads every field; the previous hand-rolled padding
    # substituted the hour for the minute whenever minute < 10.
    month_to_minute = now.strftime("%m%d%H%M")
    today = f"{now.year}{month_to_minute}"
    two_years_ago = f"{now.year - 2}{month_to_minute}"
    return today, two_years_ago
# Queries the arXiv export API for up to 3 results matching `search_query`
# submitted within the last two years, saves the raw response to
# "arxiv_results.xml", and returns the MarkItDown text rendering of that file.
# NOTE(review): the docstring is kept to its original one-liner because it is
# presumably consumed as the tool description by the agent framework - confirm.
def arxiv_tool(search_query: str = Field(description="The query with which to search ArXiv database")):
    """A tool to search ArXiv"""
    from urllib.parse import quote_plus

    today, two_years_ago = format_today()
    # quote_plus turns spaces into "+" (as before) and also escapes
    # characters such as "&" or "#" that would otherwise corrupt the URL.
    query = quote_plus(search_query)
    # The date range has to be part of search_query (joined with AND);
    # as a separate "&submittedDate:..." URL parameter it was ignored.
    # %28/%29 are URL-encoded parentheses that keep a multi-word query
    # grouped so the AND applies to the whole of it.
    # NOTE(review): the arXiv API manual specifies these timestamps in GMT,
    # while format_today() uses local time - window edges may be off by the
    # UTC offset.
    url = (
        "http://export.arxiv.org/api/query"
        f"?search_query=%28all:{query}%29+AND+submittedDate:[{two_years_ago}+TO+{today}]"
        "&start=0&max_results=3"
    )
    with urllib.request.urlopen(url) as response:
        content = response.read().decode("utf-8")
    # Write with an explicit encoding so the file matches the UTF-8 payload
    # regardless of the platform default.
    with open("arxiv_results.xml", "w", encoding="utf-8") as xml_file:
        xml_file.write(content)
    result = md.convert("arxiv_results.xml")
    return result.text_content
def search_pubmed(query):
    """Search PubMed for ``query`` and return the IDs of the matches.

    Args:
        query: Search term passed to ``Entrez.esearch`` as ``term``.

    Returns:
        The ``"IdList"`` entry of the parsed search response; at most
        3 IDs are requested (``retmax=3``).
    """
    Entrez.email = "[email protected]" # Replace with your email
    handle = Entrez.esearch(db="pubmed", term=query, retmax=3)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle even when parsing the response fails.
        handle.close()
    return record["IdList"]
def fetch_pubmed_details(pubmed_ids):
    """Download the PubMed records for ``pubmed_ids`` into "biomed_results.xml".

    Args:
        pubmed_ids: Record IDs passed to ``Entrez.efetch`` as ``id``.

    Returns:
        None. The XML response is written to "biomed_results.xml" in the
        current working directory, replacing any previous content.
    """
    Entrez.email = "[email protected]" # Replace with your email
    handle = Entrez.efetch(db="pubmed", id=pubmed_ids, rettype="medline", retmode="xml")
    try:
        records = handle.read()
    finally:
        # Close the handle even when reading the response fails.
        handle.close()
    recs = records.decode("utf-8")
    # Write with the same encoding the payload was decoded with, instead of
    # the platform default, so non-ASCII characters survive the round trip.
    with open("biomed_results.xml", "w", encoding="utf-8") as xml_file:
        xml_file.write(recs)
def _flatten_text(element):
    """Return all text inside ``element``, including nested tags, stripped."""
    return "".join(element.itertext()).strip()


def fetch_xml():
    """Render the articles stored in "biomed_results.xml" as Markdown.

    For every ``PubmedArticle`` element directly under the root, the output
    contains a ``## <title>`` heading followed by an ``**Abstract**:``
    section. Articles are separated by a blank line.

    Returns:
        The Markdown text for all articles (empty string when there are
        none). A missing or empty title becomes "No title"; a missing or
        empty abstract becomes "No abstract".
    """
    tree = ET.parse("biomed_results.xml")
    root = tree.getroot()
    parsed_articles = []
    for article in root.findall('PubmedArticle'):
        # Extract title. itertext() keeps text that follows inline markup
        # (e.g. <i>, <sub>), which a plain .text lookup would drop.
        title = article.find('.//ArticleTitle')
        title_text = _flatten_text(title) if title is not None else ""
        if not title_text:
            title_text = "No title"
        # Extract abstract. An abstract may be split into several
        # AbstractText sections; keep all of them, prefixed by their
        # Label attribute when one is present.
        sections = []
        for part in article.findall('.//Abstract/AbstractText'):
            text = _flatten_text(part)
            if not text:
                continue
            label = part.get("Label")
            sections.append(f"{label}: {text}" if label else text)
        abstract_text = "\n\n".join(sections) if sections else "No abstract"
        # Format output
        formatted_entry = f"## {title_text}\n\n**Abstract**:\n\n{abstract_text}"
        parsed_articles.append(formatted_entry)
    return "\n\n".join(parsed_articles)
# Looks up PubMed IDs for `search_query`; when any are found, downloads their
# records to disk via fetch_pubmed_details and returns the Markdown produced
# by fetch_xml. With no IDs, returns a fixed "no match" message instead.
def pubmed_tool(search_query: str = Field(description="The query with which to search PubMed database")):
    """A tool to search PubMed"""
    matching_ids = search_pubmed(search_query)
    if not matching_ids:
        # Nothing to fetch: skip the download and parsing steps entirely.
        return "There is no significant match in PubMed"
    fetch_pubmed_details(matching_ids)
    return fetch_xml()