# Spaces: Running
import ast
import logging
import re

import bs4
import pandas as pd
import requests
from datasets import load_dataset

from caching import with_caching
from utils import with_error_handling, make_api_request
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Load the LiverTox dataset once, at import time. Any exception raised while
# loading or converting it is logged and leaves ``livertox_df`` set to ``None``
# so that importing this module never fails; ``drug_livertox_summary`` checks
# for that sentinel before querying the frame.
try:
    livertox_dataset = load_dataset("cmcmaster/livertox", split="train")
    livertox_df = livertox_dataset.to_pandas()
    logger.info(f"Loaded LiverTox dataset with {len(livertox_df)} drugs")
except Exception as e:
    # exc_info keeps the traceback so a load failure can be diagnosed from
    # the log alone.
    logger.error(f"Could not load LiverTox dataset: {e}", exc_info=True)
    livertox_df = None
def search_adverse_events(drug_name: str, limit: int = 5):
    """
    Search FAERS for a drug and return brief summaries.

    Args:
        drug_name: Generic or brand name to search.
        limit: Maximum number of FAERS safety reports to return; clamped to
            the range 1-100 before the request is made.

    Returns:
        Dict with a ``contexts`` key - list of objects ``{id, text}`` suitable
        for an LLM to inject as context - plus ``total_found`` and ``query``.
        ``text`` joins up to the first three reaction terms of each report.
        On a 404 response the dict has an empty ``contexts`` list,
        ``total_found`` of 0 and an explanatory ``message``.

    Raises:
        ValueError: If ``drug_name`` is empty or whitespace only.
        requests.exceptions.RequestException: On any status other than
            200 or 404.
    """
    # Input validation
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    # Escape backslashes and double quotes so the name cannot close the quoted
    # phrase early and change the meaning of the query.
    phrase = drug_name.strip().replace("\\", "\\\\").replace('"', '\\"')

    base_url = "https://api.fda.gov/drug/event.json"
    query_params = {
        "search": f'patient.drug.medicinalproduct:"{phrase}"',
        "limit": min(max(1, limit), 100),  # Ensure limit is between 1 and 100
    }
    response = make_api_request(base_url, query_params, timeout=10)

    if response.status_code == 404:
        # Return empty results instead of error for not found
        return {
            "contexts": [],
            "total_found": 0,
            "query": drug_name,
            "message": "No adverse events found for this drug",
        }
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"FAERS search failed: {response.status_code}")

    data = response.json()
    contexts = []
    for rec in data.get("results", []):
        reactions = (rec.get("patient") or {}).get("reaction") or []
        # ``or ""`` keeps the join safe when a term is present but null.
        terms = [rx.get("reactionmeddrapt") or "" for rx in reactions[:3]]
        contexts.append({"id": str(rec.get("safetyreportid")), "text": "; ".join(terms)})

    return {
        "contexts": contexts,
        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
        "query": drug_name,
    }
def fetch_event_details(event_id: str):
    """
    Fetch a full FAERS case by safety-report ID.

    Args:
        event_id: FAERS ``safetyreportid`` string.

    Returns:
        Structured JSON with patient drugs, reactions, seriousness flag and the
        full raw record (under ``full_record``). ``event_id`` is echoed back
        exactly as it was passed in.

    Raises:
        ValueError: If ``event_id`` is empty, or the response holds no results.
        requests.exceptions.RequestException: On any non-200 status.
    """
    if event_id is None or not str(event_id).strip():
        raise ValueError("Event ID cannot be empty")

    # Escape backslashes and double quotes so the ID cannot close the quoted
    # phrase early and change the meaning of the query.
    phrase = str(event_id).strip().replace("\\", "\\\\").replace('"', '\\"')

    base_url = "https://api.fda.gov/drug/event.json"
    query_params = {"search": f'safetyreportid:"{phrase}"'}
    response = make_api_request(base_url, query_params)
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Event fetch failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Record not found")

    rec = data["results"][0]
    patient = rec.get("patient", {})
    # openFDA encodes ``serious`` as "1" (serious) or "2" (not serious), so a
    # plain truthiness test on the integer would flag every report as serious.
    # Comparing against "1" also tolerates a missing or non-numeric value.
    is_serious = str(rec.get("serious", "")).strip() == "1"

    return {
        "event_id": event_id,
        "drugs": [d.get("medicinalproduct") for d in patient.get("drug", [])],
        "reactions": [rx.get("reactionmeddrapt") for rx in patient.get("reaction", [])],
        "serious": is_serious,
        "full_record": rec,
    }
def _parse_interactions_table(table_html):
    """Parse the first ``<table>`` in *table_html* into two-column row dicts."""
    soup = bs4.BeautifulSoup(table_html, "html.parser")
    table = soup.find("table")
    if table is None:
        return [{
            "raw_html_content": table_html,
            "parsing_error": "No <table> tag found.",
        }]

    parsed = []
    for row in table.find_all("tr"):
        cols = row.find_all("td")
        if len(cols) < 2:
            # Rows without two <td> cells carry no pair to record.
            continue
        texts = []
        for col in cols[:2]:
            # Prefer the individual <item> entries when the cell holds a list;
            # otherwise fall back to the cell's whole text.
            items = [item.get_text(strip=True) for item in col.find_all("item")]
            texts.append("; ".join(items) if items else col.get_text(strip=True))
        if any(texts):
            parsed.append({
                "drug_or_category1": texts[0],
                "drug_or_category2": texts[1],
            })
    return parsed


def drug_label_warnings(drug_name: str):
    """
    Return boxed warning, contraindications, interactions text and parsed interaction table.

    Args:
        drug_name: Generic name preferred.

    Returns:
        Dict with ``boxed_warning``, ``contraindications``,
        ``drug_interactions_section`` (strings, empty when the label lacks the
        section), ``drug_interactions_table`` (parsed list, or the string
        ``"Not found or not applicable."`` when nothing was parsed) and
        ``drug_name`` (echoed back as passed in).

    Raises:
        ValueError: If ``drug_name`` is empty, or no label is returned.
        requests.exceptions.RequestException: On any non-200 status.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    # Escape backslashes and double quotes so the name cannot close the quoted
    # phrase early and change the meaning of the query.
    phrase = drug_name.strip().replace("\\", "\\\\").replace('"', '\\"')

    base_url = "https://api.fda.gov/drug/label.json"
    query_params = {
        "search": f'openfda.generic_name:"{phrase}"',
        "limit": 1,
    }
    response = make_api_request(base_url, query_params)
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Label not found")
    lab = data["results"][0]

    def first_text(key):
        # Label sections are lists of strings; tolerate a missing key, a null
        # value or an empty list instead of raising IndexError.
        values = lab.get(key) or [""]
        return values[0]

    parsed_interactions_table = []
    table_html = first_text("drug_interactions_table")
    if table_html and isinstance(table_html, str) and "<table" in table_html:
        parsed_interactions_table = _parse_interactions_table(table_html)

    return {
        "boxed_warning": first_text("boxed_warning"),
        "contraindications": first_text("contraindications"),
        "drug_interactions_section": first_text("drug_interactions"),
        "drug_interactions_table": parsed_interactions_table if parsed_interactions_table else "Not found or not applicable.",
        "drug_name": drug_name,
    }
def drug_recalls(drug_name: str, limit: int = 5):
    """
    Return recent FDA recall events for a drug.

    Args:
        drug_name: Free-text search string.
        limit: Max rows; clamped to the range 1-50 before the request is made.

    Returns:
        Dict with ``recalls`` (list of recall notices with recall_number,
        status, classification and a reason truncated to 120 characters),
        ``total_found`` and ``query``. On a 404 response ``recalls`` is empty,
        ``total_found`` is 0 and a ``message`` key explains why.

    Raises:
        ValueError: If ``drug_name`` is empty or whitespace only.
        requests.exceptions.RequestException: On any status other than
            200 or 404.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    # Escape backslashes and double quotes so the name cannot close the quoted
    # phrase early and change the meaning of the query.
    phrase = drug_name.strip().replace("\\", "\\\\").replace('"', '\\"')

    base_url = "https://api.fda.gov/drug/enforcement.json"
    query_params = {
        "search": f'product_description:"{phrase}"',
        # Lower bound added so a zero or negative limit is never sent.
        "limit": min(max(1, limit), 50),
    }
    response = make_api_request(base_url, query_params)

    if response.status_code == 404:
        # Treat "not found" as an empty result, the same way
        # search_adverse_events does, rather than as a failure.
        return {
            "recalls": [],
            "total_found": 0,
            "query": drug_name,
            "message": "No recalls found for this drug",
        }
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Recall search failed: {response.status_code}")

    data = response.json()
    events = []
    for entry in data.get("results", []):
        # ``or ""`` keeps slicing safe when the reason is present but null.
        reason = entry.get("reason_for_recall") or ""
        events.append({
            "recall_number": entry.get("recall_number"),
            "status": entry.get("status"),
            "classification": entry.get("classification"),
            "reason": reason[:120] + ("…" if len(reason) > 120 else ""),
        })

    return {
        "recalls": events,
        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
        "query": drug_name,
    }
# Captures the text that follows an "8.2 Lactation" / "Lactation Risk Summary"
# heading, up to the next "8.N" heading, a "... of Reproductive Potential"
# heading, or the end of the text.
LACTATION_PAT = re.compile(r"(?:8\.2\s*Lactation|Lactation\s*Risk\s*Summary)\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ and [A-Z][a-z]+ of Reproductive Potential|$)", re.I | re.S)
# Captures the text that follows an "8.3 ... Reproductive Potential" heading,
# up to the next "8.N" heading, a "<Word> Use" heading, or the end of the text.
REPRODUCTIVE_POTENTIAL_PAT = re.compile(r"(?:8\.3\s*(?:Females\s+and\s+Males\s+of\s+Reproductive\s+Potential|Reproductive\s+Potential))\s*(.*?)(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ Use|$)", re.I | re.S)


def _extract_pregnancy_sections(lab):
    """Pull the raw pregnancy, registry, lactation and reproductive texts from one label record."""

    def first_text(key):
        # Label sections are lists of strings; tolerate a missing key, a null
        # value or an empty list.
        values = lab.get(key) or [""]
        return values[0]

    populations_text = "\n".join(lab.get("use_in_specific_populations", []))

    lactation_match = LACTATION_PAT.search(populations_text)
    lactation_text = lactation_match.group(1).strip() if lactation_match else ""
    if not lactation_text:
        # Fall back to the dedicated ``lactation`` field whenever the regex
        # found nothing, including when it matched a heading with no text.
        lactation_text = first_text("lactation")

    reproductive_match = REPRODUCTIVE_POTENTIAL_PAT.search(populations_text)
    reproductive_text = reproductive_match.group(1).strip() if reproductive_match else ""

    return {
        "pregnancy_text": first_text("pregnancy"),
        "pregnancy_registry": first_text("pregnancy_exposure_registry"),
        "lactation_text": lactation_text,
        "reproductive_potential_text": reproductive_text,
    }


def drug_pregnancy_lactation(drug_name: str):
    """
    Return Pregnancy & Lactation text from FDA label with improved search and fallback data.

    Several label fields are searched in turn (generic, brand and substance
    name); the first label that yields any pregnancy, lactation or
    reproductive-potential text is used. A failure on one search is logged and
    the next one is tried.

    Args:
        drug_name: Generic name preferred.

    Returns:
        Dict with pregnancy_text, pregnancy_registry, lactation_text,
        reproductive_potential_text, drug_name (stripped) and data_source.
        Missing sections are replaced by placeholder strings; if no search
        yields data, every text field says the label data is not available.

    Raises:
        ValueError: If ``drug_name`` is empty or whitespace only.
    """
    # Input validation
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")
    drug_name = drug_name.strip()

    # Escape backslashes and double quotes so the name cannot close the quoted
    # phrase early and change the meaning of the query.
    phrase = drug_name.replace("\\", "\\\\").replace('"', '\\"')

    # Try multiple search strategies
    search_fields = (
        "openfda.generic_name",
        "openfda.brand_name",
        "openfda.substance_name",
        "generic_name",
        "brand_name",
    )
    search_strategies = [f'{field}:"{phrase}"' for field in search_fields]
    base_url = "https://api.fda.gov/drug/label.json"

    for search_query in search_strategies:
        try:
            query_params = {"search": search_query, "limit": 1}
            response = make_api_request(base_url, query_params, timeout=8)
            if not response or response.status_code != 200:
                continue
            results = response.json().get("results")
            if not results:
                continue
            sections = _extract_pregnancy_sections(results[0])
        except Exception as exc:
            # Best effort: record why this strategy failed, then try the next.
            logger.warning("Pregnancy/lactation lookup failed for %s: %s", search_query, exc)
            continue

        # If we found meaningful data, return it (the registry alone does not count)
        if (
            sections["pregnancy_text"]
            or sections["lactation_text"]
            or sections["reproductive_potential_text"]
        ):
            return {
                "pregnancy_text": sections["pregnancy_text"] or "Not found or not specified in the label.",
                "pregnancy_registry": sections["pregnancy_registry"] or "Not specified.",
                "lactation_text": sections["lactation_text"] or "Not found or not specified in the label.",
                "reproductive_potential_text": sections["reproductive_potential_text"] or "Not found or not specified in the label.",
                "drug_name": drug_name,
                "data_source": f"FDA Label (search: {search_query})",
            }

    # If FDA search fails, return not found message
    return {
        "pregnancy_text": "FDA label data not available for this drug.",
        "pregnancy_registry": "Not specified.",
        "lactation_text": "FDA label data not available for this drug.",
        "reproductive_potential_text": "FDA label data not available for this drug.",
        "drug_name": drug_name,
        "data_source": "FDA Label (not found)",
    }
# Locate the first mention of renal / hepatic impairment in label text. The
# lazy ``.*?`` stops at the first keyword after "renal" / "hepatic", so the
# match marks where the relevant passage starts rather than its full extent.
RENAL_PAT = re.compile(r"\brenal\b.*?\b(impairment|dysfunction|failure)\b", re.I | re.S)
HEP_PAT = re.compile(r"\bhepatic\b.*?\b(impairment|dysfunction|child(?:--|\s|-)?pugh)\b", re.I | re.S)


def drug_dose_adjustments(drug_name: str, max_chars: int = 1000):
    """
    Return renal & hepatic dosing excerpts from FDA label.

    The first element of the label's ``dosage_and_administration`` and
    ``use_in_specific_populations`` sections are joined and searched. Each
    excerpt starts at the first renal / hepatic impairment mention and runs
    for up to ``max_chars`` characters, so it includes the text that follows
    the mention instead of only the matched phrase.

    Args:
        drug_name: Generic name.
        max_chars: Maximum length of each excerpt (default 1000).

    Returns:
        Dict with renal_excerpt and hepatic_excerpt strings (<= ``max_chars``
        chars each, or ``"Not found"``) and ``drug_name`` as passed in.

    Raises:
        ValueError: If ``drug_name`` is empty, or no label is returned.
        requests.exceptions.RequestException: On any non-200 status.
    """
    if not drug_name or not drug_name.strip():
        raise ValueError("Drug name cannot be empty")

    # Escape backslashes and double quotes so the name cannot close the quoted
    # phrase early and change the meaning of the query.
    phrase = drug_name.strip().replace("\\", "\\\\").replace('"', '\\"')

    base_url = "https://api.fda.gov/drug/label.json"
    query_params = {
        "search": f'openfda.generic_name:"{phrase}"',
        "limit": 1,
    }
    response = make_api_request(base_url, query_params)
    if response.status_code != 200:
        raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

    data = response.json()
    if not data.get("results"):
        raise ValueError("Label not found")
    label = data["results"][0]

    # ``or [""]`` tolerates a missing key, a null value or an empty list.
    sections = "\n".join(
        (label.get(key) or [""])[0]
        for key in ("dosage_and_administration", "use_in_specific_populations")
    )
    window = max(0, max_chars)

    def excerpt(pattern):
        match = pattern.search(sections)
        if match is None:
            return "Not found"
        # Start where the mention starts and keep the following context.
        return sections[match.start():match.start() + window]

    return {
        "renal_excerpt": excerpt(RENAL_PAT),
        "hepatic_excerpt": excerpt(HEP_PAT),
        "drug_name": drug_name,
    }
def drug_livertox_summary(drug_name: str):
    """
    Return hepatotoxicity summary from LiverTox dataset.

    Lookup is tried in three tiers, stopping at the first that matches:
    exact ``drug_name`` (case-insensitive), substring of ``drug_name``, then
    substring of ``trade_names``. The first matching row is reported.

    Args:
        drug_name: Drug name to search for (case-insensitive).

    Returns:
        Dict with drug info including hepatotoxicity, management, trade names,
        etc., plus ``total_matches`` (rows matched in the tier that hit) and
        ``query``. ``components`` is added when the row has a value for it.

    Raises:
        ValueError: If ``drug_name`` is empty, the dataset failed to load, or
            no row matches.
    """
    if not drug_name or not drug_name.strip():
        # An empty needle would substring-match every row and return an
        # arbitrary drug.
        raise ValueError("Drug name cannot be empty")
    if livertox_df is None:
        raise ValueError("LiverTox dataset not available")

    needle = drug_name.strip().lower()
    names = livertox_df["drug_name"].str.lower()

    matches = livertox_df[names == needle]
    if matches.empty:
        # regex=False: the name is literal text, not a pattern, so characters
        # such as "(" or "+" neither raise nor widen the match.
        matches = livertox_df[names.str.contains(needle, na=False, regex=False)]
    if matches.empty:
        trade_names = livertox_df["trade_names"].str.lower()
        matches = livertox_df[trade_names.str.contains(needle, na=False, regex=False)]
    if matches.empty:
        raise ValueError(f"Drug '{drug_name}' not found in LiverTox dataset")

    drug_info = matches.iloc[0]
    response = {
        "drug_name": drug_info.get("drug_name", "N/A"),
        "trade_names": drug_info.get("trade_names", "N/A"),
        "drug_class": drug_info.get("drug_class", "N/A"),
        "last_updated": drug_info.get("last_updated", "N/A"),
        "hepatotoxicity": drug_info.get("hepatotoxicity", "N/A"),
        "mechanism_of_injury": drug_info.get("mechanism_of_injury", "N/A"),
        "outcome_and_management": drug_info.get("outcome_and_management", "N/A"),
        "introduction": drug_info.get("introduction", "N/A"),
        "background": drug_info.get("background", "N/A"),
        "source": "LiverTox Dataset (cmcmaster/livertox)",
        "total_matches": len(matches),
        "query": drug_name,
    }

    components = drug_info.get("components")
    # A list-like value counts as present; only scalars go through notna,
    # whose element-wise result on a list-like cannot be truth-tested.
    if not pd.api.types.is_scalar(components) or pd.notna(components):
        if isinstance(components, str) and components.startswith("["):
            try:
                components = ast.literal_eval(components)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                # Not a valid literal after all: keep the raw string.
                pass
        response["components"] = components

    return response