Spaces:

cmcmaster
/

pharmacy-mcp

Running

pharmacy-mcp / drug_data_endpoints.py

Chris McMaster

Updates, improvements, new ADR features

f32824f 3 months ago

15.2 kB

	import requests
	import re
	import bs4
	from datasets import load_dataset
	import pandas as pd
	import logging

	from caching import with_caching
	from utils import with_error_handling, make_api_request

	logger = logging.getLogger(__name__)

	# Load the LiverTox table once, at import time. Any failure is logged and
	# leaves ``livertox_df`` as None so the module still imports and callers can
	# detect that the dataset is unavailable.
	try:
	    livertox_dataset = load_dataset("cmcmaster/livertox", split="train")
	    livertox_df = livertox_dataset.to_pandas()
	    logger.info(f"Loaded LiverTox dataset with {len(livertox_df)} drugs")
	except Exception as load_error:
	    livertox_df = None
	    logger.error(f"Could not load LiverTox dataset: {load_error}")


	@with_error_handling
	@with_caching(ttl=1800)
	def search_adverse_events(drug_name: str, limit: int = 5):
	    """
	    Search FAERS (openFDA ``drug/event``) for a drug and return brief summaries.

	    Args:
	        drug_name: Generic or brand name to search (case-insensitive).
	        limit: Maximum number of FAERS safety reports to return; clamped to
	            the range 1-100 before the request is sent.

	    Returns:
	        Dict with a ``contexts`` key - list of objects ``{id, text}`` suitable
	        for an LLM to inject as context - plus ``total_found`` and ``query``.
	        ``text`` joins the non-empty reaction terms found among the first
	        three reactions of a report with "; ". A 404 response yields an empty
	        ``contexts`` list together with a ``message`` key.

	    Raises:
	        ValueError: If ``drug_name`` is empty or whitespace only.
	        requests.exceptions.RequestException: On any status other than 200/404.
	    """
	    # Input validation
	    if not drug_name or not drug_name.strip():
	        raise ValueError("Drug name cannot be empty")

	    base_url = "https://api.fda.gov/drug/event.json"
	    query_params = {
	        "search": f'patient.drug.medicinalproduct:"{drug_name.strip()}"',
	        "limit": min(max(1, limit), 100),  # Ensure limit is between 1 and 100
	    }

	    response = make_api_request(base_url, query_params, timeout=10)

	    if response.status_code == 404:
	        # Return empty results instead of error for not found
	        return {
	            "contexts": [],
	            "total_found": 0,
	            "query": drug_name,
	            "message": "No adverse events found for this drug",
	        }
	    if response.status_code != 200:
	        raise requests.exceptions.RequestException(f"FAERS search failed: {response.status_code}")

	    data = response.json()
	    contexts = []
	    for record in data.get("results", []):
	        # ``or`` guards against keys that are present but null in the payload.
	        reactions = ((record.get("patient") or {}).get("reaction") or [])[:3]
	        # Drop missing/empty terms: they would otherwise produce stray
	        # "; ; " separators, and a null term would make ``join`` raise.
	        terms = [str(rx["reactionmeddrapt"]) for rx in reactions if rx.get("reactionmeddrapt")]
	        contexts.append({"id": str(record.get("safetyreportid")), "text": "; ".join(terms)})

	    return {
	        "contexts": contexts,
	        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
	        "query": drug_name,
	    }

	@with_error_handling
	@with_caching(ttl=3600)
	def fetch_event_details(event_id: str):
	    """
	    Fetch a full FAERS case by safety-report ID.

	    Args:
	        event_id: Numeric FAERS ``safetyreportid`` string.

	    Returns:
	        Structured JSON with patient drugs, reactions, seriousness flag and the
	        full raw record (under ``full_record``). ``serious`` is True only when
	        the record's ``serious`` field is "1".

	    Raises:
	        ValueError: If ``event_id`` is empty, or no record matches it.
	        requests.exceptions.RequestException: On any status other than 200/404.
	    """
	    if event_id is None or not str(event_id).strip():
	        raise ValueError("Event ID cannot be empty")

	    base_url = "https://api.fda.gov/drug/event.json"
	    query_params = {
	        "search": f'safetyreportid:"{str(event_id).strip()}"'
	    }

	    response = make_api_request(base_url, query_params)

	    if response.status_code == 404:
	        # openFDA answers 404 when a search has no matches; report it the
	        # same way as an empty result list.
	        raise ValueError("Record not found")
	    if response.status_code != 200:
	        raise requests.exceptions.RequestException(f"Event fetch failed: {response.status_code}")

	    data = response.json()
	    if not data.get("results"):
	        raise ValueError("Record not found")

	    rec = data["results"][0]
	    patient = rec.get("patient") or {}

	    return {
	        "event_id": event_id,
	        "drugs": [d.get("medicinalproduct") for d in patient.get("drug", [])],
	        "reactions": [rx.get("reactionmeddrapt") for rx in patient.get("reaction", [])],
	        # openFDA codes this field as "1" (serious) or "2" (not serious), so
	        # a truthiness test on the integer would flag every report as serious.
	        "serious": str(rec.get("serious", "")).strip() == "1",
	        "full_record": rec,
	    }

	@with_error_handling
	@with_caching(ttl=7200)
	def drug_label_warnings(drug_name: str):
	"""
	Return boxed warning, contraindications, interactions text and parsed interaction table.

	Queries the openFDA drug label endpoint for the first label whose
	``openfda.generic_name`` matches ``drug_name`` and extracts its safety sections.

	Args:
	drug_name: Generic name preferred.

	Returns:
	Dict with ``boxed_warning``, ``contraindications``,
	``drug_interactions_section`` (strings) and ``drug_interactions_table`` (parsed list,
	or the string "Not found or not applicable." when nothing was parsed),
	plus the ``drug_name`` that was passed in.

	Raises:
	requests.exceptions.RequestException: If the API status code is not 200.
	ValueError: If the response contains no results.
	(Presumably intercepted by ``with_error_handling`` - its behavior is not visible here.)
	"""
	base_url = "https://api.fda.gov/drug/label.json"
	# Quoted search on the generic-name field; only one label is requested.
	query_params = {
	"search": f'openfda.generic_name:"{drug_name}"',
	"limit": 1
	}

	response = make_api_request(base_url, query_params)

	if response.status_code != 200:
	raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

	data = response.json()
	if not data.get("results"):
	raise ValueError("Label not found")

	lab = data["results"][0]

	# The label may carry an HTML rendering of the interactions table; only the
	# first entry of that list is inspected.
	parsed_interactions_table = []
	interactions_table_html_list = lab.get("drug_interactions_table", [])
	if interactions_table_html_list:
	interactions_table_html = interactions_table_html_list[0]
	if interactions_table_html and isinstance(interactions_table_html, str) and "<table" in interactions_table_html:
	soup = bs4.BeautifulSoup(interactions_table_html, "html.parser")
	table = soup.find("table")
	if table:
	rows = table.find_all("tr")
	for row in rows:
	cols = row.find_all("td")
	# Rows with fewer than two <td> cells are skipped.
	if len(cols) >= 2:
	# A cell's <item> children are joined with "; "; a cell without any
	# falls back to its plain text.
	col1_items = [item.get_text(strip=True) for item in cols[0].find_all("item")]
	col1_text = "; ".join(col1_items) if col1_items else cols[0].get_text(strip=True)

	col2_items = [item.get_text(strip=True) for item in cols[1].find_all("item")]
	col2_text = "; ".join(col2_items) if col2_items else cols[1].get_text(strip=True)

	if col1_text or col2_text:
	parsed_interactions_table.append({
	"drug_or_category1": col1_text,
	"drug_or_category2": col2_text
	})
	# NOTE(review): indentation was lost in this copy of the file, so it is not
	# clear whether this ``else`` pairs with the ``"<table" in ...`` guard or with
	# ``if table:`` - confirm against the repository before re-indenting.
	else:
	parsed_interactions_table.append({
	"raw_html_content": interactions_table_html,
	"parsing_error": "No <table> tag found."
	})

	# Each label section is read as a list; its first entry is returned, or ""
	# when the section is absent from the label.
	return {
	"boxed_warning": lab.get("boxed_warning", [""])[0],
	"contraindications": lab.get("contraindications", [""])[0],
	"drug_interactions_section": lab.get("drug_interactions", [""])[0],
	"drug_interactions_table": parsed_interactions_table if parsed_interactions_table else "Not found or not applicable.",
	"drug_name": drug_name
	}

	@with_error_handling
	@with_caching(ttl=3600)
	def drug_recalls(drug_name: str, limit: int = 5):
	    """
	    Return recent FDA recall events for a drug (openFDA ``drug/enforcement``).

	    Args:
	        drug_name: Free-text search string matched against ``product_description``.
	        limit: Max rows; clamped to the range 1-50 before the request is sent.

	    Returns:
	        Dict with ``recalls`` (list of notices with recall_number, status,
	        classification and a reason truncated to 120 characters),
	        ``total_found`` and ``query``. A 404 response yields an empty
	        ``recalls`` list together with a ``message`` key.

	    Raises:
	        ValueError: If ``drug_name`` is empty or whitespace only.
	        requests.exceptions.RequestException: On any status other than 200/404.
	    """
	    if not drug_name or not drug_name.strip():
	        raise ValueError("Drug name cannot be empty")

	    base_url = "https://api.fda.gov/drug/enforcement.json"
	    query_params = {
	        "search": f'product_description:"{drug_name.strip()}"',
	        # Lower bound as well as upper: a zero or negative limit is never
	        # a meaningful request.
	        "limit": min(max(1, limit), 50),
	    }

	    response = make_api_request(base_url, query_params)

	    if response.status_code == 404:
	        # Same convention as the adverse-event search: "no matches" is an
	        # empty result, not a failure.
	        return {
	            "recalls": [],
	            "total_found": 0,
	            "query": drug_name,
	            "message": "No recalls found for this drug",
	        }
	    if response.status_code != 200:
	        raise requests.exceptions.RequestException(f"Recall search failed: {response.status_code}")

	    data = response.json()
	    events = []
	    for notice in data.get("results", []):
	        # ``or ""`` also covers a key that is present but null.
	        reason = notice.get("reason_for_recall") or ""
	        events.append({
	            "recall_number": notice.get("recall_number"),
	            "status": notice.get("status"),
	            "classification": notice.get("classification"),
	            "reason": reason[:120] + ("…" if len(reason) > 120 else ""),
	        })

	    return {
	        "recalls": events,
	        "total_found": data.get("meta", {}).get("results", {}).get("total", 0),
	        "query": drug_name,
	    }


	# Section extractors for the "Use in Specific Populations" label text.
	# Group 1 captures the body that follows the section heading, lazily, up to
	# the next "8.x" heading, the next named subsection, or the end of the text.
	# Alternation must use a plain ``|``: an escaped pipe inside the pattern
	# matches a literal "|" character, and the flags are combined with bitwise OR.
	LACTATION_PAT = re.compile(
	    r"(?:8\.2\s*Lactation|Lactation\s*Risk\s*Summary)\s*(.*?)"
	    r"(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ and [A-Z][a-z]+ of Reproductive Potential|$)",
	    re.I | re.S,
	)
	REPRODUCTIVE_POTENTIAL_PAT = re.compile(
	    r"(?:8\.3\s*(?:Females\s+and\s+Males\s+of\s+Reproductive\s+Potential|Reproductive\s+Potential))\s*(.*?)"
	    r"(?:\n\s*8\.\d|\n\s*[A-Z][a-z]+ Use|$)",
	    re.I | re.S,
	)

	@with_error_handling
	@with_caching(ttl=7200)
	def drug_pregnancy_lactation(drug_name: str):
	    """
	    Return Pregnancy & Lactation text from FDA label with improved search and fallback data.

	    Several openFDA label search fields are tried in turn; the first label
	    that yields any pregnancy, lactation or reproductive-potential text wins.

	    Args:
	        drug_name: Generic name preferred.

	    Returns:
	        Dict with pregnancy_text, pregnancy_registry, lactation_text, and
	        reproductive_potential_text, plus ``drug_name`` and ``data_source``.
	        When no search produces usable text, every field carries a
	        "not available" placeholder instead.

	    Raises:
	        ValueError: If ``drug_name`` is empty or whitespace only.
	    """
	    # Input validation
	    if not drug_name or not drug_name.strip():
	        raise ValueError("Drug name cannot be empty")

	    drug_name = drug_name.strip()

	    # Try multiple search strategies
	    search_strategies = [
	        f'openfda.generic_name:"{drug_name}"',
	        f'openfda.brand_name:"{drug_name}"',
	        f'openfda.substance_name:"{drug_name}"',
	        f'generic_name:"{drug_name}"',
	        f'brand_name:"{drug_name}"',
	    ]

	    base_url = "https://api.fda.gov/drug/label.json"

	    for search_query in search_strategies:
	        try:
	            query_params = {
	                "search": search_query,
	                "limit": 1,
	            }

	            response = make_api_request(base_url, query_params, timeout=8)
	            if not response or response.status_code != 200:
	                continue

	            results = response.json().get("results")
	            if not results:
	                continue
	            lab = results[0]

	            # Extract pregnancy/lactation data
	            populations_text = "\n".join(lab.get("use_in_specific_populations") or [])

	            lactation_match = LACTATION_PAT.search(populations_text)
	            lactation_text = lactation_match.group(1).strip() if lactation_match else ""
	            if not lactation_text:
	                # No heading match, or the heading matched with an empty
	                # body: fall back to the label's dedicated lactation field.
	                lactation_text = (lab.get("lactation") or [""])[0]

	            reproductive_match = REPRODUCTIVE_POTENTIAL_PAT.search(populations_text)
	            reproductive_potential_text = reproductive_match.group(1).strip() if reproductive_match else ""

	            # ``or [""]`` keeps the [0] safe when a field is absent, null or empty.
	            pregnancy_text = (lab.get("pregnancy") or [""])[0]
	            pregnancy_registry = (lab.get("pregnancy_exposure_registry") or [""])[0]

	            # If we found meaningful data, return it
	            if pregnancy_text or lactation_text or reproductive_potential_text:
	                return {
	                    "pregnancy_text": pregnancy_text or "Not found or not specified in the label.",
	                    "pregnancy_registry": pregnancy_registry or "Not specified.",
	                    "lactation_text": lactation_text or "Not found or not specified in the label.",
	                    "reproductive_potential_text": reproductive_potential_text or "Not found or not specified in the label.",
	                    "drug_name": drug_name,
	                    "data_source": f"FDA Label (search: {search_query})",
	                }
	        except Exception as exc:
	            # Best effort: one failing strategy must not stop the others,
	            # but leave a trace instead of discarding the error silently.
	            logger.warning("FDA label lookup failed for %s: %s", search_query, exc)
	            continue

	    # If FDA search fails, return not found message
	    return {
	        "pregnancy_text": "FDA label data not available for this drug.",
	        "pregnancy_registry": "Not specified.",
	        "lactation_text": "FDA label data not available for this drug.",
	        "reproductive_potential_text": "FDA label data not available for this drug.",
	        "drug_name": drug_name,
	        "data_source": "FDA Label (not found)",
	    }



	# Locate the first mention of renal / hepatic impairment in label text.
	# Each pattern spans from the organ word to the nearest following keyword
	# (group 1); DOTALL lets that span cross line breaks. Alternation must use
	# a plain ``|``: an escaped pipe would match a literal "|" character.
	RENAL_PAT = re.compile(r"\brenal\b.*?\b(impairment|dysfunction|failure)\b", re.I | re.S)
	# "Child-Pugh" is accepted with any mix of whitespace, ASCII hyphens or
	# Unicode dashes (U+2010..U+2015) between the two names, or with none.
	HEP_PAT = re.compile(
	    r"\bhepatic\b.*?\b(impairment|dysfunction|child[\s\-\u2010-\u2015]*pugh)\b",
	    re.I | re.S,
	)

	@with_error_handling
	@with_caching(ttl=7200)
	def drug_dose_adjustments(drug_name: str):
	    """
	    Return renal & hepatic dosing excerpts from FDA label.

	    The first entries of the label's ``dosage_and_administration`` and
	    ``use_in_specific_populations`` sections are searched for the first
	    renal and hepatic impairment mentions.

	    Args:
	        drug_name: Generic name.

	    Returns:
	        Dict with renal_excerpt and hepatic_excerpt strings (<=1000 chars each)
	        and ``drug_name``. Each excerpt starts at the matched mention and
	        continues into the text that follows it; "Not found" when there is
	        no mention.

	    Raises:
	        ValueError: If ``drug_name`` is empty, or no label matches it.
	        requests.exceptions.RequestException: If the API status code is not 200.
	    """
	    if not drug_name or not drug_name.strip():
	        raise ValueError("Drug name cannot be empty")

	    base_url = "https://api.fda.gov/drug/label.json"
	    query_params = {
	        "search": f'openfda.generic_name:"{drug_name.strip()}"',
	        "limit": 1,
	    }

	    response = make_api_request(base_url, query_params)

	    if response.status_code != 200:
	        raise requests.exceptions.RequestException(f"Label search failed: {response.status_code}")

	    data = response.json()
	    if not data.get("results"):
	        raise ValueError("Label not found")

	    label = data["results"][0]
	    # ``or [""]`` keeps the [0] safe when a section is absent, null or empty.
	    sections = "\n".join(
	        (label.get(key) or [""])[0]
	        for key in ("dosage_and_administration", "use_in_specific_populations")
	    )

	    def excerpt_from(match):
	        # The pattern itself ends at the keyword ("renal impairment"), so
	        # the match alone would omit the guidance that follows. Take up to
	        # 1000 characters of label text starting at the match instead.
	        if match is None:
	            return "Not found"
	        return sections[match.start():match.start() + 1000]

	    return {
	        "renal_excerpt": excerpt_from(RENAL_PAT.search(sections)),
	        "hepatic_excerpt": excerpt_from(HEP_PAT.search(sections)),
	        "drug_name": drug_name,
	    }

	@with_error_handling
	@with_caching(ttl=1800)
	def drug_livertox_summary(drug_name: str):
	    """
	    Return hepatotoxicity summary from LiverTox dataset.

	    Lookup order: exact (case-insensitive) match on ``drug_name``, then a
	    substring match on ``drug_name``, then a substring match on
	    ``trade_names``. The first matching row is reported.

	    Args:
	        drug_name: Drug name to search for (case-insensitive).

	    Returns:
	        Dict with drug info including hepatotoxicity, management, trade names, etc.,
	        plus ``total_matches`` (rows matched at the stage that succeeded) and
	        ``query``. ``components`` is added when the row has a non-null value.

	    Raises:
	        ValueError: If the dataset failed to load, ``drug_name`` is empty,
	            or no row matches.
	    """
	    if livertox_df is None:
	        raise ValueError("LiverTox dataset not available")

	    # An empty needle would substring-match every row and return an
	    # arbitrary drug, so reject it up front.
	    if not drug_name or not drug_name.strip():
	        raise ValueError("Drug name cannot be empty")

	    drug_name_clean = drug_name.strip().lower()

	    names_lower = livertox_df['drug_name'].str.lower()
	    matches = livertox_df[names_lower == drug_name_clean]

	    if matches.empty:
	        # regex=False: the name is literal text, not a pattern - characters
	        # such as "(" or "+" must not be interpreted as regex syntax.
	        mask = names_lower.str.contains(drug_name_clean, na=False, regex=False)
	        matches = livertox_df[mask]

	    if matches.empty:
	        mask = livertox_df['trade_names'].str.lower().str.contains(drug_name_clean, na=False, regex=False)
	        matches = livertox_df[mask]

	    if matches.empty:
	        raise ValueError(f"Drug '{drug_name}' not found in LiverTox dataset")

	    drug_info = matches.iloc[0]

	    response = {
	        "drug_name": drug_info.get('drug_name', 'N/A'),
	        "trade_names": drug_info.get('trade_names', 'N/A'),
	        "drug_class": drug_info.get('drug_class', 'N/A'),
	        "last_updated": drug_info.get('last_updated', 'N/A'),
	        "hepatotoxicity": drug_info.get('hepatotoxicity', 'N/A'),
	        "mechanism_of_injury": drug_info.get('mechanism_of_injury', 'N/A'),
	        "outcome_and_management": drug_info.get('outcome_and_management', 'N/A'),
	        "introduction": drug_info.get('introduction', 'N/A'),
	        "background": drug_info.get('background', 'N/A'),
	        "source": "LiverTox Dataset (cmcmaster/livertox)",
	        "total_matches": len(matches),
	        "query": drug_name,
	    }

	    components = drug_info.get('components')
	    if pd.notna(components):
	        if isinstance(components, str) and components.startswith('['):
	            import ast
	            try:
	                # literal_eval only accepts Python literals, so a stored
	                # list-as-text is decoded without executing anything.
	                components = ast.literal_eval(components)
	            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	                # Not a well-formed literal: keep the raw string.
	                pass
	        response["components"] = components

	    return response