Spaces:

cmcmaster
/

pharmacy-mcp

Running

App Files Files Community

Chris McMaster committed on Jun 10

Commit

819adf9

1 Parent(s): 04e78e6

Improved drug parsing and generic matching

Browse files

Files changed (5) hide show

.gitignore +14 -0
app.py +74 -53
brand_to_generic.py +63 -100
dbi_mcp.py +401 -10
requirements.txt +4 -2

.gitignore ADDED Viewed

	@@ -0,0 +1,14 @@

+inputs.json
+/venv/
+/.venv/
+# Standard python project gitignore
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.pyw
+*.pyz
+*.pywz
+*.pyzw
+*.pyzwz

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import gradio as gr
 from typing import Dict, Any
-from datetime import datetime
-from brand_to_generic import brand_lookup
 from dbi_mcp import dbi_mcp, dbi_mcp_mixed_routes
 from clinical_calculators import (
     cockcroft_gault_creatinine_clearance,
@@ -33,6 +33,64 @@ from adr_analysis import (
 )
 import time
 import sys
 @with_error_handling
@@ -47,10 +105,7 @@ def _brand_lookup_gradio(brand_name: str, prefer_countries_str: str = ""):
     return standardize_response(result, "brand_to_generic")
-@with_error_handling
-def _dbi_mcp_gradio(text_block: str, route: str = "oral"):
-    result = dbi_mcp(text_block, route=route, ref_csv="dbi_reference_by_route.csv")
-    return standardize_response(result, "dbi_calculator")
 @with_error_handling
@@ -224,45 +279,28 @@ def drug_livertox_summary_mcp(drug_name: str) -> str:
 @with_error_handling
-def brand_to_generic_lookup_mcp(brand_name: str, prefer_countries: str = "US") -> str:
     """
     Look up generic drug information from brand names.
     Args:
         brand_name (str): Brand name to look up
-        prefer_countries (str): Comma-separated ISO country codes (e.g., "US,CA")
     Returns:
         str: JSON string with generic drug information and country-specific data
     """
-    result = _brand_lookup_gradio(brand_name, prefer_countries)
-    return format_json_output(result)
-@with_error_handling
-def calculate_drug_burden_index_mcp(drug_list: str, route: str = "oral") -> str:
-    """
-    Calculate Drug Burden Index (DBI) from a list of medications.
-    Args:
-        drug_list (str): Drug list (one per line, include dose and frequency - also write "prn" if the drug is a PRN medication)
-        route (str): Route of administration (default: "oral")
-    Returns:
-        str: JSON string with DBI calculation results and individual drug contributions
-    """
-    result = _dbi_mcp_gradio(drug_list, route)
     return format_json_output(result)
 @with_error_handling
-def calculate_drug_burden_index_mixed_routes_mcp(drug_list: str) -> str:
     """
     Calculate Drug Burden Index (DBI) from a list of medications with automatic route detection.
-    This enhanced version automatically detects the route of administration for each medication
     (oral, transdermal patches, parenteral injections, etc.) and uses the appropriate reference
-    data for each route. Perfect for mixed medication lists.
     Args:
         drug_list (str): Drug list (one per line, include dose and frequency - also write "prn" if the drug is a PRN medication)
@@ -278,6 +316,9 @@ def calculate_drug_burden_index_mixed_routes_mcp(drug_list: str) -> str:
     return format_json_output(result)
 @with_error_handling
 def calculate_creatinine_clearance_mcp(
     age: str, weight_kg: str, serum_creatinine: str, is_female: str
@@ -746,10 +787,6 @@ brand_generic_ui = gr.Interface(
     fn=brand_to_generic_lookup_mcp,
     inputs=[
         gr.Text(label="Brand Name"),
-        gr.Text(
-            label="Preferred Countries (comma-separated ISO codes, e.g., US,CA)",
-            value="US",
-        ),
     ],
     outputs=gr.JSON(label="Output"),
     title="Brand to Generic",
@@ -759,22 +796,6 @@ brand_generic_ui = gr.Interface(
 dbi_calculator_ui = gr.Interface(
     fn=calculate_drug_burden_index_mcp,
-    inputs=[
-        gr.Textbox(
-            label="Drug List (one per line, include dose and frequency)",
-            lines=10,
-            placeholder="e.g., Aspirin 100mg daily\nFurosemide 40mg PRN",
-        ),
-        gr.Text(label="Route of Administration", value="oral"),
-    ],
-    outputs=gr.JSON(label="DBI Calculation"),
-    title="DBI Calculator (Single Route)",
-    api_name="dbi_calculator",
-    description="Calculate Drug Burden Index (DBI) from a list of medications. Supports PRN and various dose formats.",
-)
-dbi_mixed_routes_ui = gr.Interface(
-    fn=calculate_drug_burden_index_mixed_routes_mcp,
     inputs=[
         gr.Textbox(
             label="Drug List (one per line, include dose and frequency)",
@@ -783,11 +804,13 @@ dbi_mixed_routes_ui = gr.Interface(
         ),
     ],
     outputs=gr.JSON(label="DBI Calculation with Route Detection"),
-    title="DBI Calculator (Mixed Routes)",
-    api_name="dbi_calculator_mixed_routes",
-    description="Enhanced DBI calculator that automatically detects routes (oral, patches, injections, etc.) and uses appropriate reference data for each medication.",
 )
 cockcroft_gault_ui = gr.Interface(
     fn=calculate_creatinine_clearance_mcp,
     inputs=[
@@ -894,7 +917,6 @@ demo = gr.TabbedInterface(
         livertox_ui,
         brand_generic_ui,
         dbi_calculator_ui,
-        dbi_mixed_routes_ui,
         cockcroft_gault_ui,
         ckd_epi_ui,
         child_pugh_ui,
@@ -913,7 +935,6 @@ demo = gr.TabbedInterface(
         "LiverTox",
         "Brand to Generic",
         "DBI Calculator",
-        "DBI Mixed Routes",
         "Creatinine CL",
         "eGFR",
         "Child-Pugh",

 import gradio as gr
 from typing import Dict, Any
+from datetime import datetime, timedelta
+from brand_to_generic import brand_lookup, set_pbs_data
 from dbi_mcp import dbi_mcp, dbi_mcp_mixed_routes
 from clinical_calculators import (
     cockcroft_gault_creatinine_clearance,
 )
 import time
 import sys
+import logging
+from apscheduler.schedulers.background import BackgroundScheduler
+import pandas as pd
+try:
+    from datasets import load_dataset
+    HAVE_DATASETS = True
+except ImportError:
+    HAVE_DATASETS = False
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
def load_pbs_data():
    """Load the PBS item table from the Hugging Face Hub into brand_to_generic.

    Tries the dataset configuration named after the current month
    ("YYYY-MM") first and falls back to the previous month. The first
    configuration that loads and exposes a 'train' split is converted to a
    DataFrame and handed to ``set_pbs_data``. If the ``datasets`` library is
    unavailable, or neither month can be loaded, an empty DataFrame is
    installed instead.
    """
    if not HAVE_DATASETS:
        logger.warning("`datasets` library not installed. Skipping PBS data load.")
        set_pbs_data(pd.DataFrame())
        return

    now = datetime.now()
    current_month_str = now.strftime("%Y-%m")
    # One day before the first of this month always falls in the previous month.
    last_month_str = (now.replace(day=1) - timedelta(days=1)).strftime("%Y-%m")

    for month_str in (current_month_str, last_month_str):
        try:
            logger.info(f"Attempting to load PBS data for {month_str}")
            # NOTE(review): trust_remote_code=True presumably allows the dataset
            # repository's loading script to run locally - confirm this is intended.
            ds = load_dataset("cmcmaster/pbs_items", month_str, trust_remote_code=True)
            if 'train' not in ds:
                logger.error(f"No 'train' split found in dataset for month {month_str}")
                continue
            pbs_df = ds['train'].to_pandas()
            set_pbs_data(pbs_df)
            logger.info(f"Successfully loaded PBS data for {month_str}. Shape: {pbs_df.shape}")
            return
        except Exception as e:
            # Best effort: any failure for this month just moves on to the next candidate.
            logger.warning(f"Failed to load PBS data for {month_str}: {e}")

    logger.error(f"Failed to load PBS data for both {current_month_str} and {last_month_str}. PBS lookups will be disabled.")
    set_pbs_data(pd.DataFrame())
+# Initial load on startup
+logger.info("Performing initial load of PBS data...")
+load_pbs_data()
+# Schedule daily refresh
+scheduler = BackgroundScheduler(daemon=True)
+scheduler.add_job(load_pbs_data, 'interval', days=1)
+scheduler.start()
 @with_error_handling
     return standardize_response(result, "brand_to_generic")
 @with_error_handling
 @with_error_handling
+def brand_to_generic_lookup_mcp(brand_name: str) -> str:
     """
     Look up generic drug information from brand names.
     Args:
         brand_name (str): Brand name to look up
     Returns:
         str: JSON string with generic drug information and country-specific data
     """
+    result = _brand_lookup_gradio(brand_name)
     return format_json_output(result)
 @with_error_handling
+def calculate_drug_burden_index_mcp(drug_list: str) -> str:
     """
     Calculate Drug Burden Index (DBI) from a list of medications with automatic route detection.
+    This intelligent version automatically detects the route of administration for each medication
     (oral, transdermal patches, parenteral injections, etc.) and uses the appropriate reference
+    data for each route. Perfect for real-world medication lists with mixed formulations.
     Args:
         drug_list (str): Drug list (one per line, include dose and frequency - also write "prn" if the drug is a PRN medication)
     return format_json_output(result)
 @with_error_handling
 def calculate_creatinine_clearance_mcp(
     age: str, weight_kg: str, serum_creatinine: str, is_female: str
     fn=brand_to_generic_lookup_mcp,
     inputs=[
         gr.Text(label="Brand Name"),
     ],
     outputs=gr.JSON(label="Output"),
     title="Brand to Generic",
 dbi_calculator_ui = gr.Interface(
     fn=calculate_drug_burden_index_mcp,
     inputs=[
         gr.Textbox(
             label="Drug List (one per line, include dose and frequency)",
         ),
     ],
     outputs=gr.JSON(label="DBI Calculation with Route Detection"),
+    title="DBI Calculator",
+    api_name="dbi_calculator",
+    description="Intelligent DBI calculator that automatically detects routes (oral, patches, injections, etc.) and uses appropriate reference data for each medication.",
 )
 cockcroft_gault_ui = gr.Interface(
     fn=calculate_creatinine_clearance_mcp,
     inputs=[
         livertox_ui,
         brand_generic_ui,
         dbi_calculator_ui,
         cockcroft_gault_ui,
         ckd_epi_ui,
         child_pugh_ui,
         "LiverTox",
         "Brand to Generic",
         "DBI Calculator",
         "Creatinine CL",
         "eGFR",
         "Child-Pugh",

brand_to_generic.py CHANGED Viewed

@@ -9,6 +9,10 @@ from typing import Dict, List, Optional
 import requests
 import csv
 from io import StringIO
 logger = logging.getLogger(__name__)
@@ -18,6 +22,28 @@ _session = requests.Session()
 DEFAULT_TIMEOUT = 5  # Reduced from 10
 FAST_TIMEOUT = 3     # For quick checks
 class _Throttle:
     """Simple host-level throttle (~1 rps)."""
@@ -58,6 +84,7 @@ _RX_RE_FMT = (
 @functools.lru_cache(maxsize=512)
 def _rxnorm_lookup(brand: str):
     r = _get("https://rxnav.nlm.nih.gov/REST/rxcui.json", params={"name": brand})
     if not r or not r.json().get("idGroup", {}).get("rxnormId"):
         return []
@@ -89,6 +116,7 @@ _OPENFDA_NDC = "https://api.fda.gov/drug/ndc.json"
 @functools.lru_cache(maxsize=512)
 def _openfda_ndc(brand: str):
     r = _get(_OPENFDA_NDC, params={"search": f'brand_name:"{brand}"', "limit": 20})
     if not r:
         return []
@@ -125,6 +153,7 @@ _DPD = "https://health-products.canada.ca/api/drug/drugproduct/"
 @functools.lru_cache(maxsize=512)
 def _dpd_lookup(brand: str):
     r = _get(_DPD, params={"brandname": brand, "lang": "en", "type": "json"})
     if not r:
         return []
@@ -146,37 +175,12 @@ def _dpd_lookup(brand: str):
     return out
-_PBS_V3_BASE_URL = "https://data-api.health.gov.au/pbs/api/v3"
-_PBS_SUBSCRIPTION_KEY = os.getenv(
-    "PBS_API_SUBSCRIPTION_KEY", "2384af7c667342ceb5a736fe29f1dc6b"
-)
-def _pbs_v3_get(
-    endpoint: str, params: Optional[Dict] = None, accept_type: str = "application/json"
-):
-    """Helper to make GET requests to PBS API v3 with auth and throttling."""
-    url = f"{_PBS_V3_BASE_URL}/{endpoint}"
-    headers = {"subscription-key": _PBS_SUBSCRIPTION_KEY, "Accept": accept_type}
-    host = requests.utils.urlparse(url).netloc
-    _Throttle.wait(host, gap=5.0)  # PBS API specific throttle (1 req per 5 sec)
-    try:
-        r = _session.get(url, headers=headers, params=params, timeout=20)
-        r.raise_for_status()
-        return r
-    except Exception as exc:
-        logger.warning(
-            "PBS API v3 request failed for %s (params: %s): %s", url, params, exc
-        )
-        return None
 def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
     """Parses strength and dosage form from an li_form string."""
     if not li_form_str:
         return {"strength": None, "dosage_form": None}
-    strength_regex = r"(\\d[\\d.,\\s]*(?:mg|mcg|g|mL|L|microlitres|nanograms|IU|%|mmol)(?:[\\s\\/][\\d.,\\s]*(?:mg|mcg|g|mL|L|microlitres|dose(?:s)?))?(?:\\s*\\(.*?\\))?(?:\\s+in\\s+[\\d.,\\s]*(?:mL|L|g|mg))?)"
     strength_match = re.search(strength_regex, li_form_str, re.IGNORECASE)
@@ -196,7 +200,7 @@ def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
             extracted_form = form_after
     if not extracted_form and not extracted_strength:
-        if not re.search(r"\\d", li_form_str):
             extracted_form = li_form_str.strip()
         else:
             extracted_form = li_form_str.strip()
@@ -209,89 +213,48 @@ def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
 @functools.lru_cache(maxsize=512)
 def _pbs_lookup(brand: str):
-    schedules_resp = _pbs_v3_get("schedules", params={"limit": 1})
-    if not schedules_resp:
         return []
-    try:
-        schedules_data = schedules_resp.json()
-        if not schedules_data.get("data") or not schedules_data["data"][0].get(
-            "schedule_code"
-        ):
-            logger.warning(
-                "PBS API v3: Could not get schedule code from response: %s",
-                schedules_data,
-            )
-            return []
-        schedule_code = schedules_data["data"][0]["schedule_code"]
-    except (ValueError, IndexError, KeyError) as e:
-        logger.warning("PBS API v3: Error parsing schedules response: %s", e)
         return []
-    items_resp = _pbs_v3_get(
-        "items",
-        params={"schedule_code": schedule_code, "brand_name": brand, "limit": 20},
-        accept_type="text/csv",
-    )
-    if not items_resp:
         return []
     out = []
-    try:
-        csv_text = items_resp.text
-        if not csv_text.strip():
-            logger.info(
-                "PBS API v3: Received empty CSV for brand '%s' with schedule '%s'",
-                brand,
-                schedule_code,
-            )
-            return []
-        csvfile = StringIO(csv_text)
-        reader = csv.DictReader(csvfile)
-        for row in reader:
-            li_form = row.get("li_form")
-            parsed_form_strength = _parse_li_form(li_form)
-            generic_name = row.get("drug_name", "").strip() or None
-            query_params = {
-                "schedule_code": schedule_code,
-                "brand_name": requests.utils.quote(brand),
             }
-            source_url_params = "&".join([f"{k}={v}" for k, v in query_params.items()])
-            source_url = f"{_PBS_V3_BASE_URL}/items?{source_url_params}"
-            out.append(
-                {
-                    "generic_name": generic_name,
-                    "strength": parsed_form_strength["strength"],
-                    "dosage_form": parsed_form_strength["dosage_form"],
-                    "route": row.get("manner_of_administration", "").strip() or None,
-                    "country": "AU",
-                    "source": "PBS API v3",
-                    "ids": {"pbs_item_code": row.get("pbs_code", "").strip()},
-                    "source_url": source_url,
-                }
-            )
-    except csv.Error as e:
-        logger.warning(
-            "PBS API v3: CSV parsing error for brand '%s': %s. CSV content: %s",
-            brand,
-            e,
-            csv_text[:500],
         )
-        return []
-    except Exception as e:
-        logger.exception(
-            "PBS API v3: Unexpected error processing items for brand '%s': %s", brand, e
-        )
-        return []
     return out
 @functools.lru_cache(maxsize=512)
 def _pubchem_synonym_lookup(brand: str):
     url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{requests.utils.quote(brand)}/synonyms/JSON"
     r = _get(url)
     if not r:
@@ -333,9 +296,9 @@ def brand_lookup(
     for fn in (
         _pbs_lookup,
-        _rxnorm_lookup,
-        _openfda_ndc,
-        _dpd_lookup,
         _pubchem_synonym_lookup,
     ):
         try:

 import requests
 import csv
 from io import StringIO
+try:
+    import pandas as pd
+except ImportError:
+    pd = None
 logger = logging.getLogger(__name__)
 DEFAULT_TIMEOUT = 5  # Reduced from 10
 FAST_TIMEOUT = 3     # For quick checks
+# Global to hold PBS data
+pbs_data: Optional["pd.DataFrame"] = None
+# Testing mode flag to disable external API calls
+TESTING_MODE = False
def set_pbs_data(data: "pd.DataFrame"):
    """Install ``data`` as the module-wide PBS table used by ``_pbs_lookup``.

    Args:
        data: The new PBS DataFrame, an empty DataFrame to disable PBS
            lookups, or None to clear the table.
    """
    global pbs_data
    pbs_data = data
    # _pbs_lookup memoises per-brand answers with lru_cache but reads this
    # global; without invalidation a refresh would keep serving results
    # computed from the previous table (including empty results cached
    # before any data was loaded).
    _pbs_lookup.cache_clear()
    if pbs_data is not None:
        logger.info("PBS data updated. Shape: %s", pbs_data.shape)
    else:
        logger.info("PBS data cleared.")
def set_testing_mode(is_testing: bool):
    """Enable or disable testing mode, which bypasses external API lookups.

    Args:
        is_testing: True to make the external lookups return empty results
            without touching the network; False to restore normal behaviour.
    """
    global TESTING_MODE
    TESTING_MODE = is_testing
    # These lookups are lru_cache-memoised and branch on TESTING_MODE, so
    # results cached under the previous mode must not leak into the new one.
    for cached_lookup in (
        _rxnorm_lookup,
        _openfda_ndc,
        _dpd_lookup,
        _pubchem_synonym_lookup,
    ):
        cached_lookup.cache_clear()
    if TESTING_MODE:
        logger.warning("Testing mode is enabled. External API calls will be bypassed.")
 class _Throttle:
     """Simple host-level throttle (~1 rps)."""
 @functools.lru_cache(maxsize=512)
 def _rxnorm_lookup(brand: str):
+    if TESTING_MODE: return []
     r = _get("https://rxnav.nlm.nih.gov/REST/rxcui.json", params={"name": brand})
     if not r or not r.json().get("idGroup", {}).get("rxnormId"):
         return []
 @functools.lru_cache(maxsize=512)
 def _openfda_ndc(brand: str):
+    if TESTING_MODE: return []
     r = _get(_OPENFDA_NDC, params={"search": f'brand_name:"{brand}"', "limit": 20})
     if not r:
         return []
 @functools.lru_cache(maxsize=512)
 def _dpd_lookup(brand: str):
+    if TESTING_MODE: return []
     r = _get(_DPD, params={"brandname": brand, "lang": "en", "type": "json"})
     if not r:
         return []
     return out
 def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
     """Parses strength and dosage form from an li_form string."""
     if not li_form_str:
         return {"strength": None, "dosage_form": None}
+    strength_regex = r"(\d[\d.,\s]*(?:mg|mcg|g|mL|L|microlitres|nanograms|IU|%|mmol)(?:[\s\/][\d.,\s]*(?:mg|mcg|g|mL|L|microlitres|dose(?:s)?))?(?:\s*\(.*?\))?(?:\s+in\s+[\d.,\s]*(?:mL|L|g|mg))?)"
     strength_match = re.search(strength_regex, li_form_str, re.IGNORECASE)
             extracted_form = form_after
     if not extracted_form and not extracted_strength:
+        if not re.search(r"\d", li_form_str):
             extracted_form = li_form_str.strip()
         else:
             extracted_form = li_form_str.strip()
 @functools.lru_cache(maxsize=512)
 def _pbs_lookup(brand: str):
     """Return PBS records whose brand name matches ``brand`` (case-insensitive).
 
     Reads the module-level ``pbs_data`` table. Each match becomes a dict with
     generic name, parsed strength and dosage form, route, country "AU",
     source label, PBS item code and source URL. Returns an empty list when
     the table is missing or empty, lacks a 'brand_name' column, or has no
     matching row. Results are memoised per brand by ``lru_cache``.
     """
     if pbs_data is None or pbs_data.empty:
         logger.warning("PBS data not loaded or empty. Skipping PBS lookup for '%s'.", brand)
         return []
     if 'brand_name' not in pbs_data.columns:
         logger.error("PBS data does not contain 'brand_name' column. Skipping lookup.")
         return []
     def _text(value) -> Optional[str]:
         # Missing cells arrive as None or float NaN and some columns may be
         # numeric; calling .strip() on those directly would raise.
         if value is None or (isinstance(value, float) and value != value):
             return None
         return str(value).strip() or None
     brand_lower = brand.strip().lower()
     brand_column = pbs_data['brand_name'].str.strip().str.lower()
     results_df = pbs_data[brand_column == brand_lower]
     if results_df.empty:
         return []
     out = []
     source_url = "https://huggingface.co/datasets/cmcmaster/pbs_items"
     for _, row in results_df.iterrows():
         li_form = row.get("li_form")
         # Only hand real strings to the parser; a NaN would reach re.search.
         parsed_form_strength = _parse_li_form(li_form if isinstance(li_form, str) else None)
         out.append(
             {
                 "generic_name": _text(row.get("drug_name")),
                 "strength": parsed_form_strength["strength"],
                 "dosage_form": parsed_form_strength["dosage_form"],
                 "route": _text(row.get("manner_of_administration")),
                 "country": "AU",
                 "source": "PBS (via Hugging Face Dataset)",
                 "ids": {"pbs_item_code": _text(row.get("pbs_code")) or ""},
                 "source_url": source_url,
             }
         )
     return out
 @functools.lru_cache(maxsize=512)
 def _pubchem_synonym_lookup(brand: str):
+    if TESTING_MODE: return []
     url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{requests.utils.quote(brand)}/synonyms/JSON"
     r = _get(url)
     if not r:
     for fn in (
         _pbs_lookup,
+        # _rxnorm_lookup,  These three fail, so skip them for now
+        # _openfda_ndc,
+        # _dpd_lookup,
         _pubchem_synonym_lookup,
     ):
         try:

dbi_mcp.py CHANGED Viewed

@@ -10,6 +10,7 @@ from typing import Dict, List, Tuple, Optional, Union, Mapping, Sequence
 from brand_to_generic import brand_lookup
 import csv
 try:
     import pandas as pd
@@ -23,8 +24,11 @@ __all__ = [
     "calculate_dbi",
     "print_report",
     "detect_route_from_text",
     "dbi_mcp",
     "dbi_mcp_mixed_routes",
 ]
 PatientInput = Union[
@@ -33,6 +37,15 @@ PatientInput = Union[
     Mapping[str, float],
 ]
 # Route detection patterns
 ROUTE_PATTERNS = {
     'transdermal': [
@@ -96,6 +109,107 @@ def detect_route_from_text(text: str) -> str:
     return 'oral'
 def load_reference(
     ref_path: Path,
     *,
@@ -204,15 +318,17 @@ def calculate_dbi(
 logger = logging.getLogger(__name__)
-UNIT_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(?P<unit>mcg|μg|mg|g)\b", re.I)
-PATCH_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(mcg|μg)\s*/\s*hr", re.I)
-CONC_PAT = re.compile(r"(?P<drug_val>\d+(?:[.,]\d+)?)(?:\s*)(?P<drug_unit>mcg|μg|mg|g)\s*/\s*(?P<vol_val>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
 VOL_PAT = re.compile(r"(?P<voldose>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
-QTY_PAT = re.compile(r"(?<!\d)(?P<qty>\d+)\s*(?:tab|caps?|puff|spray|patch|patches)s?\b", re.I)
 FREQ_PAT = re.compile(r"\b(q\d{1,2}h|qd|od|daily|once daily|bid|bd|twice daily|tid|tds|three times daily|qid|four times daily|nocte|mane|am|pm)\b", re.I)
 EVERY_HOURS_PAT = re.compile(r"q(\d{1,2})h", re.I)
@@ -227,13 +343,16 @@ _FREQ_MAP = {
 }
 def _unit_to_mg(val: float, unit: str) -> float:
-    unit = unit.lower()
     if unit == "mg":
         return val
-    if unit in {"g"}:
         return val * 1_000
-    if unit in {"mcg", "μg"}:
         return val / 1_000
     return math.nan
@@ -247,7 +366,8 @@ def _freq_to_per_day(token: str) -> float:
         return 24 / hrs if hrs else 1
     return 1
-Parsed = Tuple[str, float, bool, str]  # Added route detection
 @functools.lru_cache(maxsize=2048)
 def _parse_line(line: str) -> Optional[Parsed]:
@@ -266,6 +386,65 @@ def _parse_line(line: str) -> Optional[Parsed]:
         # Override route detection for patches
         return (name_part, mg_day, is_prn, "transdermal")
     m_conc = CONC_PAT.search(original)
     m_vol  = VOL_PAT.search(original)
     if m_conc and m_vol:
@@ -288,10 +467,17 @@ def _parse_line(line: str) -> Optional[Parsed]:
     m = UNIT_PAT.search(original)
     if m:
         strength_mg = _unit_to_mg(float(m.group("val").replace(",", ".")), m.group("unit"))
-        qty = 1
         m_qty = QTY_PAT.search(original)
         if m_qty:
-            qty = int(m_qty.group("qty"))
         freq = 1.0
         m_freq = FREQ_PAT.search(original)
         if m_freq:
@@ -302,9 +488,61 @@ def _parse_line(line: str) -> Optional[Parsed]:
         name_part = re.sub(r"\s+", " ", name_part).strip()
         return (name_part, mg_day, is_prn, detected_route)
     logger.debug("unhandled line: %s", original)
     return None
 def _smart_drug_lookup(raw_name: str, all_routes_reference: Dict[str, Dict[str, Tuple[float, str]]]) -> str:
     """
     Smart drug name resolution that avoids unnecessary API calls.
@@ -514,6 +752,159 @@ def dbi_mcp_mixed_routes(text_block: str, *, ref_csv: Union[str, Path] = "dbi_re
     }
 if __name__ == "__main__":
     import sys
     import pprint

 from brand_to_generic import brand_lookup
 import csv
+import json
 try:
     import pandas as pd
     "calculate_dbi",
     "print_report",
     "detect_route_from_text",
+    "detect_combination_drug",
+    "split_combination_drug_simple",
     "dbi_mcp",
     "dbi_mcp_mixed_routes",
+    "dbi_mcp_with_combinations",
 ]
 PatientInput = Union[
     Mapping[str, float],
 ]
+# Combination drug detection patterns
+COMBINATION_PATTERNS = [
+    r'\bco-?\w+\b',     # co- prefix with optional hyphen (co-codamol, cocodamol)
+    r'\b\w+[-/]\w+\b',  # hyphen or slash separated (paracetamol-codeine, aspirin/caffeine)
+    r'\b\w+\s*\+\s*\w+\b',  # plus sign (aspirin + caffeine)
+    r'\b\w+\s*with\s+\w+\b',  # "with" combinations
+    r'\b\w+\s*and\s+\w+\b',   # "and" combinations
+]
 # Route detection patterns
 ROUTE_PATTERNS = {
     'transdermal': [
     return 'oral'
def detect_combination_drug(drug_name: str) -> bool:
    """Heuristically decide whether ``drug_name`` looks like a combination product.

    The name is lower-cased and tested against every regex in
    ``COMBINATION_PATTERNS``; failing that, it is checked for two mg
    strengths joined by a hyphen or slash.

    Returns:
        True if any pattern matches, otherwise False.
    """
    lowered = drug_name.lower()
    if any(re.search(pattern, lowered) for pattern in COMBINATION_PATTERNS):
        return True
    # Two mg strengths separated by '-' or '/' (e.g. "500mg-9.6mg") imply
    # two active ingredients.
    paired_strengths = re.search(
        r'\d+(?:\.\d+)?\s*mg\s*[-/]\s*\d+(?:\.\d+)?\s*mg', lowered
    )
    return paired_strengths is not None
# Known combination products mapped to their (component, note) pairs.
# Matching is by substring on the lower-cased input, in insertion order.
_KNOWN_COMBINATIONS = {
    'co-codamol': (('paracetamol', 'paracetamol component of co-codamol'),
                   ('codeine', 'codeine component of co-codamol')),
    'cocodamol': (('paracetamol', 'paracetamol component of co-codamol'),
                  ('codeine', 'codeine component of co-codamol')),
    'co-trimoxazole': (('trimethoprim', 'trimethoprim component of co-trimoxazole'),
                       ('sulfamethoxazole', 'sulfamethoxazole component of co-trimoxazole')),
    'cotrimoxazole': (('trimethoprim', 'trimethoprim component of co-trimoxazole'),
                      ('sulfamethoxazole', 'sulfamethoxazole component of co-trimoxazole')),
    'paracetamol-codeine': (('paracetamol', 'paracetamol component'),
                            ('codeine', 'codeine component')),
    'aspirin-caffeine': (('aspirin', 'aspirin component'),
                         ('caffeine', 'caffeine component')),
    'tylenol-codeine': (('paracetamol', 'paracetamol component'),
                        ('codeine', 'codeine component')),
    # Brand name combinations
    'vytorin': (('ezetimibe', 'ezetimibe component of Vytorin'),
                ('simvastatin', 'simvastatin component of Vytorin')),
    'exforge': (('amlodipine', 'amlodipine component of Exforge'),
                ('valsartan', 'valsartan component of Exforge')),
    'caduet': (('amlodipine', 'amlodipine component of Caduet'),
               ('atorvastatin', 'atorvastatin component of Caduet')),
    'janumet': (('sitagliptin', 'sitagliptin component of Janumet'),
                ('metformin', 'metformin component of Janumet')),
    'combigan': (('brimonidine', 'brimonidine component of Combigan'),
                 ('timolol', 'timolol component of Combigan')),
}

# Characters that may join two ingredient names, tried in this order.
_COMPONENT_SEPARATORS = ('-', '/', '+')


def split_combination_drug_simple(drug_text: str) -> List[Tuple[str, str, str]]:
    """Split a combination drug into components using simple rules.

    Known products are resolved from a fixed table first. Otherwise the name
    portion of ``drug_text`` (everything before the first digit) is split on
    the first of '-', '/' or '+' that yields exactly two parts.

    Args:
        drug_text: Free-text medication entry, e.g. "aspirin + caffeine 300mg".

    Returns:
        A list of ``(component_name, original_text, notes)`` tuples; empty
        when no rule applies. Parts of two characters or fewer are dropped.
    """
    drug_text_lower = drug_text.lower()
    for combo_name, combo_components in _KNOWN_COMBINATIONS.items():
        if combo_name in drug_text_lower:
            return [(comp_name, drug_text, note) for comp_name, note in combo_components]

    # Only the text before the first digit is split, so separators inside
    # dose expressions such as "10mg/5mL" are never treated as name joiners.
    drug_name_part = re.split(r'\d+', drug_text)[0].strip()
    # Every separator is tested against the name part directly; the previous
    # guard looked only for '-' or '/', which made '+' unreachable.
    for sep in _COMPONENT_SEPARATORS:
        if sep not in drug_name_part:
            continue
        parts = [part.strip() for part in drug_name_part.split(sep)]
        if len(parts) == 2:
            return [
                (part, drug_text, 'Component of combination drug')
                for part in parts
                if len(part) > 2  # Avoid single letters / two-letter prefixes
            ]
    return []
def needs_llm_splitting(drug_text: str) -> bool:
    """Report whether a combination drug must be split with LLM assistance.

    Returns False when ``drug_text`` is not detected as a combination.
    Returns True when rule-based splitting yields nothing, or yields a
    component name that is shorter than three characters or purely numeric.
    """
    if not detect_combination_drug(drug_text):
        return False
    # Rule-based splitting gets the first attempt.
    simple_components = split_combination_drug_simple(drug_text)
    if not simple_components:
        return True
    # Any too-short or numeric component means the simple split is unreliable.
    return any(
        len(comp_name) < 3 or comp_name.isdigit()
        for comp_name, _, _ in simple_components
    )
 def load_reference(
     ref_path: Path,
     *,
 logger = logging.getLogger(__name__)
+UNIT_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(?P<unit>mcg|μg|mg|g|iu|units?|micrograms?|mmol)\b", re.I)
+PATCH_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(mcg|μg|microg)\s*/\s*hr", re.I)
+PERCENT_PAT = re.compile(r"\b(?P<percent>\d+(?:\.\d+)?)\s*%\b")
+CONC_PAT = re.compile(r"(?P<drug_val>\d+(?:[.,]\d+)?)(?:\s*)(?P<drug_unit>mcg|μg|mg|g|iu|units?)\s*/\s*(?P<vol_val>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
 VOL_PAT = re.compile(r"(?P<voldose>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
+QTY_PAT = re.compile(r"(?<!\d)(?P<qty>\d+(?:\s*-\s*\d+)?)\s*(?:tab|caps?|puff|spray|patch|patches|sachet|tube|inhalation|drop)s?\b", re.I)
 FREQ_PAT = re.compile(r"\b(q\d{1,2}h|qd|od|daily|once daily|bid|bd|twice daily|tid|tds|three times daily|qid|four times daily|nocte|mane|am|pm)\b", re.I)
 EVERY_HOURS_PAT = re.compile(r"q(\d{1,2})h", re.I)
 }
 def _unit_to_mg(val: float, unit: str) -> float:
+    unit = unit.lower().removesuffix('s')
     if unit == "mg":
         return val
+    if unit == "g":
         return val * 1_000
+    if unit in {"mcg", "μg", "microgram"}:
         return val / 1_000
+    if unit in {"iu", "unit", "mmol"}:
+        logger.debug("Cannot reliably convert '%s' to mg. Returning 0.", unit)
+        return 0.0
     return math.nan
         return 24 / hrs if hrs else 1
     return 1
+Parsed = Tuple[str, float, bool, str]  # (name, mg_day, is_prn, route)
+ParsedCombination = Tuple[str, float, bool, str, bool, List[Tuple[str, str, str]]]  # (name, mg_day, is_prn, route, is_combination, components)
 @functools.lru_cache(maxsize=2048)
 def _parse_line(line: str) -> Optional[Parsed]:
         # Override route detection for patches
         return (name_part, mg_day, is_prn, "transdermal")
+    # Try parsing percentage-based topicals/solutions before standard units
+    m_percent = PERCENT_PAT.search(original)
+    if m_percent:
+        percent_val = float(m_percent.group("percent"))
+        # For liquids where volume is given (e.g., 2% solution, 10mL dose)
+        m_vol = VOL_PAT.search(original)
+        if m_vol:
+            voldose_ml = float(m_vol.group("voldose").replace(",", "."))
+            # Assume % is g/100mL for liquids
+            strength_g_per_100ml = percent_val
+            mg_per_dose = (strength_g_per_100ml * 1000) * (voldose_ml / 100)
+            freq = 1.0
+            m_freq = FREQ_PAT.search(original)
+            if m_freq:
+                freq = _freq_to_per_day(m_freq.group(0))
+            mg_day = mg_per_dose * freq
+            name_part = original[:m_percent.start()].strip()
+            name_part = re.sub(r"[^A-Za-z0-9\s-]", " ", name_part).strip()
+            return (name_part, mg_day, is_prn, detected_route)
+        # Handle drops with percentage strength
+        if 'drop' in original.lower():
+            # Assume 20 drops/mL for ophthalmic solutions
+            g_per_100ml = percent_val
+            mg_per_ml = g_per_100ml * 10  # 1% -> 1g/100mL -> 10mg/mL
+            qty = 1.0
+            m_qty = QTY_PAT.search(original)  # QTY_PAT now includes 'drop'
+            if m_qty:
+                qty_str = m_qty.group("qty").split('-')[-1].strip() # Use upper end of range
+                try:
+                    qty = float(qty_str)
+                except ValueError:
+                    qty = 1.0
+            # Dose in mg = (number of drops / 20 drops_per_mL) * mg_per_mL
+            mg_per_dose = (qty / 20.0) * mg_per_ml
+            freq = 1.0
+            m_freq = FREQ_PAT.search(original)
+            if m_freq:
+                freq = _freq_to_per_day(m_freq.group(0))
+            mg_day = mg_per_dose * freq
+            name_part = original[:m_percent.start()].strip()
+            name_part = re.sub(r"[^A-Za-z0-9\s-]", " ", name_part).strip()
+            return (name_part, mg_day, is_prn, detected_route)
+        # For cases with 'application' or 'drop' (e.g., 0.05% cream, 1 application)
+        if 'application' in original.lower() or 'ointment' in original.lower():
+            # Can't calculate mg dose, but we can parse the drug name.
+            name_part = original[:m_percent.start()].strip()
+            name_part = re.sub(r"[^A-Za-z0-9\s-]", " ", name_part).strip()
+            logger.debug("Parsed %%-based item but cannot quantify mg/day: %s", original)
+            return (name_part, 0.0, is_prn, detected_route)
     m_conc = CONC_PAT.search(original)
     m_vol  = VOL_PAT.search(original)
     if m_conc and m_vol:
     m = UNIT_PAT.search(original)
     if m:
         strength_mg = _unit_to_mg(float(m.group("val").replace(",", ".")), m.group("unit"))
+        if math.isnan(strength_mg):
+            logger.debug("Unhandled unit '%s' in line: %s", m.group("unit"), original)
+            return None
+        qty = 1.0
         m_qty = QTY_PAT.search(original)
         if m_qty:
+            qty_str = m_qty.group("qty").split('-')[-1].strip()
+            try:
+                qty = float(qty_str)
+            except ValueError:
+                qty = 1.0
         freq = 1.0
         m_freq = FREQ_PAT.search(original)
         if m_freq:
         name_part = re.sub(r"\s+", " ", name_part).strip()
         return (name_part, mg_day, is_prn, detected_route)
+    # Handle unitless doses like "..., 5, oral" or "..., 2.5-5, oral"
+    m_unitless = re.search(r"[,\(]\s*(?P<dose>\d+(?:\.\d+)?(?:\s*-\s*\d+(?:\.\d+)?)?)\s*,\s*(?:oral|sublingual|buccal)", original, re.I)
+    if m_unitless:
+        dose_str = m_unitless.group("dose").split('-')[-1].strip()
+        try:
+            strength_mg = float(dose_str) # Assume mg
+            freq = 1.0
+            m_freq = FREQ_PAT.search(original)
+            if m_freq:
+                freq = _freq_to_per_day(m_freq.group(0))
+            mg_day = strength_mg * freq
+            name_part = original[:m_unitless.start()].strip()
+            name_part = re.sub(r"\(.*?\)", "", name_part).strip() # Remove bracketed part of name
+            return (name_part, mg_day, is_prn, detected_route)
+        except ValueError:
+            pass # Could not convert to float
     logger.debug("unhandled line: %s", original)
     return None
def _parse_line_with_combinations(line: str) -> Optional[ParsedCombination]:
    """
    Parse one medication line and flag combination drugs.

    The line is first parsed with ``_parse_line``. The parsed name, and
    failing that the full line, is then checked with
    ``detect_combination_drug`` and split with
    ``split_combination_drug_simple``.

    Args:
        line: A single line of the medication list.

    Returns:
        ``None`` when the line cannot be parsed, otherwise
        ``(name, mg_day, is_prn, route, is_combination, components)``.
        ``components`` is empty for single drugs and for combinations that
        were detected but could not be split.
    """
    parsed = _parse_line(line)
    if not parsed:
        return None

    name, mg_day, is_prn, route = parsed

    # Check both the parsed name and the full line for a combination marker.
    if not (detect_combination_drug(name) or detect_combination_drug(line)):
        return (name, mg_day, is_prn, route, False, [])

    # Prefer splitting the parsed name; fall back to the full line.
    components = split_combination_drug_simple(name) or split_combination_drug_simple(line)
    if not components:
        logger.debug("Combination drug detected but couldn't split: %s", name)
        # Mark as combination but with empty components (may need LLM splitting)
        return (name, mg_day, is_prn, route, True, [])

    logger.debug("Detected combination drug: %s -> %s", name, [c[0] for c in components])
    return (name, mg_day, is_prn, route, True, components)
 def _smart_drug_lookup(raw_name: str, all_routes_reference: Dict[str, Dict[str, Tuple[float, str]]]) -> str:
     """
     Smart drug name resolution that avoids unnecessary API calls.
     }
def dbi_mcp_with_combinations(text_block: str, *, ref_csv: Union[str, Path] = "dbi_reference_by_route.csv") -> dict:
    """
    Enhanced DBI calculator that handles combination drugs automatically.

    This function:
    1. Detects combination drugs (e.g., paracetamol-codeine, co-codamol)
    2. Splits them into individual components
    3. Calculates DBI for each relevant component
    4. Provides detailed breakdown including combination drug handling

    Known limitation: every component of a combination is credited with the
    full parsed daily dose of the product; component ratios are not applied.

    Args:
        text_block: Medication list, one medication per line. Blank lines
            are ignored.
        ref_csv: Path of the per-route DBI reference CSV, passed to
            ``load_all_routes_reference``.

    Returns:
        dict with the total DBI with and without PRN medications, per-route
        breakdowns, per-medication details, the combination drugs that were
        split, and the input lines that could not be parsed
        (``unmatched_input``). Routes that have no entry in the reference
        data contribute nothing to the totals; a warning is logged for them.
    """
    all_routes_ref = load_all_routes_reference(Path(ref_csv))

    parsed_combinations: List[ParsedCombination] = []
    unmatched: List[str] = []
    route_stats: Dict[str, int] = {}
    combination_drugs: List[Dict] = []

    for ln in text_block.splitlines():
        # Blank lines are separators, not medications that failed to parse.
        if not ln.strip():
            continue
        res = _parse_line_with_combinations(ln)
        if res:
            parsed_combinations.append(res)
            route = res[3]  # detected route
            route_stats[route] = route_stats.get(route, 0) + 1
        else:
            unmatched.append(ln)

    # Daily dose per generic, keyed by route; the "without" map excludes PRN items.
    meds_by_route_with: Dict[str, Dict[str, float]] = {}
    meds_by_route_without: Dict[str, Dict[str, float]] = {}
    medication_details: List[Dict] = []

    def _add_dose(route_name: str, generic_name: str, dose: float, prn: bool) -> None:
        """Accumulate a daily dose into the with-PRN map and, if regular, the without-PRN map."""
        with_prn = meds_by_route_with.setdefault(route_name, {})
        without_prn = meds_by_route_without.setdefault(route_name, {})
        with_prn[generic_name] = with_prn.get(generic_name, 0.0) + dose
        if not prn:
            without_prn[generic_name] = without_prn.get(generic_name, 0.0) + dose

    def _format_details(details, route_name):
        """Turn calculate_dbi detail tuples into dicts tagged with their route."""
        return [
            {
                "generic_name": g,
                "dose_mg_day": d,
                "delta_mg": delta,
                "dbi_component": dbi,
                "route": route_name,
            }
            for g, d, delta, dbi in details
        ]

    for name, mg_day, is_prn, detected_route, is_combination, components in parsed_combinations:
        if is_combination and components:
            # Handle combination drug by processing each component
            combination_info = {
                "original_text": f"{name} {mg_day}mg/day",
                "is_combination": True,
                "components": [],
                "detected_route": detected_route,
                "is_prn": is_prn
            }
            for comp_name, _original_text, note in components:
                generic = _smart_drug_lookup(comp_name, all_routes_ref)
                # Each component receives the full product dose (see docstring).
                _add_dose(detected_route, generic, mg_day, is_prn)
                combination_info["components"].append({
                    "component_name": comp_name,
                    "generic_name": generic,
                    "note": note,
                    "dose_mg_day": mg_day  # This is simplified - real combinations need dose splitting
                })
            combination_drugs.append(combination_info)
            medication_details.append(combination_info)
        else:
            # Handle single drug (or unresolved combination)
            generic = _smart_drug_lookup(name, all_routes_ref)
            _add_dose(detected_route, generic, mg_day, is_prn)
            medication_details.append({
                "original_text": f"{name} {mg_day}mg/day",
                "generic_name": generic,
                "dose_mg_day": mg_day,
                "detected_route": detected_route,
                "is_prn": is_prn,
                "is_combination": is_combination,
                "combination_note": "Detected as combination but couldn't split" if is_combination else None
            })

    # Calculate DBI for each route that has reference data
    route_results = {}
    total_dbi_with = 0.0
    total_dbi_without = 0.0
    all_details_with = []
    all_details_without = []

    for route, meds_with in meds_by_route_with.items():
        if route not in all_routes_ref:
            # Surface the omission instead of dropping the route silently.
            logger.warning(
                "No DBI reference data for route '%s'; %d medication line(s) excluded from totals.",
                route,
                route_stats.get(route, 0),
            )
            continue
        route_ref = all_routes_ref[route]

        dbi_with, details_with = calculate_dbi(meds_with, route_ref)
        dbi_without, details_without = calculate_dbi(meds_by_route_without[route], route_ref)
        total_dbi_with += dbi_with
        total_dbi_without += dbi_without

        route_details_with = _format_details(details_with, route)
        route_details_without = _format_details(details_without, route)
        all_details_with.extend(route_details_with)
        all_details_without.extend(route_details_without)

        route_results[route] = {
            "dbi_with_prn": round(dbi_with, 2),
            "dbi_without_prn": round(dbi_without, 2),
            "details_with_prn": route_details_with,
            "details_without_prn": route_details_without,
            "medication_count": route_stats.get(route, 0)
        }

    return {
        "combination_handling": True,
        "total_dbi_without_prn": round(total_dbi_without, 2),
        "total_dbi_with_prn": round(total_dbi_with, 2),
        "routes_detected": list(route_stats.keys()),
        "route_statistics": route_stats,
        "route_breakdown": route_results,
        "all_details_without_prn": all_details_without,
        "all_details_with_prn": all_details_with,
        "medication_details": medication_details,
        "combination_drugs": combination_drugs,
        "unmatched_input": unmatched,
    }
 if __name__ == "__main__":
     import sys
     import pprint

requirements.txt CHANGED Viewed

@@ -1,5 +1,7 @@
-gradio[mcp]
 requests
 datasets
 beautifulsoup4
-pandas

 requests
+pandas
+gradio
 datasets
+apscheduler
 beautifulsoup4
+lxml