Chris McMaster committed on
Commit
819adf9
·
1 Parent(s): 04e78e6

Improved drug parsing and generic matching

Browse files
Files changed (5) hide show
  1. .gitignore +14 -0
  2. app.py +74 -53
  3. brand_to_generic.py +63 -100
  4. dbi_mcp.py +401 -10
  5. requirements.txt +4 -2
.gitignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ inputs.json
2
+ /venv/
3
+ /.venv/
4
+
5
+ # Standard python project gitignore
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+ *.pyw
11
+ *.pyz
12
+ *.pywz
13
+ *.pyzw
14
+ *.pyzwz
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import gradio as gr
2
  from typing import Dict, Any
3
- from datetime import datetime
4
 
5
- from brand_to_generic import brand_lookup
6
  from dbi_mcp import dbi_mcp, dbi_mcp_mixed_routes
7
  from clinical_calculators import (
8
  cockcroft_gault_creatinine_clearance,
@@ -33,6 +33,64 @@ from adr_analysis import (
33
  )
34
  import time
35
  import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
 
38
  @with_error_handling
@@ -47,10 +105,7 @@ def _brand_lookup_gradio(brand_name: str, prefer_countries_str: str = ""):
47
  return standardize_response(result, "brand_to_generic")
48
 
49
 
50
- @with_error_handling
51
- def _dbi_mcp_gradio(text_block: str, route: str = "oral"):
52
- result = dbi_mcp(text_block, route=route, ref_csv="dbi_reference_by_route.csv")
53
- return standardize_response(result, "dbi_calculator")
54
 
55
 
56
  @with_error_handling
@@ -224,45 +279,28 @@ def drug_livertox_summary_mcp(drug_name: str) -> str:
224
 
225
 
226
  @with_error_handling
227
- def brand_to_generic_lookup_mcp(brand_name: str, prefer_countries: str = "US") -> str:
228
  """
229
  Look up generic drug information from brand names.
230
 
231
  Args:
232
  brand_name (str): Brand name to look up
233
- prefer_countries (str): Comma-separated ISO country codes (e.g., "US,CA")
234
 
235
  Returns:
236
  str: JSON string with generic drug information and country-specific data
237
  """
238
- result = _brand_lookup_gradio(brand_name, prefer_countries)
239
- return format_json_output(result)
240
-
241
-
242
- @with_error_handling
243
- def calculate_drug_burden_index_mcp(drug_list: str, route: str = "oral") -> str:
244
- """
245
- Calculate Drug Burden Index (DBI) from a list of medications.
246
-
247
- Args:
248
- drug_list (str): Drug list (one per line, include dose and frequency - also write "prn" if the drug is a PRN medication)
249
- route (str): Route of administration (default: "oral")
250
-
251
- Returns:
252
- str: JSON string with DBI calculation results and individual drug contributions
253
- """
254
- result = _dbi_mcp_gradio(drug_list, route)
255
  return format_json_output(result)
256
 
257
 
258
  @with_error_handling
259
- def calculate_drug_burden_index_mixed_routes_mcp(drug_list: str) -> str:
260
  """
261
  Calculate Drug Burden Index (DBI) from a list of medications with automatic route detection.
262
 
263
- This enhanced version automatically detects the route of administration for each medication
264
  (oral, transdermal patches, parenteral injections, etc.) and uses the appropriate reference
265
- data for each route. Perfect for mixed medication lists.
266
 
267
  Args:
268
  drug_list (str): Drug list (one per line, include dose and frequency - also write "prn" if the drug is a PRN medication)
@@ -278,6 +316,9 @@ def calculate_drug_burden_index_mixed_routes_mcp(drug_list: str) -> str:
278
  return format_json_output(result)
279
 
280
 
 
 
 
281
  @with_error_handling
282
  def calculate_creatinine_clearance_mcp(
283
  age: str, weight_kg: str, serum_creatinine: str, is_female: str
@@ -746,10 +787,6 @@ brand_generic_ui = gr.Interface(
746
  fn=brand_to_generic_lookup_mcp,
747
  inputs=[
748
  gr.Text(label="Brand Name"),
749
- gr.Text(
750
- label="Preferred Countries (comma-separated ISO codes, e.g., US,CA)",
751
- value="US",
752
- ),
753
  ],
754
  outputs=gr.JSON(label="Output"),
755
  title="Brand to Generic",
@@ -759,22 +796,6 @@ brand_generic_ui = gr.Interface(
759
 
760
  dbi_calculator_ui = gr.Interface(
761
  fn=calculate_drug_burden_index_mcp,
762
- inputs=[
763
- gr.Textbox(
764
- label="Drug List (one per line, include dose and frequency)",
765
- lines=10,
766
- placeholder="e.g., Aspirin 100mg daily\nFurosemide 40mg PRN",
767
- ),
768
- gr.Text(label="Route of Administration", value="oral"),
769
- ],
770
- outputs=gr.JSON(label="DBI Calculation"),
771
- title="DBI Calculator (Single Route)",
772
- api_name="dbi_calculator",
773
- description="Calculate Drug Burden Index (DBI) from a list of medications. Supports PRN and various dose formats.",
774
- )
775
-
776
- dbi_mixed_routes_ui = gr.Interface(
777
- fn=calculate_drug_burden_index_mixed_routes_mcp,
778
  inputs=[
779
  gr.Textbox(
780
  label="Drug List (one per line, include dose and frequency)",
@@ -783,11 +804,13 @@ dbi_mixed_routes_ui = gr.Interface(
783
  ),
784
  ],
785
  outputs=gr.JSON(label="DBI Calculation with Route Detection"),
786
- title="DBI Calculator (Mixed Routes)",
787
- api_name="dbi_calculator_mixed_routes",
788
- description="Enhanced DBI calculator that automatically detects routes (oral, patches, injections, etc.) and uses appropriate reference data for each medication.",
789
  )
790
 
 
 
791
  cockcroft_gault_ui = gr.Interface(
792
  fn=calculate_creatinine_clearance_mcp,
793
  inputs=[
@@ -894,7 +917,6 @@ demo = gr.TabbedInterface(
894
  livertox_ui,
895
  brand_generic_ui,
896
  dbi_calculator_ui,
897
- dbi_mixed_routes_ui,
898
  cockcroft_gault_ui,
899
  ckd_epi_ui,
900
  child_pugh_ui,
@@ -913,7 +935,6 @@ demo = gr.TabbedInterface(
913
  "LiverTox",
914
  "Brand to Generic",
915
  "DBI Calculator",
916
- "DBI Mixed Routes",
917
  "Creatinine CL",
918
  "eGFR",
919
  "Child-Pugh",
 
1
  import gradio as gr
2
  from typing import Dict, Any
3
+ from datetime import datetime, timedelta
4
 
5
+ from brand_to_generic import brand_lookup, set_pbs_data
6
  from dbi_mcp import dbi_mcp, dbi_mcp_mixed_routes
7
  from clinical_calculators import (
8
  cockcroft_gault_creatinine_clearance,
 
33
  )
34
  import time
35
  import sys
36
+ import logging
37
+ from apscheduler.schedulers.background import BackgroundScheduler
38
+ import pandas as pd
39
+
40
+ try:
41
+ from datasets import load_dataset
42
+ HAVE_DATASETS = True
43
+ except ImportError:
44
+ HAVE_DATASETS = False
45
+
46
+ # Setup logging
47
+ logging.basicConfig(level=logging.INFO)
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
def load_pbs_data():
    """Load PBS data from the Hugging Face Hub and publish it via ``set_pbs_data``.

    The ``cmcmaster/pbs_items`` dataset is requested with the current month
    (``YYYY-MM``) as its configuration name and, if that fails, with the
    previous month. The first configuration that exposes a ``'train'`` split
    is converted to a pandas DataFrame and published.

    Fallback behaviour:
      * ``datasets`` not installed -> an empty DataFrame is published.
      * both months fail and a non-empty table was loaded earlier -> the
        earlier table is kept, so a transient failure during the scheduled
        refresh does not disable PBS lookups.
      * both months fail and nothing was loaded earlier -> an empty
        DataFrame is published.
    """
    if not HAVE_DATASETS:
        logger.warning("`datasets` library not installed. Skipping PBS data load.")
        set_pbs_data(pd.DataFrame())
        return

    today = datetime.now()
    current_month_str = today.strftime("%Y-%m")
    # The first day of this month minus one day always falls in the previous
    # month, regardless of month length or year boundaries.
    last_day_last_month = today.replace(day=1) - timedelta(days=1)
    last_month_str = last_day_last_month.strftime("%Y-%m")

    for month_str in (current_month_str, last_month_str):
        # Deliberately broad best-effort: any failure for one month falls
        # through to the next candidate.
        try:
            logger.info("Attempting to load PBS data for %s", month_str)
            # NOTE(review): trust_remote_code=True allows code shipped with the
            # dataset repository to run locally; confirm this is still required.
            ds = load_dataset("cmcmaster/pbs_items", month_str, trust_remote_code=True)
            if 'train' not in ds:
                logger.error("No 'train' split found in dataset for month %s", month_str)
                continue
            pbs_df = ds['train'].to_pandas()
            set_pbs_data(pbs_df)
            logger.info(
                "Successfully loaded PBS data for %s. Shape: %s", month_str, pbs_df.shape
            )
            return
        except Exception as e:
            logger.warning("Failed to load PBS data for %s: %s", month_str, e)

    # Local import: only needed to inspect the table that is currently published.
    import brand_to_generic

    previous = getattr(brand_to_generic, "pbs_data", None)
    if previous is not None and not previous.empty:
        logger.error(
            "Failed to load PBS data for both %s and %s. Keeping previously loaded PBS data.",
            current_month_str,
            last_month_str,
        )
        return

    logger.error(
        "Failed to load PBS data for both %s and %s. PBS lookups will be disabled.",
        current_month_str,
        last_month_str,
    )
    set_pbs_data(pd.DataFrame())
85
+
86
+ # Initial load on startup
87
+ logger.info("Performing initial load of PBS data...")
88
+ load_pbs_data()
89
+
90
+ # Schedule daily refresh
91
+ scheduler = BackgroundScheduler(daemon=True)
92
+ scheduler.add_job(load_pbs_data, 'interval', days=1)
93
+ scheduler.start()
94
 
95
 
96
  @with_error_handling
 
105
  return standardize_response(result, "brand_to_generic")
106
 
107
 
108
+
 
 
 
109
 
110
 
111
  @with_error_handling
 
279
 
280
 
281
  @with_error_handling
282
def brand_to_generic_lookup_mcp(brand_name: str) -> str:
    """
    Resolve a brand name to generic drug information.

    The lookup itself is delegated to ``_brand_lookup_gradio``; its result is
    passed through ``format_json_output`` before being returned.

    Args:
        brand_name (str): Brand name to look up

    Returns:
        str: JSON string with generic drug information and country-specific data
    """
    return format_json_output(_brand_lookup_gradio(brand_name))
294
 
295
 
296
  @with_error_handling
297
+ def calculate_drug_burden_index_mcp(drug_list: str) -> str:
298
  """
299
  Calculate Drug Burden Index (DBI) from a list of medications with automatic route detection.
300
 
301
+ This intelligent version automatically detects the route of administration for each medication
302
  (oral, transdermal patches, parenteral injections, etc.) and uses the appropriate reference
303
+ data for each route. Perfect for real-world medication lists with mixed formulations.
304
 
305
  Args:
306
  drug_list (str): Drug list (one per line, include dose and frequency - also write "prn" if the drug is a PRN medication)
 
316
  return format_json_output(result)
317
 
318
 
319
+
320
+
321
+
322
  @with_error_handling
323
  def calculate_creatinine_clearance_mcp(
324
  age: str, weight_kg: str, serum_creatinine: str, is_female: str
 
787
  fn=brand_to_generic_lookup_mcp,
788
  inputs=[
789
  gr.Text(label="Brand Name"),
 
 
 
 
790
  ],
791
  outputs=gr.JSON(label="Output"),
792
  title="Brand to Generic",
 
796
 
797
  dbi_calculator_ui = gr.Interface(
798
  fn=calculate_drug_burden_index_mcp,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
799
  inputs=[
800
  gr.Textbox(
801
  label="Drug List (one per line, include dose and frequency)",
 
804
  ),
805
  ],
806
  outputs=gr.JSON(label="DBI Calculation with Route Detection"),
807
+ title="DBI Calculator",
808
+ api_name="dbi_calculator",
809
+ description="Intelligent DBI calculator that automatically detects routes (oral, patches, injections, etc.) and uses appropriate reference data for each medication.",
810
  )
811
 
812
+
813
+
814
  cockcroft_gault_ui = gr.Interface(
815
  fn=calculate_creatinine_clearance_mcp,
816
  inputs=[
 
917
  livertox_ui,
918
  brand_generic_ui,
919
  dbi_calculator_ui,
 
920
  cockcroft_gault_ui,
921
  ckd_epi_ui,
922
  child_pugh_ui,
 
935
  "LiverTox",
936
  "Brand to Generic",
937
  "DBI Calculator",
 
938
  "Creatinine CL",
939
  "eGFR",
940
  "Child-Pugh",
brand_to_generic.py CHANGED
@@ -9,6 +9,10 @@ from typing import Dict, List, Optional
9
  import requests
10
  import csv
11
  from io import StringIO
 
 
 
 
12
 
13
  logger = logging.getLogger(__name__)
14
 
@@ -18,6 +22,28 @@ _session = requests.Session()
18
  DEFAULT_TIMEOUT = 5 # Reduced from 10
19
  FAST_TIMEOUT = 3 # For quick checks
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  class _Throttle:
23
  """Simple host-level throttle (~1 rps)."""
@@ -58,6 +84,7 @@ _RX_RE_FMT = (
58
 
59
  @functools.lru_cache(maxsize=512)
60
  def _rxnorm_lookup(brand: str):
 
61
  r = _get("https://rxnav.nlm.nih.gov/REST/rxcui.json", params={"name": brand})
62
  if not r or not r.json().get("idGroup", {}).get("rxnormId"):
63
  return []
@@ -89,6 +116,7 @@ _OPENFDA_NDC = "https://api.fda.gov/drug/ndc.json"
89
 
90
  @functools.lru_cache(maxsize=512)
91
  def _openfda_ndc(brand: str):
 
92
  r = _get(_OPENFDA_NDC, params={"search": f'brand_name:"{brand}"', "limit": 20})
93
  if not r:
94
  return []
@@ -125,6 +153,7 @@ _DPD = "https://health-products.canada.ca/api/drug/drugproduct/"
125
 
126
  @functools.lru_cache(maxsize=512)
127
  def _dpd_lookup(brand: str):
 
128
  r = _get(_DPD, params={"brandname": brand, "lang": "en", "type": "json"})
129
  if not r:
130
  return []
@@ -146,37 +175,12 @@ def _dpd_lookup(brand: str):
146
  return out
147
 
148
 
149
- _PBS_V3_BASE_URL = "https://data-api.health.gov.au/pbs/api/v3"
150
- _PBS_SUBSCRIPTION_KEY = os.getenv(
151
- "PBS_API_SUBSCRIPTION_KEY", "2384af7c667342ceb5a736fe29f1dc6b"
152
- )
153
-
154
-
155
- def _pbs_v3_get(
156
- endpoint: str, params: Optional[Dict] = None, accept_type: str = "application/json"
157
- ):
158
- """Helper to make GET requests to PBS API v3 with auth and throttling."""
159
- url = f"{_PBS_V3_BASE_URL}/{endpoint}"
160
- headers = {"subscription-key": _PBS_SUBSCRIPTION_KEY, "Accept": accept_type}
161
- host = requests.utils.urlparse(url).netloc
162
- _Throttle.wait(host, gap=5.0) # PBS API specific throttle (1 req per 5 sec)
163
- try:
164
- r = _session.get(url, headers=headers, params=params, timeout=20)
165
- r.raise_for_status()
166
- return r
167
- except Exception as exc:
168
- logger.warning(
169
- "PBS API v3 request failed for %s (params: %s): %s", url, params, exc
170
- )
171
- return None
172
-
173
-
174
  def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
175
  """Parses strength and dosage form from an li_form string."""
176
  if not li_form_str:
177
  return {"strength": None, "dosage_form": None}
178
 
179
- strength_regex = r"(\\d[\\d.,\\s]*(?:mg|mcg|g|mL|L|microlitres|nanograms|IU|%|mmol)(?:[\\s\\/][\\d.,\\s]*(?:mg|mcg|g|mL|L|microlitres|dose(?:s)?))?(?:\\s*\\(.*?\\))?(?:\\s+in\\s+[\\d.,\\s]*(?:mL|L|g|mg))?)"
180
 
181
  strength_match = re.search(strength_regex, li_form_str, re.IGNORECASE)
182
 
@@ -196,7 +200,7 @@ def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
196
  extracted_form = form_after
197
 
198
  if not extracted_form and not extracted_strength:
199
- if not re.search(r"\\d", li_form_str):
200
  extracted_form = li_form_str.strip()
201
  else:
202
  extracted_form = li_form_str.strip()
@@ -209,89 +213,48 @@ def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
209
 
210
  @functools.lru_cache(maxsize=512)
211
  def _pbs_lookup(brand: str):
212
- schedules_resp = _pbs_v3_get("schedules", params={"limit": 1})
213
- if not schedules_resp:
214
  return []
215
- try:
216
- schedules_data = schedules_resp.json()
217
- if not schedules_data.get("data") or not schedules_data["data"][0].get(
218
- "schedule_code"
219
- ):
220
- logger.warning(
221
- "PBS API v3: Could not get schedule code from response: %s",
222
- schedules_data,
223
- )
224
- return []
225
- schedule_code = schedules_data["data"][0]["schedule_code"]
226
- except (ValueError, IndexError, KeyError) as e:
227
- logger.warning("PBS API v3: Error parsing schedules response: %s", e)
228
  return []
229
 
230
- items_resp = _pbs_v3_get(
231
- "items",
232
- params={"schedule_code": schedule_code, "brand_name": brand, "limit": 20},
233
- accept_type="text/csv",
234
- )
235
- if not items_resp:
236
  return []
237
 
238
  out = []
239
- try:
240
- csv_text = items_resp.text
241
- if not csv_text.strip():
242
- logger.info(
243
- "PBS API v3: Received empty CSV for brand '%s' with schedule '%s'",
244
- brand,
245
- schedule_code,
246
- )
247
- return []
248
-
249
- csvfile = StringIO(csv_text)
250
- reader = csv.DictReader(csvfile)
251
- for row in reader:
252
- li_form = row.get("li_form")
253
- parsed_form_strength = _parse_li_form(li_form)
254
-
255
- generic_name = row.get("drug_name", "").strip() or None
256
-
257
- query_params = {
258
- "schedule_code": schedule_code,
259
- "brand_name": requests.utils.quote(brand),
260
  }
261
- source_url_params = "&".join([f"{k}={v}" for k, v in query_params.items()])
262
- source_url = f"{_PBS_V3_BASE_URL}/items?{source_url_params}"
263
-
264
- out.append(
265
- {
266
- "generic_name": generic_name,
267
- "strength": parsed_form_strength["strength"],
268
- "dosage_form": parsed_form_strength["dosage_form"],
269
- "route": row.get("manner_of_administration", "").strip() or None,
270
- "country": "AU",
271
- "source": "PBS API v3",
272
- "ids": {"pbs_item_code": row.get("pbs_code", "").strip()},
273
- "source_url": source_url,
274
- }
275
- )
276
- except csv.Error as e:
277
- logger.warning(
278
- "PBS API v3: CSV parsing error for brand '%s': %s. CSV content: %s",
279
- brand,
280
- e,
281
- csv_text[:500],
282
  )
283
- return []
284
- except Exception as e:
285
- logger.exception(
286
- "PBS API v3: Unexpected error processing items for brand '%s': %s", brand, e
287
- )
288
- return []
289
-
290
  return out
291
 
292
 
293
  @functools.lru_cache(maxsize=512)
294
  def _pubchem_synonym_lookup(brand: str):
 
295
  url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{requests.utils.quote(brand)}/synonyms/JSON"
296
  r = _get(url)
297
  if not r:
@@ -333,9 +296,9 @@ def brand_lookup(
333
 
334
  for fn in (
335
  _pbs_lookup,
336
- _rxnorm_lookup,
337
- _openfda_ndc,
338
- _dpd_lookup,
339
  _pubchem_synonym_lookup,
340
  ):
341
  try:
 
9
  import requests
10
  import csv
11
  from io import StringIO
12
+ try:
13
+ import pandas as pd
14
+ except ImportError:
15
+ pd = None
16
 
17
  logger = logging.getLogger(__name__)
18
 
 
22
  DEFAULT_TIMEOUT = 5 # Reduced from 10
23
  FAST_TIMEOUT = 3 # For quick checks
24
 
25
+ # Global to hold PBS data
26
+ pbs_data: Optional["pd.DataFrame"] = None
27
+
28
+ # Testing mode flag to disable external API calls
29
+ TESTING_MODE = False
30
+
31
def set_pbs_data(data: "pd.DataFrame"):
    """Replace the module-level PBS dataframe read by ``_pbs_lookup``.

    Args:
        data: The new PBS table, or ``None`` to clear it.

    ``_pbs_lookup`` is memoised with ``functools.lru_cache`` but reads the
    global ``pbs_data``. Its cache is therefore cleared whenever the table is
    replaced; otherwise results computed against the old table (including
    empty results cached before any data was loaded) would keep being served.
    """
    global pbs_data
    pbs_data = data
    # Resolved at call time: _pbs_lookup is defined further down the module.
    _pbs_lookup.cache_clear()
    if pbs_data is not None:
        logger.info("PBS data updated. Shape: %s", pbs_data.shape)
    else:
        logger.info("PBS data cleared.")
39
+
40
def set_testing_mode(is_testing: bool):
    """Enable/disable testing mode to bypass external API calls.

    Args:
        is_testing: ``True`` makes the network-backed lookups return ``[]``
            immediately; ``False`` restores normal behaviour.

    The network-backed lookups check ``TESTING_MODE`` inside their bodies but
    are wrapped in ``functools.lru_cache``. A brand that is already cached
    never reaches that check, so the caches are cleared on every toggle to
    make the flag take effect for all subsequent calls.
    """
    global TESTING_MODE
    TESTING_MODE = is_testing
    # Names are resolved at call time; these functions are defined later in
    # the module.
    for cached_lookup in (
        _rxnorm_lookup,
        _openfda_ndc,
        _dpd_lookup,
        _pubchem_synonym_lookup,
    ):
        cached_lookup.cache_clear()
    if TESTING_MODE:
        logger.warning("Testing mode is enabled. External API calls will be bypassed.")
46
+
47
 
48
  class _Throttle:
49
  """Simple host-level throttle (~1 rps)."""
 
84
 
85
  @functools.lru_cache(maxsize=512)
86
  def _rxnorm_lookup(brand: str):
87
+ if TESTING_MODE: return []
88
  r = _get("https://rxnav.nlm.nih.gov/REST/rxcui.json", params={"name": brand})
89
  if not r or not r.json().get("idGroup", {}).get("rxnormId"):
90
  return []
 
116
 
117
  @functools.lru_cache(maxsize=512)
118
  def _openfda_ndc(brand: str):
119
+ if TESTING_MODE: return []
120
  r = _get(_OPENFDA_NDC, params={"search": f'brand_name:"{brand}"', "limit": 20})
121
  if not r:
122
  return []
 
153
 
154
  @functools.lru_cache(maxsize=512)
155
  def _dpd_lookup(brand: str):
156
+ if TESTING_MODE: return []
157
  r = _get(_DPD, params={"brandname": brand, "lang": "en", "type": "json"})
158
  if not r:
159
  return []
 
175
  return out
176
 
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  def _parse_li_form(li_form_str: Optional[str]) -> Dict[str, Optional[str]]:
179
  """Parses strength and dosage form from an li_form string."""
180
  if not li_form_str:
181
  return {"strength": None, "dosage_form": None}
182
 
183
+ strength_regex = r"(\d[\d.,\s]*(?:mg|mcg|g|mL|L|microlitres|nanograms|IU|%|mmol)(?:[\s\/][\d.,\s]*(?:mg|mcg|g|mL|L|microlitres|dose(?:s)?))?(?:\s*\(.*?\))?(?:\s+in\s+[\d.,\s]*(?:mL|L|g|mg))?)"
184
 
185
  strength_match = re.search(strength_regex, li_form_str, re.IGNORECASE)
186
 
 
200
  extracted_form = form_after
201
 
202
  if not extracted_form and not extracted_strength:
203
+ if not re.search(r"\d", li_form_str):
204
  extracted_form = li_form_str.strip()
205
  else:
206
  extracted_form = li_form_str.strip()
 
213
 
214
  @functools.lru_cache(maxsize=512)
215
  def _pbs_lookup(brand: str):
216
+ if pbs_data is None or pbs_data.empty:
217
+ logger.warning("PBS data not loaded or empty. Skipping PBS lookup for '%s'.", brand)
218
  return []
219
+
220
+ brand_lower = brand.lower()
221
+
222
+ if 'brand_name' not in pbs_data.columns:
223
+ logger.error("PBS data does not contain 'brand_name' column. Skipping lookup.")
 
 
 
 
 
 
 
 
224
  return []
225
 
226
+ results_df = pbs_data[pbs_data['brand_name'].str.lower() == brand_lower]
227
+
228
+ if results_df.empty:
 
 
 
229
  return []
230
 
231
  out = []
232
+ source_url = "https://huggingface.co/datasets/cmcmaster/pbs_items"
233
+
234
+ for _, row in results_df.iterrows():
235
+ li_form = row.get("li_form")
236
+ parsed_form_strength = _parse_li_form(li_form)
237
+ generic_name = row.get("drug_name", "").strip() or None
238
+
239
+ out.append(
240
+ {
241
+ "generic_name": generic_name,
242
+ "strength": parsed_form_strength["strength"],
243
+ "dosage_form": parsed_form_strength["dosage_form"],
244
+ "route": row.get("manner_of_administration", "").strip() or None,
245
+ "country": "AU",
246
+ "source": "PBS (via Hugging Face Dataset)",
247
+ "ids": {"pbs_item_code": row.get("pbs_code", "").strip()},
248
+ "source_url": source_url,
 
 
 
 
249
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  )
251
+
 
 
 
 
 
 
252
  return out
253
 
254
 
255
  @functools.lru_cache(maxsize=512)
256
  def _pubchem_synonym_lookup(brand: str):
257
+ if TESTING_MODE: return []
258
  url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{requests.utils.quote(brand)}/synonyms/JSON"
259
  r = _get(url)
260
  if not r:
 
296
 
297
  for fn in (
298
  _pbs_lookup,
299
+ # _rxnorm_lookup, These three fail, so skip them for now
300
+ # _openfda_ndc,
301
+ # _dpd_lookup,
302
  _pubchem_synonym_lookup,
303
  ):
304
  try:
dbi_mcp.py CHANGED
@@ -10,6 +10,7 @@ from typing import Dict, List, Tuple, Optional, Union, Mapping, Sequence
10
  from brand_to_generic import brand_lookup
11
 
12
  import csv
 
13
 
14
  try:
15
  import pandas as pd
@@ -23,8 +24,11 @@ __all__ = [
23
  "calculate_dbi",
24
  "print_report",
25
  "detect_route_from_text",
 
 
26
  "dbi_mcp",
27
  "dbi_mcp_mixed_routes",
 
28
  ]
29
 
30
  PatientInput = Union[
@@ -33,6 +37,15 @@ PatientInput = Union[
33
  Mapping[str, float],
34
  ]
35
 
 
 
 
 
 
 
 
 
 
36
  # Route detection patterns
37
  ROUTE_PATTERNS = {
38
  'transdermal': [
@@ -96,6 +109,107 @@ def detect_route_from_text(text: str) -> str:
96
  return 'oral'
97
 
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  def load_reference(
100
  ref_path: Path,
101
  *,
@@ -204,15 +318,17 @@ def calculate_dbi(
204
 
205
  logger = logging.getLogger(__name__)
206
 
207
- UNIT_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(?P<unit>mcg|μg|mg|g)\b", re.I)
208
 
209
- PATCH_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(mcg|μg)\s*/\s*hr", re.I)
210
 
211
- CONC_PAT = re.compile(r"(?P<drug_val>\d+(?:[.,]\d+)?)(?:\s*)(?P<drug_unit>mcg|μg|mg|g)\s*/\s*(?P<vol_val>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
 
 
212
 
213
  VOL_PAT = re.compile(r"(?P<voldose>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
214
 
215
- QTY_PAT = re.compile(r"(?<!\d)(?P<qty>\d+)\s*(?:tab|caps?|puff|spray|patch|patches)s?\b", re.I)
216
 
217
  FREQ_PAT = re.compile(r"\b(q\d{1,2}h|qd|od|daily|once daily|bid|bd|twice daily|tid|tds|three times daily|qid|four times daily|nocte|mane|am|pm)\b", re.I)
218
  EVERY_HOURS_PAT = re.compile(r"q(\d{1,2})h", re.I)
@@ -227,13 +343,16 @@ _FREQ_MAP = {
227
  }
228
 
229
  def _unit_to_mg(val: float, unit: str) -> float:
230
- unit = unit.lower()
231
  if unit == "mg":
232
  return val
233
- if unit in {"g"}:
234
  return val * 1_000
235
- if unit in {"mcg", "μg"}:
236
  return val / 1_000
 
 
 
237
  return math.nan
238
 
239
 
@@ -247,7 +366,8 @@ def _freq_to_per_day(token: str) -> float:
247
  return 24 / hrs if hrs else 1
248
  return 1
249
 
250
- Parsed = Tuple[str, float, bool, str] # Added route detection
 
251
 
252
  @functools.lru_cache(maxsize=2048)
253
  def _parse_line(line: str) -> Optional[Parsed]:
@@ -266,6 +386,65 @@ def _parse_line(line: str) -> Optional[Parsed]:
266
  # Override route detection for patches
267
  return (name_part, mg_day, is_prn, "transdermal")
268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  m_conc = CONC_PAT.search(original)
270
  m_vol = VOL_PAT.search(original)
271
  if m_conc and m_vol:
@@ -288,10 +467,17 @@ def _parse_line(line: str) -> Optional[Parsed]:
288
  m = UNIT_PAT.search(original)
289
  if m:
290
  strength_mg = _unit_to_mg(float(m.group("val").replace(",", ".")), m.group("unit"))
291
- qty = 1
 
 
 
292
  m_qty = QTY_PAT.search(original)
293
  if m_qty:
294
- qty = int(m_qty.group("qty"))
 
 
 
 
295
  freq = 1.0
296
  m_freq = FREQ_PAT.search(original)
297
  if m_freq:
@@ -302,9 +488,61 @@ def _parse_line(line: str) -> Optional[Parsed]:
302
  name_part = re.sub(r"\s+", " ", name_part).strip()
303
  return (name_part, mg_day, is_prn, detected_route)
304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  logger.debug("unhandled line: %s", original)
306
  return None
307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  def _smart_drug_lookup(raw_name: str, all_routes_reference: Dict[str, Dict[str, Tuple[float, str]]]) -> str:
309
  """
310
  Smart drug name resolution that avoids unnecessary API calls.
@@ -514,6 +752,159 @@ def dbi_mcp_mixed_routes(text_block: str, *, ref_csv: Union[str, Path] = "dbi_re
514
  }
515
 
516
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  if __name__ == "__main__":
518
  import sys
519
  import pprint
 
10
  from brand_to_generic import brand_lookup
11
 
12
  import csv
13
+ import json
14
 
15
  try:
16
  import pandas as pd
 
24
  "calculate_dbi",
25
  "print_report",
26
  "detect_route_from_text",
27
+ "detect_combination_drug",
28
+ "split_combination_drug_simple",
29
  "dbi_mcp",
30
  "dbi_mcp_mixed_routes",
31
+ "dbi_mcp_with_combinations",
32
  ]
33
 
34
  PatientInput = Union[
 
37
  Mapping[str, float],
38
  ]
39
 
40
+ # Combination drug detection patterns
41
+ COMBINATION_PATTERNS = [
42
+ r'\bco-?\w+\b', # co- prefix with optional hyphen (co-codamol, cocodamol)
43
+ r'\b\w+[-/]\w+\b', # hyphen or slash separated (paracetamol-codeine, aspirin/caffeine)
44
+ r'\b\w+\s*\+\s*\w+\b', # plus sign (aspirin + caffeine)
45
+ r'\b\w+\s*with\s+\w+\b', # "with" combinations
46
+ r'\b\w+\s*and\s+\w+\b', # "and" combinations
47
+ ]
48
+
49
  # Route detection patterns
50
  ROUTE_PATTERNS = {
51
  'transdermal': [
 
109
  return 'oral'
110
 
111
 
112
def detect_combination_drug(drug_name: str) -> bool:
    """
    Report whether *drug_name* looks like a combination product.

    The lower-cased name is tested against every regex in
    ``COMBINATION_PATTERNS``; failing that, it is checked for two mg doses
    joined by a hyphen or slash (e.g., "500mg-9.6mg").
    """
    lowered = drug_name.lower()

    if any(re.search(pattern, lowered) for pattern in COMBINATION_PATTERNS):
        return True

    # Two mg strengths separated by "-" or "/" also indicate a combination.
    paired_mg_doses = r'\d+(?:\.\d+)?\s*mg\s*[-/]\s*\d+(?:\.\d+)?\s*mg'
    return re.search(paired_mg_doses, lowered) is not None
127
+
128
+
129
def split_combination_drug_simple(drug_text: str) -> List[Tuple[str, str, str]]:
    """
    Simple rule-based splitting for common combination patterns.

    Two strategies are tried in order:

    1. A table of known combination names (generic "co-" names and a few
       brand names), matched as a case-insensitive substring of *drug_text*.
    2. Splitting the name part (everything before the first digit) on the
       first of ``-``, ``/`` or ``+`` that yields exactly two parts. Parts of
       two characters or fewer are dropped.

    Args:
        drug_text: Free-text medication line, e.g. "aspirin/caffeine 500mg".

    Returns:
        List of (component_name, original_text, notes); empty when no rule
        applies.
    """
    drug_text_lower = drug_text.lower()

    # Handle common combinations
    known_combinations = {
        'co-codamol': [('paracetamol', 'paracetamol component of co-codamol'),
                       ('codeine', 'codeine component of co-codamol')],
        'cocodamol': [('paracetamol', 'paracetamol component of co-codamol'),
                      ('codeine', 'codeine component of co-codamol')],
        'co-trimoxazole': [('trimethoprim', 'trimethoprim component of co-trimoxazole'),
                           ('sulfamethoxazole', 'sulfamethoxazole component of co-trimoxazole')],
        'cotrimoxazole': [('trimethoprim', 'trimethoprim component of co-trimoxazole'),
                          ('sulfamethoxazole', 'sulfamethoxazole component of co-trimoxazole')],
        'paracetamol-codeine': [('paracetamol', 'paracetamol component'),
                                ('codeine', 'codeine component')],
        'aspirin-caffeine': [('aspirin', 'aspirin component'),
                             ('caffeine', 'caffeine component')],
        'tylenol-codeine': [('paracetamol', 'paracetamol component'),
                            ('codeine', 'codeine component')],
        # Brand name combinations
        'vytorin': [('ezetimibe', 'ezetimibe component of Vytorin'),
                    ('simvastatin', 'simvastatin component of Vytorin')],
        'exforge': [('amlodipine', 'amlodipine component of Exforge'),
                    ('valsartan', 'valsartan component of Exforge')],
        'caduet': [('amlodipine', 'amlodipine component of Caduet'),
                   ('atorvastatin', 'atorvastatin component of Caduet')],
        'janumet': [('sitagliptin', 'sitagliptin component of Janumet'),
                    ('metformin', 'metformin component of Janumet')],
        'combigan': [('brimonidine', 'brimonidine component of Combigan'),
                     ('timolol', 'timolol component of Combigan')],
    }

    # Check for known combinations (first matching table entry wins).
    for combo_name, combo_components in known_combinations.items():
        if combo_name in drug_text_lower:
            return [(comp_name, drug_text, note) for comp_name, note in combo_components]

    # Extract the drug name part (before dosing info) and try each separator.
    # The per-separator check below is the only gate, so "+" combinations
    # such as "aspirin + caffeine" are split as well.
    drug_name_part = re.split(r'\d+', drug_text)[0].strip()
    for sep in ('-', '/', '+'):
        if sep not in drug_name_part:
            continue
        parts = [part.strip() for part in drug_name_part.split(sep)]
        if len(parts) != 2:
            continue
        # Avoid single letters / two-character fragments.
        return [
            (part, drug_text, 'Component of combination drug')
            for part in parts
            if len(part) > 2
        ]

    return []
189
+
190
+
191
def needs_llm_splitting(drug_text: str) -> bool:
    """
    Decide whether a medication line should be handed to an LLM for splitting.

    Returns False when ``detect_combination_drug`` does not flag the text.
    Otherwise returns True when ``split_combination_drug_simple`` finds no
    components, or when any component name is shorter than three characters
    or consists only of digits.
    """
    if not detect_combination_drug(drug_text):
        return False

    # Rule-based splitting gets the first attempt.
    simple_components = split_combination_drug_simple(drug_text)
    if not simple_components:
        return True

    # Any too-short or purely numeric component counts as an unclear split.
    return any(
        len(comp_name) < 3 or comp_name.isdigit()
        for comp_name, _, _ in simple_components
    )
211
+
212
+
213
  def load_reference(
214
  ref_path: Path,
215
  *,
 
318
 
319
  logger = logging.getLogger(__name__)
320
 
321
+ UNIT_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(?P<unit>mcg|μg|mg|g|iu|units?|micrograms?|mmol)\b", re.I)
322
 
323
+ PATCH_PAT = re.compile(r"(?P<val>\d+(?:[.,]\d+)?)(?:\s*)(mcg|μg|microg)\s*/\s*hr", re.I)
324
 
325
+ PERCENT_PAT = re.compile(r"\b(?P<percent>\d+(?:\.\d+)?)\s*%\b")
326
+
327
+ CONC_PAT = re.compile(r"(?P<drug_val>\d+(?:[.,]\d+)?)(?:\s*)(?P<drug_unit>mcg|μg|mg|g|iu|units?)\s*/\s*(?P<vol_val>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
328
 
329
  VOL_PAT = re.compile(r"(?P<voldose>\d+(?:[.,]\d+)?)(?:\s*)m ?l", re.I)
330
 
331
+ QTY_PAT = re.compile(r"(?<!\d)(?P<qty>\d+(?:\s*-\s*\d+)?)\s*(?:tab|caps?|puff|spray|patch|patches|sachet|tube|inhalation|drop)s?\b", re.I)
332
 
333
  FREQ_PAT = re.compile(r"\b(q\d{1,2}h|qd|od|daily|once daily|bid|bd|twice daily|tid|tds|three times daily|qid|four times daily|nocte|mane|am|pm)\b", re.I)
334
  EVERY_HOURS_PAT = re.compile(r"q(\d{1,2})h", re.I)
 
343
  }
344
 
345
  def _unit_to_mg(val: float, unit: str) -> float:
346
+ unit = unit.lower().removesuffix('s')
347
  if unit == "mg":
348
  return val
349
+ if unit == "g":
350
  return val * 1_000
351
+ if unit in {"mcg", "μg", "microgram"}:
352
  return val / 1_000
353
+ if unit in {"iu", "unit", "mmol"}:
354
+ logger.debug("Cannot reliably convert '%s' to mg. Returning 0.", unit)
355
+ return 0.0
356
  return math.nan
357
 
358
 
 
366
  return 24 / hrs if hrs else 1
367
  return 1
368
 
369
# Result of parsing one medication line: (name, mg_day, is_prn, route).
Parsed = Tuple[str, float, bool, str]

# Parsed fields plus combination info:
# (name, mg_day, is_prn, route, is_combination, components),
# where each component is a 3-tuple of strings.
ParsedCombination = Tuple[str, float, bool, str, bool, List[Tuple[str, str, str]]]
371
 
372
  @functools.lru_cache(maxsize=2048)
373
  def _parse_line(line: str) -> Optional[Parsed]:
 
386
  # Override route detection for patches
387
  return (name_part, mg_day, is_prn, "transdermal")
388
 
389
+ # Try parsing percentage-based topicals/solutions before standard units
390
+ m_percent = PERCENT_PAT.search(original)
391
+ if m_percent:
392
+ percent_val = float(m_percent.group("percent"))
393
+
394
+ # For liquids where volume is given (e.g., 2% solution, 10mL dose)
395
+ m_vol = VOL_PAT.search(original)
396
+ if m_vol:
397
+ voldose_ml = float(m_vol.group("voldose").replace(",", "."))
398
+ # Assume % is g/100mL for liquids
399
+ strength_g_per_100ml = percent_val
400
+ mg_per_dose = (strength_g_per_100ml * 1000) * (voldose_ml / 100)
401
+
402
+ freq = 1.0
403
+ m_freq = FREQ_PAT.search(original)
404
+ if m_freq:
405
+ freq = _freq_to_per_day(m_freq.group(0))
406
+
407
+ mg_day = mg_per_dose * freq
408
+ name_part = original[:m_percent.start()].strip()
409
+ name_part = re.sub(r"[^A-Za-z0-9\s-]", " ", name_part).strip()
410
+ return (name_part, mg_day, is_prn, detected_route)
411
+
412
+ # Handle drops with percentage strength
413
+ if 'drop' in original.lower():
414
+ # Assume 20 drops/mL for ophthalmic solutions
415
+ g_per_100ml = percent_val
416
+ mg_per_ml = g_per_100ml * 10 # 1% -> 1g/100mL -> 10mg/mL
417
+
418
+ qty = 1.0
419
+ m_qty = QTY_PAT.search(original) # QTY_PAT now includes 'drop'
420
+ if m_qty:
421
+ qty_str = m_qty.group("qty").split('-')[-1].strip() # Use upper end of range
422
+ try:
423
+ qty = float(qty_str)
424
+ except ValueError:
425
+ qty = 1.0
426
+
427
+ # Dose in mg = (number of drops / 20 drops_per_mL) * mg_per_mL
428
+ mg_per_dose = (qty / 20.0) * mg_per_ml
429
+
430
+ freq = 1.0
431
+ m_freq = FREQ_PAT.search(original)
432
+ if m_freq:
433
+ freq = _freq_to_per_day(m_freq.group(0))
434
+
435
+ mg_day = mg_per_dose * freq
436
+ name_part = original[:m_percent.start()].strip()
437
+ name_part = re.sub(r"[^A-Za-z0-9\s-]", " ", name_part).strip()
438
+ return (name_part, mg_day, is_prn, detected_route)
439
+
440
+ # For cases with 'application' or 'ointment' (e.g., 0.05% cream, 1 application)
441
+ if 'application' in original.lower() or 'ointment' in original.lower():
442
+ # Can't calculate mg dose, but we can parse the drug name.
443
+ name_part = original[:m_percent.start()].strip()
444
+ name_part = re.sub(r"[^A-Za-z0-9\s-]", " ", name_part).strip()
445
+ logger.debug("Parsed %%-based item but cannot quantify mg/day: %s", original)
446
+ return (name_part, 0.0, is_prn, detected_route)
447
+
448
  m_conc = CONC_PAT.search(original)
449
  m_vol = VOL_PAT.search(original)
450
  if m_conc and m_vol:
 
467
  m = UNIT_PAT.search(original)
468
  if m:
469
  strength_mg = _unit_to_mg(float(m.group("val").replace(",", ".")), m.group("unit"))
470
+ if math.isnan(strength_mg):
471
+ logger.debug("Unhandled unit '%s' in line: %s", m.group("unit"), original)
472
+ return None
473
+ qty = 1.0
474
  m_qty = QTY_PAT.search(original)
475
  if m_qty:
476
+ qty_str = m_qty.group("qty").split('-')[-1].strip()
477
+ try:
478
+ qty = float(qty_str)
479
+ except ValueError:
480
+ qty = 1.0
481
  freq = 1.0
482
  m_freq = FREQ_PAT.search(original)
483
  if m_freq:
 
488
  name_part = re.sub(r"\s+", " ", name_part).strip()
489
  return (name_part, mg_day, is_prn, detected_route)
490
 
491
+ # Handle unitless doses like "..., 5, oral" or "..., 2.5-5, oral"
492
+ m_unitless = re.search(r"[,\(]\s*(?P<dose>\d+(?:\.\d+)?(?:\s*-\s*\d+(?:\.\d+)?)?)\s*,\s*(?:oral|sublingual|buccal)", original, re.I)
493
+ if m_unitless:
494
+ dose_str = m_unitless.group("dose").split('-')[-1].strip()
495
+ try:
496
+ strength_mg = float(dose_str) # Assume mg
497
+ freq = 1.0
498
+ m_freq = FREQ_PAT.search(original)
499
+ if m_freq:
500
+ freq = _freq_to_per_day(m_freq.group(0))
501
+
502
+ mg_day = strength_mg * freq
503
+ name_part = original[:m_unitless.start()].strip()
504
+ name_part = re.sub(r"\(.*?\)", "", name_part).strip() # Remove bracketed part of name
505
+ return (name_part, mg_day, is_prn, detected_route)
506
+ except ValueError:
507
+ pass # Could not convert to float
508
+
509
  logger.debug("unhandled line: %s", original)
510
  return None
511
 
512
+
513
def _parse_line_with_combinations(line: str) -> Optional[ParsedCombination]:
    """
    Parse a medication line and annotate it with combination-drug info.

    Delegates to ``_parse_line``; when that yields nothing, returns None.
    Otherwise returns ``(name, mg_day, is_prn, route, is_combination,
    components)``. ``components`` is the result of
    ``split_combination_drug_simple`` (tried on the parsed name first, then
    on the whole line) and is an empty list when the line is not a
    combination or could not be split.
    """
    base = _parse_line(line)
    if not base:
        return None

    name, mg_day, is_prn, route = base

    # Detection runs on both the extracted name and the raw line.
    flagged_by_name = detect_combination_drug(name)
    flagged_by_line = detect_combination_drug(line)

    if not (flagged_by_name or flagged_by_line):
        # Plain single-ingredient entry.
        return (name, mg_day, is_prn, route, False, [])

    # Prefer a split of the cleaned name; fall back to the full line.
    components = split_combination_drug_simple(name) or split_combination_drug_simple(line)

    if not components:
        logger.debug(f"Combination drug detected but couldn't split: {name}")
        # Flagged as a combination, but left without components
        # (may need LLM splitting).
        return (name, mg_day, is_prn, route, True, [])

    logger.debug(f"Detected combination drug: {name} -> {[c[0] for c in components]}")
    return (name, mg_day, is_prn, route, True, components)
545
+
546
  def _smart_drug_lookup(raw_name: str, all_routes_reference: Dict[str, Dict[str, Tuple[float, str]]]) -> str:
547
  """
548
  Smart drug name resolution that avoids unnecessary API calls.
 
752
  }
753
 
754
 
755
def dbi_mcp_with_combinations(text_block: str, *, ref_csv: Union[str, Path] = "dbi_reference_by_route.csv") -> dict:
    """
    DBI calculator variant that expands combination drugs into components.

    Each line of ``text_block`` is parsed with
    ``_parse_line_with_combinations``; lines that do not parse are returned
    under ``"unmatched_input"``. Doses are grouped by detected route and
    generic name (resolved via ``_smart_drug_lookup``), once including PRN
    entries and once excluding them. For a combination with known components,
    every component is credited with the full parsed ``mg_day`` -- a
    simplification, since the real per-component ratio is not known here.

    Routes missing from the reference loaded from ``ref_csv`` contribute
    nothing to the totals or to ``"route_breakdown"``.

    Returns a dict with the totals (rounded to 2 decimals), per-route
    breakdown, flattened per-drug details, per-medication details, the list
    of combination drugs and the unmatched input lines.
    """
    all_routes_ref = load_all_routes_reference(Path(ref_csv))

    parsed_rows: List[ParsedCombination] = []
    unmatched: List[str] = []
    route_stats: Dict[str, int] = {}

    for raw_line in text_block.splitlines():
        row = _parse_line_with_combinations(raw_line)
        if not row:
            unmatched.append(raw_line)
            continue
        parsed_rows.append(row)
        row_route = row[3]  # detected route
        route_stats[row_route] = route_stats.get(row_route, 0) + 1

    # Per-route dose totals keyed by generic name, with and without PRN items.
    meds_by_route_with: Dict[str, Dict[str, float]] = {}
    meds_by_route_without: Dict[str, Dict[str, float]] = {}
    medication_details: List[Dict] = []
    combination_drugs: List[Dict] = []

    def _accumulate(route_key: str, generic_key: str, dose: float, prn: bool) -> None:
        # Both route maps are created together so they always share the same routes.
        bucket_with = meds_by_route_with.setdefault(route_key, {})
        bucket_without = meds_by_route_without.setdefault(route_key, {})
        bucket_with[generic_key] = bucket_with.get(generic_key, 0.0) + dose
        if not prn:
            bucket_without[generic_key] = bucket_without.get(generic_key, 0.0) + dose

    for name, mg_day, is_prn, detected_route, is_combination, components in parsed_rows:
        if not (is_combination and components):
            # Single drug, or a combination that could not be split.
            generic = _smart_drug_lookup(name, all_routes_ref)
            _accumulate(detected_route, generic, mg_day, is_prn)
            medication_details.append({
                "original_text": f"{name} {mg_day}mg/day",
                "generic_name": generic,
                "dose_mg_day": mg_day,
                "detected_route": detected_route,
                "is_prn": is_prn,
                "is_combination": is_combination,
                "combination_note": "Detected as combination but couldn't split" if is_combination else None
            })
            continue

        combo_entry = {
            "original_text": f"{name} {mg_day}mg/day",
            "is_combination": True,
            "components": [],
            "detected_route": detected_route,
            "is_prn": is_prn
        }
        for comp_name, _source_text, note in components:
            generic = _smart_drug_lookup(comp_name, all_routes_ref)
            # Every component receives the full dose (no ratio splitting).
            _accumulate(detected_route, generic, mg_day, is_prn)
            combo_entry["components"].append({
                "component_name": comp_name,
                "generic_name": generic,
                "note": note,
                "dose_mg_day": mg_day
            })

        # The same dict object is listed in both collections.
        combination_drugs.append(combo_entry)
        medication_details.append(combo_entry)

    def _format_details(rows, route_name):
        # Turn calculate_dbi's 4-tuples into dict records tagged with the route.
        return [
            {
                "generic_name": g,
                "dose_mg_day": d,
                "delta_mg": delta,
                "dbi_component": dbi,
                "route": route_name
            }
            for g, d, delta, dbi in rows
        ]

    route_results = {}
    total_dbi_with = 0.0
    total_dbi_without = 0.0
    all_details_with = []
    all_details_without = []

    for route, doses_with in meds_by_route_with.items():
        if route not in all_routes_ref:
            # No reference data for this route: nothing to score.
            continue
        route_ref = all_routes_ref[route]

        dbi_with, details_with = calculate_dbi(doses_with, route_ref)
        dbi_without, details_without = calculate_dbi(meds_by_route_without[route], route_ref)

        total_dbi_with += dbi_with
        total_dbi_without += dbi_without

        route_details_with = _format_details(details_with, route)
        route_details_without = _format_details(details_without, route)
        all_details_with.extend(route_details_with)
        all_details_without.extend(route_details_without)

        route_results[route] = {
            "dbi_with_prn": round(dbi_with, 2),
            "dbi_without_prn": round(dbi_without, 2),
            "details_with_prn": route_details_with,
            "details_without_prn": route_details_without,
            "medication_count": route_stats.get(route, 0)
        }

    return {
        "combination_handling": True,
        "total_dbi_without_prn": round(total_dbi_without, 2),
        "total_dbi_with_prn": round(total_dbi_with, 2),
        "routes_detected": list(route_stats.keys()),
        "route_statistics": route_stats,
        "route_breakdown": route_results,
        "all_details_without_prn": all_details_without,
        "all_details_with_prn": all_details_with,
        "medication_details": medication_details,
        "combination_drugs": combination_drugs,
        "unmatched_input": unmatched,
    }
906
+
907
+
908
  if __name__ == "__main__":
909
  import sys
910
  import pprint
requirements.txt CHANGED
@@ -1,5 +1,7 @@
1
- gradio[mcp]
2
  requests
 
 
3
  datasets
 
4
  beautifulsoup4
5
- pandas
 
 
1
  requests
2
+ pandas
3
+ gradio
4
  datasets
5
+ apscheduler
6
  beautifulsoup4
7
+ lxml