Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -31,7 +31,7 @@ logger = logging.getLogger("health-agent")
 # --- Environment & config -------------------------------------------------
 load_dotenv()
 from pathlib import Path
-REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", "
+REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", "reports")).resolve()  # e.g. /app/reports/<patient_id>/<file.pdf>
 SSRI_FILE = Path(os.getenv("SSRI_FILE", "app/medicationCategories/SSRI_list.txt")).resolve()
 MISC_FILE = Path(os.getenv("MISC_FILE", "app/medicationCategories/MISC_list.txt")).resolve()
 GROQ_API_KEY = os.getenv("GROQ_API_KEY", None)
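All four values above are environment-driven with the defaults shown. A minimal sketch for checking how they resolve in a local checkout, reusing the same python-dotenv setup app.py already imports (this script is illustrative and not part of the commit):

# Sanity-check the resolved config paths; missing files simply print False.
import os
from pathlib import Path
from dotenv import load_dotenv

load_dotenv()
for var, default in [
    ("REPORTS_ROOT", "reports"),
    ("SSRI_FILE", "app/medicationCategories/SSRI_list.txt"),
    ("MISC_FILE", "app/medicationCategories/MISC_list.txt"),
]:
    p = Path(os.getenv(var, default)).resolve()
    print(var, p, p.exists())
print("GROQ_API_KEY set:", bool(os.getenv("GROQ_API_KEY")))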
@@ -59,64 +59,59 @@ Fix missing quotes, trailing commas, unescaped newlines, stray assistant labels,
 
 # -------------------- JSON extraction / sanitizer ---------------------------
 def extract_json_from_llm_response(raw_response: str) -> dict:
-    json_string = json_string
-        r'("logic"\s*:\s*")([\s\S]+?)(?=",\s*"[A-Za-z_]\w*"\s*:\s*)',
-        _esc,
-        json_string
-    )
-    # --- 5) Remove trailing commas before } or ] ---
-    json_string = re.sub(r',\s*(?=[}\],])', '', json_string)
-    json_string = re.sub(r',\s*,', ',', json_string)
-    # --- 6) Balance braces if obvious excess ---
-    ob, cb = json_string.count('{'), json_string.count('}')
-    if cb > ob:
-        excess = cb - ob
-        json_string = json_string.rstrip()[:-excess]
-    # --- 7) Escape literal newlines inside strings so json.loads can parse ---
-    def _escape_newlines_in_strings(s: str) -> str:
-        return re.sub(
-            r'"((?:[^"\\]|\\.)*?)"',
-            lambda m: '"' + m.group(1).replace('\n', '\\n').replace('\r', '\\r') + '"',
-            s,
-            flags=re.DOTALL
-        )
-    json_string = _escape_newlines_in_strings(json_string)
+    try:
+        # --- 1) Pull out the JSON code-block if present ---
+        md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
+        json_string = md.group(1).strip() if md else raw_response
+
+        # --- 2) Trim to the outermost { … } so we drop any prefix/suffix junk ---
+        first, last = json_string.find('{'), json_string.rfind('}')
+        if 0 <= first < last:
+            json_string = json_string[first:last+1]
+
+        # --- 3) PRE-CLEANUP: remove rogue assistant labels, fix boolean quotes ---
+        json_string = re.sub(r'\b\w+\s*{', '{', json_string)
+        json_string = re.sub(r'"assistant"\s*:', '', json_string)
+        json_string = re.sub(r'\b(false|true)"', r'\1', json_string)
+
+        # --- 4) Escape embedded quotes in long string fields (best-effort) ---
+        def _esc(m):
+            prefix, body = m.group(1), m.group(2)
+            return prefix + body.replace('"', r'\"')
+        json_string = re.sub(
+            r'("logic"\s*:\s*")([\s\S]+?)(?=",\s*"[A-Za-z_]\w*"\s*:\s*)',
+            _esc,
+            json_string
+        )
+
+        # --- 5) Remove trailing commas before } or ] ---
+        json_string = re.sub(r',\s*(?=[}\],])', '', json_string)
+        json_string = re.sub(r',\s*,', ',', json_string)
+
+        # --- 6) Balance braces if obvious excess ---
+        ob, cb = json_string.count('{'), json_string.count('}')
+        if cb > ob:
+            excess = cb - ob
+            json_string = json_string.rstrip()[:-excess]
+
+        # --- 7) Escape literal newlines inside strings so json.loads can parse ---
+        def _escape_newlines_in_strings(s: str) -> str:
+            return re.sub(
+                r'"((?:[^"\\]|\\.)*?)"',
+                lambda m: '"' + m.group(1).replace('\n', '\\n').replace('\r', '\\r') + '"',
+                s,
+                flags=re.DOTALL
+            )
+        json_string = _escape_newlines_in_strings(json_string)
+
+        # Final parse
+        return json.loads(json_string)
+    except Exception as e:
+        logger.error(f"Failed to extract JSON from LLM response: {e}")
+        raise
 
 # -------------------- Utility: Bloatectomy wrapper ------------------------
 def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
-    """
-    Uses the bloatectomy class to remove duplicates.
-    style: 'highlight'|'bold'|'remov' ; we use 'remov' to delete duplicates.
-    Returns cleaned text (single string).
-    """
     try:
         b = bloatectomy(text, style=style, output="html")
         tokens = getattr(b, "tokens", None)
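As a quick illustration of what the reworked sanitizer tolerates, a minimal sketch (the sample LLM response below is invented for demonstration; it assumes extract_json_from_llm_response is importable from app.py):

# The fenced block, the quoted boolean and the trailing comma are all
# cleaned up by steps 1, 3 and 5 before json.loads runs.
raw = '''Here is the result:
```json
{"diagnosis": "anemia", "critical": false", "notes": "low Hb",}
```'''
parsed = extract_json_from_llm_response(raw)
print(parsed["diagnosis"])  # -> anemia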
@@ -129,55 +124,67 @@ def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
 
 # --------------- Utility: medication extraction (adapted) -----------------
 def readDrugs_from_file(path: Path):
+    try:
+        if not path.exists():
+            return {}, []
+        txt = path.read_text(encoding="utf-8", errors="ignore")
+        generics = re.findall(r"^(.*?)\|", txt, re.MULTILINE)
+        generics = [g.lower() for g in generics if g]
+        lines = [ln.strip().lower() for ln in txt.splitlines() if ln.strip()]
+        return dict(zip(generics, lines)), generics
+    except Exception:
+        logger.exception(f"Failed to read drugs from file: {path}")
         return {}, []
-    txt = path.read_text(encoding="utf-8", errors="ignore")
-    generics = re.findall(r"^(.*?)\|", txt, re.MULTILINE)
-    generics = [g.lower() for g in generics if g]
-    lines = [ln.strip().lower() for ln in txt.splitlines() if ln.strip()]
-    return dict(zip(generics, lines)), generics
 
 def addToDrugs_line(line: str, drugs_flags: List[int], listing: Dict[str,str], genList: List[str]) -> List[int]:
+    try:
+        gen_index = {g:i for i,g in enumerate(genList)}
+        for generic, pattern_line in listing.items():
+            try:
+                if re.search(pattern_line, line, re.I):
+                    idx = gen_index.get(generic)
+                    if idx is not None:
+                        drugs_flags[idx] = 1
+            except re.error:
+                continue
+        return drugs_flags
+    except Exception:
+        logger.exception("Error in addToDrugs_line")
+        return drugs_flags
 
 def extract_medications_from_text(text: str) -> List[str]:
+    try:
+        ssri_map, ssri_generics = readDrugs_from_file(SSRI_FILE)
+        misc_map, misc_generics = readDrugs_from_file(MISC_FILE)
+        combined_map = {**ssri_map, **misc_map}
+        combined_generics = []
+        if ssri_generics:
+            combined_generics.extend(ssri_generics)
+        if misc_generics:
+            combined_generics.extend(misc_generics)
+
+        flags = [0]* len(combined_generics)
+        meds_found = set()
+        for ln in text.splitlines():
+            ln = ln.strip()
+            if not ln:
+                continue
+            if combined_map:
+                flags = addToDrugs_line(ln, flags, combined_map, combined_generics)
+            m = re.search(r"\b(Rx|Drug|Medication|Prescribed|Tablet)\s*[:\-]?\s*([A-Za-z0-9\-\s/\.]+)", ln, re.I)
+            if m:
+                meds_found.add(m.group(2).strip())
+            m2 = re.findall(r"\b([A-Z][a-z0-9\-]{2,}\s*(?:[0-9]{1,4}\s*(?:mg|mcg|g|IU))?)", ln)
+            for s in m2:
+                if re.search(r"\b(mg|mcg|g|IU)\b", s, re.I):
+                    meds_found.add(s.strip())
+        for i, f in enumerate(flags):
+            if f == 1:
+                meds_found.add(combined_generics[i])
+        return list(meds_found)
+    except Exception:
+        logger.exception("Failed to extract medications from text")
+        return []
 
 # -------------------- Node prompts --------------------------
 PATIENT_NODE_PROMPT = """
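A hedged usage sketch of the extractor above; the note text is fabricated, and which generic names are flagged depends entirely on the contents of SSRI_list.txt / MISC_list.txt in your checkout:

# Regex-based hits work even when the word-list files are absent
# (readDrugs_from_file then returns empty maps).
note = "Medication: Sertraline 50 mg once daily\nContinue Metformin 500mg after meals"
print(extract_medications_from_text(note))
# e.g. ['Metformin 500mg', 'Sertraline 50 mg', 'Sertraline 50 mg once daily']
# (order varies; additional generic-name hits depend on the word lists)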
@@ -393,6 +400,7 @@ graph_builder.add_edge("condition_loop", END)
 
 graph = graph_builder.compile()
 
+# -------------------- Flask app & endpoints -------------------------------
 # -------------------- Flask app & endpoints -------------------------------
 BASE_DIR = Path(__file__).resolve().parent
 static_folder = BASE_DIR / "static"
@@ -404,12 +412,18 @@ CORS(app)  # dev convenience; lock down in production
 def serve_frontend():
     try:
         return app.send_static_file("frontend.html")
-    except Exception:
+    except Exception as e:
+        logger.error(f"Failed to serve frontend.html: {e}")
         return "<h3>frontend.html not found in static/ — drop your frontend.html there.</h3>", 404
 
 @app.route("/process_reports", methods=["POST"])
 def process_reports():
+    try:
+        data = request.get_json(force=True)
+    except Exception as e:
+        logger.error(f"Failed to parse JSON request: {e}")
+        return jsonify({"error": "Invalid JSON request"}), 400
+
     patient_id = data.get("patient_id")
     filenames = data.get("filenames", [])
     extra_patient_meta = data.get("patientDetails", {})
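For reference, a hypothetical client call against this endpoint; the route and field names come from the handler above, while the patient id, filename, detail values and port are placeholders (files are expected under REPORTS_ROOT/<patient_id>/):

import requests

payload = {
    "patient_id": "demo-001",
    "filenames": ["cbc_report.pdf"],
    "patientDetails": {"name": "Jane Doe", "age": "42", "sex": "F", "pid": "demo-001"},
}
resp = requests.post("http://localhost:7860/process_reports", json=payload)
print(resp.status_code, resp.json())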
@@ -433,9 +447,13 @@ def process_reports():
            elements = partition_pdf(filename=str(file_path))
            page_text = "\n".join([el.text for el in elements if hasattr(el, "text") and el.text])
        except Exception:
-           logger.exception("Failed to parse PDF
+           logger.exception(f"Failed to parse PDF {file_path}")
            page_text = ""
+       try:
+           cleaned = clean_notes_with_bloatectomy(page_text, style="remov")
+       except Exception:
+           logger.exception("Failed to clean notes with bloatectomy")
+           cleaned = page_text
        documents.append({
            "filename": fname,
            "raw_text": page_text,
@@ -447,7 +465,11 @@ def process_reports():
        return jsonify({"error": "no valid documents found"}), 400
 
    combined_text = "\n\n".join(combined_text_parts)
+   try:
+       meds = extract_medications_from_text(combined_text)
+   except Exception:
+       logger.exception("Failed to extract medications")
+       meds = []
 
    initial_state = {
        "patient_meta": extra_patient_meta,
@@ -462,7 +484,7 @@ def process_reports():
    # Validate and fill placeholders if needed
    if not result_state.get("valid", True):
        missing = result_state.get("missing", [])
-       logger.info("Validation failed; missing keys:
+       logger.info(f"Validation failed; missing keys: {missing}")
        if "patientDetails" in missing:
            result_state["patientDetails"] = extra_patient_meta or {"name": "", "age": "", "sex": "", "pid": patient_id}
        if "reports" in missing:
@@ -497,3 +519,4 @@ def ping():
 if __name__ == "__main__":
     port = int(os.getenv("PORT", 7860))
     app.run(host="0.0.0.0", port=port, debug=True)
+
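The entry point reads PORT from the environment (default 7860) and binds to all interfaces. A hedged sketch of a local health check; it assumes the ping() handler named in the hunk header above is mounted at /ping, which is not visible in this diff:

import os
import requests

port = int(os.getenv("PORT", 7860))
# /ping is an assumption based on the ping() handler referenced above.
print(requests.get(f"http://localhost:{port}/ping").status_code)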