vineelagampa committed on
Commit
ff3e1b4
·
verified ·
1 Parent(s): 08d974f

Update bert.py

Browse files
Files changed (1) hide show
  1. bert.py +319 -18
bert.py CHANGED
@@ -13,16 +13,116 @@ from spacy.util import filter_spans
13
  from spacy.matcher import Matcher
14
  import pandas as pd
15
  import re
 
 
 
 
 
16
 
17
  import google.generativeai as genai
18
- genai.configure(api_key="AIzaSyAEzAp4WBGP_RvujxUx4e_icXxhfCIRvxs")
19
  model = genai.GenerativeModel('gemini-2.5-flash-lite')
20
 
21
  non_negated_diseases = []
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  if platform.system() == "Darwin":
24
- ##pytesseract.pytesseract.tesseract_cmd = '/usr/local/bin/tesseract'
25
- pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'
26
  elif platform.system() == "Windows":
27
  pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
28
 
@@ -30,6 +130,23 @@ df = pd.read_csv("measurement.csv")
30
  df.columns = df.columns.str.lower()
31
  df['measurement'] = df['measurement'].str.lower()
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def extract_number(text):
34
  match = re.search(r'(\d+\.?\d*)', text)
35
  return float(match.group(1)) if match else None
@@ -37,20 +154,30 @@ def extract_number(text):
37
  def analyze_measurements(text, df):
38
  results = []
39
  final_numbers = []
40
- graphs_values = []
41
  for measurement in df["measurement"].unique():
42
  pattern = rf"{measurement}[^0-9]*([\d\.]+)"
43
  matches = re.findall(pattern, text, re.IGNORECASE)
 
44
  for match in matches:
45
- if measurement == "hbaic":
46
- measurement = "hba1c"
47
- value = float(match)
 
 
 
 
 
 
 
 
48
  for _, row in df[df["measurement"].str.lower() == measurement.lower()].iterrows():
49
  Condition = row['condition']
50
  if row['low'] <= value <= row['high']:
51
  results.append({
52
  "Condition" : Condition,
53
- "Measurement": measurement,
 
54
  "Value": value,
55
  "severity": row["severity"],
56
  "Range": f"{row['low']} to {row['high']} {row['unit']}"
@@ -59,12 +186,14 @@ def analyze_measurements(text, df):
59
  print (results)
60
 
61
  for res in results:
62
- final_numbers.append(f"Condition In Concern: {res['Condition']}. Measurement: {res['Measurement']} ({res['severity']}) — {res['Value']} "
63
- f"(Range: {res['Range']})")
64
-
 
65
  print("analyze measurements res:", final_numbers)
66
  return final_numbers
67
 
 
68
  nlp = spacy.load("en_core_web_sm")
69
  nlp.add_pipe("negex", config={"ent_types": ["DISEASE"]}, last=True)
70
  matcher = Matcher(nlp.vocab)
@@ -132,7 +261,6 @@ def extract_non_negated_keywords(text, threshold=80):
132
  for disease_term in diseases:
133
  disease_term_lower = disease_term.lower()
134
  match_score = fuzz.partial_ratio(disease_term_lower, sent_text)
135
- print(f"Trying to match '{disease_term_lower}' in sentence: '{sent_text.strip()}' — Match score: {match_score}")
136
 
137
  if match_score >= threshold:
138
  start = sent_text.find(disease_term_lower)
@@ -210,17 +338,190 @@ def classify_disease_and_severity(disease):
210
  print(f"Response: {numerical_response}")
211
 
212
  if 0 <= numerical_response <= 3:
213
- severity_label = (f"Low Risk: {numerical_response}")
214
  elif 3 < numerical_response <= 7:
215
- severity_label = (f"Mild Risk: {numerical_response}")
216
  elif 7 < numerical_response <= 10:
217
- severity_label = (f"Severe Risk: {numerical_response}")
218
  else:
219
- severity_label = (f"Invalid Range: {numerical_response}")
220
 
221
  except (ValueError, AttributeError):
222
  severity_label = "Null: We cannot give a clear severity label"
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  return severity_label
225
 
226
  # Links for diseases
@@ -240,7 +541,7 @@ if __name__ == '__main__':
240
  4. The patient reported no chest pain or signs of heart disease.
241
  5. Overall, there is no evidence of tumor recurrence at this time."""
242
  print(detect_past_diseases(sample_text, threshold=90))
243
- print(analyze_measurements(sample_text, df))
244
  print(extract_non_negated_keywords(sample_text, threshold=80))
 
245
 
246
-
 
13
  from spacy.matcher import Matcher
14
  import pandas as pd
15
  import re
16
+ import difflib
17
+
18
+ from api_key import GEMINI_API_KEY
19
+
20
+ hba1c = ["hbaic", "hdate", ""]
21
 
22
  import google.generativeai as genai
23
+ genai.configure(api_key=GEMINI_API_KEY)
24
  model = genai.GenerativeModel('gemini-2.5-flash-lite')
25
 
26
  non_negated_diseases = []
27
 
28
# Canonical measurement name -> every lowercase spelling accepted for it
# (abbreviations, long forms and known OCR misreadings).
# Each alias belongs to exactly one canonical key so that a lookup by alias
# is unambiguous.
synonyms = {
    # Glucose / diabetes
    "hba1c": ["hba1c", "hbaic", "hdate", "a1c", "hemoglobin a1c", "glycated hemoglobin", "hba", "hda", "hbic"],
    "fasting glucose": ["fasting glucose", "fasting-glucose", "fasting blood sugar", "fbs"],
    "ogtt": ["ogtt", "oral glucose tolerance test", "glucose tolerance test"],

    # Lipids
    "ldl": ["ldl", "ldl-c", "low density lipoprotein", "bad cholesterol"],
    "hdl": ["hdl", "hdl-c", "high density lipoprotein", "good cholesterol"],
    "triglycerides": ["triglycerides", "trigs", "tg"],
    "total cholesterol": ["total cholesterol", "cholesterol total", "chol", "tc"],
    "non-hdl": ["non-hdl", "non hdl", "nonhdl"],

    # Thyroid
    "tsh": ["tsh", "thyroid stimulating hormone"],
    "free t4": ["free t4", "free-t4", "ft4", "free thyroxine"],
    "free t3": ["free t3", "free-t3", "ft3", "free triiodothyronine"],

    # Inflammation
    "crp": ["crp", "c-reactive protein"],
    "esr": ["esr", "erythrocyte sedimentation rate"],

    # Vitamins
    "vitamin-b12": ["vitamin-b12", "vitamin b12", "b12", "vit b12", "cobalamin"],
    "vitamin-d": ["vitamin-d", "vitamin d", "vit d", "25-oh d", "25-hydroxy vitamin d"],
    "vitamin-a": ["vitamin-a", "vitamin a", "vit a"],
    "vitamin-e": ["vitamin-e", "vitamin e", "vit e"],

    # Electrolytes
    "sodium": ["sodium", "na"],
    "potassium": ["potassium", "k"],
    "calcium": ["calcium", "ca"],
    "magnesium": ["magnesium", "mg"],

    # Blood Pressure
    "systolic": ["systolic", "sbp"],
    "diastolic": ["diastolic", "dbp"],

    # CBC
    "wbc": ["wbc", "white blood cells", "white cell count"],
    "rbc": ["rbc", "red blood cells", "red cell count"],
    "hemoglobin": ["hemoglobin", "hb", "hgb"],
    "hematocrit": ["hematocrit", "hct"],
    "platelets": ["platelets", "plt"],

    # Iron
    "serum iron": ["serum iron", "iron"],
    "ferritin": ["ferritin"],
    "tibc": ["tibc", "total iron binding capacity"],
    "transferrin saturation": ["transferrin saturation", "tsat"],

    # Liver
    "alt": ["alt", "sgpt"],
    "ast": ["ast", "sgot"],
    "alp": ["alp", "alkaline phosphatase"],
    "bilirubin total": ["bilirubin total", "total bilirubin"],
    "albumin": ["albumin"],

    # Kidney
    "creatinine": ["creatinine"],
    "bun": ["bun", "blood urea nitrogen"],
    "egfr": ["egfr", "estimated gfr"],
    "urine protein": ["urine protein", "proteinuria"],
    "urine albumin": ["urine albumin", "microalbumin"],

    # Respiratory
    # "oxygen saturation" and "o2 sat" live here only; a second entry keyed
    # "oxygen saturation" would repeat these aliases and never be selected.
    "spo2": ["spo2", "oxygen saturation", "o2 sat"],
    "pco2": ["pco2", "carbon dioxide partial pressure"],
    "po2": ["po2", "oxygen partial pressure"],
    # "fevi" is a common OCR misreading of "fev1"; it is listed as an alias
    # so that it resolves to the correct canonical name.
    "fev1": ["fev1", "fevi", "forced expiratory volume"],

    # Coagulation
    "inr": ["inr"],
    "pt": ["pt", "prothrombin time"],
    "aptt": ["aptt", "partial thromboplastin time"],
    "fibrinogen": ["fibrinogen"],

    # Hormones
    "cortisol": ["cortisol"],
    "testosterone": ["testosterone"],
    "estradiol": ["estradiol", "estrogen"],
    "progesterone": ["progesterone"],

    # Infection
    "procalcitonin": ["procalcitonin"],
    "lactate": ["lactate"],

    # Cardiac extras
    "troponin": ["troponin", "trop"],

    # Vitals
    "temperature": ["temperature", "temp", "body temp"],
    "heart rate": ["heart rate", "pulse", "hr"],
}
122
+
123
+
124
# Point pytesseract at the tesseract binary on platforms where it is usually
# not discoverable through the default command name.
if platform.system() == "Darwin":
    # Homebrew installs under /usr/local on Intel Macs and /opt/homebrew on
    # Apple Silicon, so probe both instead of hard-coding one of them.
    import os
    import shutil

    _tesseract_candidates = ('/usr/local/bin/tesseract', '/opt/homebrew/bin/tesseract')
    pytesseract.pytesseract.tesseract_cmd = next(
        (path for path in _tesseract_candidates if os.path.isfile(path)),
        # Nothing at the known locations: try PATH, then keep the previously
        # hard-coded location so the resulting error message is unchanged.
        shutil.which('tesseract') or '/usr/local/bin/tesseract',
    )
elif platform.system() == "Windows":
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
128
 
 
130
  df.columns = df.columns.str.lower()
131
  df['measurement'] = df['measurement'].str.lower()
132
 
133
def normalize_term(term: str, synonym_table: "dict[str, list[str]] | None" = None) -> str:
    """Map a measurement name to its canonical key in the synonym table.

    The term is lower-cased and its whitespace is trimmed and collapsed
    (OCR output often contains doubled spaces or stray tabs). An exact alias
    match is tried first; otherwise the closest alias by ``difflib``
    similarity (cutoff 0.75) is used to absorb OCR typos.

    Args:
        term: Raw measurement name.
        synonym_table: Mapping of canonical name -> list of aliases. Defaults
            to the module-level ``synonyms`` table.

    Returns:
        The canonical key when an exact or fuzzy match exists, otherwise the
        cleaned term itself.
    """
    table = synonyms if synonym_table is None else synonym_table

    cleaned = " ".join(term.lower().split())
    if not cleaned:
        return cleaned

    # One alias -> key map replaces the repeated scans over the table.
    # setdefault keeps the first key that lists an alias, which matches the
    # precedence of iterating the table in order.
    alias_to_key = {}
    for key, aliases in table.items():
        for alias in aliases:
            alias_to_key.setdefault(alias, key)

    if cleaned in alias_to_key:
        return alias_to_key[cleaned]

    # Fuzzy matching for OCR typos
    closest = difflib.get_close_matches(cleaned, list(alias_to_key), n=1, cutoff=0.75)
    return alias_to_key[closest[0]] if closest else cleaned
149
+
150
  def extract_number(text):
151
  match = re.search(r'(\d+\.?\d*)', text)
152
  return float(match.group(1)) if match else None
 
154
  def analyze_measurements(text, df):
155
  results = []
156
  final_numbers = []
157
+ final_version = ()
158
  for measurement in df["measurement"].unique():
159
  pattern = rf"{measurement}[^0-9]*([\d\.]+)"
160
  matches = re.findall(pattern, text, re.IGNORECASE)
161
+
162
  for match in matches:
163
+ # Clean non-numeric characters like % or units
164
+ cleaned = re.sub(r"[^0-9.]", "", match)
165
+ if cleaned == "" or cleaned == ".":
166
+ continue # skip invalid
167
+ try:
168
+ value = float(cleaned)
169
+ except ValueError:
170
+ continue
171
+
172
+ normalized = normalize_term(measurement)
173
+
174
  for _, row in df[df["measurement"].str.lower() == measurement.lower()].iterrows():
175
  Condition = row['condition']
176
  if row['low'] <= value <= row['high']:
177
  results.append({
178
  "Condition" : Condition,
179
+ "Measurement": normalized,
180
+ "unit": row['unit'],
181
  "Value": value,
182
  "severity": row["severity"],
183
  "Range": f"{row['low']} to {row['high']} {row['unit']}"
 
186
  print (results)
187
 
188
  for res in results:
189
+ final = [res['Condition'], res['Measurement'], res['unit'], res['severity'], res['Value'], res['Range']]
190
+ # final_numbers.append(f"Condition In Concern: {res['Condition']}. Measurement: {res['Measurement']} ({res['severity']}) — {res['Value']} "
191
+ # f"(Range: {res['Range']})")
192
+ final_numbers.append(final)
193
  print("analyze measurements res:", final_numbers)
194
  return final_numbers
195
 
196
+
197
  nlp = spacy.load("en_core_web_sm")
198
  nlp.add_pipe("negex", config={"ent_types": ["DISEASE"]}, last=True)
199
  matcher = Matcher(nlp.vocab)
 
261
  for disease_term in diseases:
262
  disease_term_lower = disease_term.lower()
263
  match_score = fuzz.partial_ratio(disease_term_lower, sent_text)
 
264
 
265
  if match_score >= threshold:
266
  start = sent_text.find(disease_term_lower)
 
338
  print(f"Response: {numerical_response}")
339
 
340
  if 0 <= numerical_response <= 3:
341
+ severity_label = (f"Low Risk")
342
  elif 3 < numerical_response <= 7:
343
+ severity_label = (f"Mild Risk")
344
  elif 7 < numerical_response <= 10:
345
+ severity_label = (f"Severe Risk")
346
  else:
347
+ severity_label = (f"Invalid Range")
348
 
349
  except (ValueError, AttributeError):
350
  severity_label = "Null: We cannot give a clear severity label"
351
 
352
+
353
+ # inputs = clinical_bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=1200)
354
+ # with torch.no_grad():
355
+ # outputs = clinical_bert_model(**inputs)
356
+
357
+ # logits = outputs.logits
358
+ # predicted_class = torch.argmax(logits, dim=-1).item()
359
+
360
+ # print(f"Bert model response: {predicted_class}") # Debugging line
361
+
362
+ # severity_label = "Mild" if predicted_class == 0 else "Severe"
363
+
364
+ # text_lower = text.lower()
365
+
366
+ # if "heart" in text_lower or "cardiac" in text_lower or "myocardial" in text_lower:
367
+ # disease_label = "Heart Disease"
368
+ # elif "cancer" in text_lower or "tumor" in text_lower or "carcinoma" in text_lower or "neoplasm" in text_lower or "malignancy" in text_lower:
369
+ # disease_label = "Cancer"
370
+ # elif "diabetes" in text_lower or "hba1c" in text_lower or "blood sugar" in text_lower or "hyperglycemia" in text_lower:
371
+ # disease_label = "Diabetes"
372
+ # elif "asthma" in text_lower:
373
+ # disease_label = "Asthma"
374
+ # elif "arthritis" in text_lower or "rheumatoid arthritis" in text_lower or "osteoarthritis" in text_lower or "ra " in text_lower:
375
+ # disease_label = "Arthritis"
376
+ # elif "stroke" in text_lower or "cerebrovascular accident" in text_lower or "cva" in text_lower:
377
+ # disease_label = "Stroke"
378
+ # elif "allergy" in text_lower or "allergic" in text_lower or "hypersensitivity" in text_lower:
379
+ # disease_label = "Allergy"
380
+ # elif "hypertension" in text_lower or "high blood pressure" in text_lower or "hbp" in text_lower:
381
+ # disease_label = "Hypertension"
382
+ # elif "dengue" in text_lower:
383
+ # disease_label = "Dengue"
384
+ # elif "malaria" in text_lower:
385
+ # disease_label = "Malaria"
386
+ # elif "tuberculosis" in text_lower or "tb " in text_lower:
387
+ # disease_label = "Tuberculosis"
388
+ # elif "bronchitis" in text_lower or "chronic bronchitis" in text_lower:
389
+ # disease_label = "Bronchitis"
390
+ # elif "pneumonia" in text_lower:
391
+ # disease_label = "Pneumonia"
392
+ # elif "obesity" in text_lower or "overweight" in text_lower:
393
+ # disease_label = "Obesity"
394
+ # elif "epilepsy" in text_lower or "seizure" in text_lower or "convulsion" in text_lower:
395
+ # disease_label = "Epilepsy"
396
+ # elif "dementia" in text_lower or "alzheimer" in text_lower or "memory loss" in text_lower:
397
+ # disease_label = "Dementia"
398
+ # elif "autism" in text_lower or "asd" in text_lower:
399
+ # disease_label = "Autism Spectrum Disorder"
400
+ # elif "parkinson" in text_lower or "parkinson's disease" in text_lower:
401
+ # disease_label = "Parkinson's Disease"
402
+ # elif "leukemia" in text_lower or "blood cancer" in text_lower:
403
+ # disease_label = "Leukemia"
404
+ # elif "lymphoma" in text_lower:
405
+ # disease_label = "Lymphoma"
406
+ # elif "glaucoma" in text_lower:
407
+ # disease_label = "Glaucoma"
408
+ # elif "hepatitis" in text_lower or "liver inflammation" in text_lower:
409
+ # disease_label = "Hepatitis"
410
+ # elif "cirrhosis" in text_lower or "liver failure" in text_lower:
411
+ # disease_label = "Liver Cirrhosis"
412
+ # elif "kidney" in text_lower or "renal" in text_lower or "nephropathy" in text_lower or "ckd" in text_lower:
413
+ # disease_label = "Kidney Disease"
414
+ # elif "thyroid" in text_lower or "hyperthyroidism" in text_lower or "hypothyroidism" in text_lower:
415
+ # disease_label = "Thyroid Disorder"
416
+ # elif "hiv" in text_lower or "aids" in text_lower:
417
+ # disease_label = "HIV/AIDS"
418
+ # elif "anemia" in text_lower or "low hemoglobin" in text_lower or "iron deficiency" in text_lower:
419
+ # disease_label = "Anemia"
420
+ # elif "migraine" in text_lower or "headache" in text_lower:
421
+ # disease_label = "Migraine"
422
+ # elif "psoriasis" in text_lower:
423
+ # disease_label = "Psoriasis"
424
+ # elif "eczema" in text_lower or "atopic dermatitis" in text_lower:
425
+ # disease_label = "Eczema"
426
+ # elif "vitiligo" in text_lower:
427
+ # disease_label = "Vitiligo"
428
+ # elif "cholera" in text_lower:
429
+ # disease_label = "Cholera"
430
+ # elif "typhoid" in text_lower:
431
+ # disease_label = "Typhoid"
432
+ # elif "meningitis" in text_lower:
433
+ # disease_label = "Meningitis"
434
+ # elif "insomnia" in text_lower:
435
+ # disease_label = "Insomnia"
436
+ # elif "sleep apnea" in text_lower or "obstructive sleep apnea" in text_lower or "osa" in text_lower:
437
+ # disease_label = "Sleep Apnea"
438
+ # elif "fibromyalgia" in text_lower:
439
+ # disease_label = "Fibromyalgia"
440
+ # elif "lupus" in text_lower or "systemic lupus erythematosus" in text_lower or "sle" in text_lower:
441
+ # disease_label = "Lupus"
442
+ # elif "sclerosis" in text_lower or "multiple sclerosis" in text_lower or "ms " in text_lower:
443
+ # disease_label = "Multiple Sclerosis"
444
+ # elif "shingles" in text_lower or "herpes zoster" in text_lower:
445
+ # disease_label = "Shingles"
446
+ # elif "chickenpox" in text_lower or "varicella" in text_lower:
447
+ # disease_label = "Chickenpox"
448
+ # elif "covid" in text_lower or "corona" in text_lower or "sars-cov-2" in text_lower:
449
+ # disease_label = "COVID-19"
450
+ # elif "influenza" in text_lower or "flu" in text_lower:
451
+ # disease_label = "Influenza"
452
+ # elif "smallpox" in text_lower:
453
+ # disease_label = "Smallpox"
454
+ # elif "measles" in text_lower:
455
+ # disease_label = "Measles"
456
+ # elif "polio" in text_lower or "poliomyelitis" in text_lower:
457
+ # disease_label = "Polio"
458
+ # elif "botulism" in text_lower:
459
+ # disease_label = "Botulism"
460
+ # elif "lyme disease" in text_lower or "borreliosis" in text_lower:
461
+ # disease_label = "Lyme Disease"
462
+ # elif "zika virus" in text_lower or "zika" in text_lower:
463
+ # disease_label = "Zika Virus"
464
+ # elif "ebola" in text_lower:
465
+ # disease_label = "Ebola"
466
+ # elif "marburg virus" in text_lower:
467
+ # disease_label = "Marburg Virus"
468
+ # elif "west nile virus" in text_lower or "west nile" in text_lower:
469
+ # disease_label = "West Nile Virus"
470
+ # elif "sars" in text_lower:
471
+ # disease_label = "SARS"
472
+ # elif "mers" in text_lower:
473
+ # disease_label = "MERS"
474
+ # elif "e. coli infection" in text_lower or "ecoli" in text_lower:
475
+ # disease_label = "E. coli Infection"
476
+ # elif "salmonella" in text_lower:
477
+ # disease_label = "Salmonella"
478
+ # elif "hepatitis a" in text_lower:
479
+ # disease_label = "Hepatitis A"
480
+ # elif "hepatitis b" in text_lower:
481
+ # disease_label = "Hepatitis B"
482
+ # elif "hepatitis c" in text_lower:
483
+ # disease_label = "Hepatitis C"
484
+ # elif "rheumatoid arthritis" in text_lower:
485
+ # disease_label = "Rheumatoid Arthritis"
486
+ # elif "osteoporosis" in text_lower:
487
+ # disease_label = "Osteoporosis"
488
+ # elif "gout" in text_lower:
489
+ # disease_label = "Gout"
490
+ # elif "scleroderma" in text_lower:
491
+ # disease_label = "Scleroderma"
492
+ # elif "amyotrophic lateral sclerosis" in text_lower or "als" in text_lower:
493
+ # disease_label = "Amyotrophic Lateral Sclerosis"
494
+ # elif "muscular dystrophy" in text_lower:
495
+ # disease_label = "Muscular Dystrophy"
496
+ # elif "huntington's disease" in text_lower:
497
+ # disease_label = "Huntington's Disease"
498
+ # elif "alzheimers disease" in text_lower or "alzheimer's disease" in text_lower:
499
+ # disease_label = "Alzheimer's Disease"
500
+ # elif "chronic kidney disease" in text_lower or "ckd" in text_lower:
501
+ # disease_label = "Chronic Kidney Disease"
502
+ # elif "chronic obstructive pulmonary disease" in text_lower or "copd" in text_lower:
503
+ # disease_label = "Chronic Obstructive Pulmonary Disease"
504
+ # elif "addison's disease" in text_lower:
505
+ # disease_label = "Addison's Disease"
506
+ # elif "cushing's syndrome" in text_lower or "cushings syndrome" in text_lower:
507
+ # disease_label = "Cushing's Syndrome"
508
+ # elif "graves' disease" in text_lower or "graves disease" in text_lower:
509
+ # disease_label = "Graves' Disease"
510
+ # elif "hashimoto's thyroiditis" in text_lower or "hashimoto's disease" in text_lower:
511
+ # disease_label = "Hashimoto's Thyroiditis"
512
+ # elif "sarcoidosis" in text_lower:
513
+ # disease_label = "Sarcoidosis"
514
+ # elif "histoplasmosis" in text_lower:
515
+ # disease_label = "Histoplasmosis"
516
+ # elif "cystic fibrosis" in text_lower:
517
+ # disease_label = "Cystic Fibrosis"
518
+ # elif "epstein-barr virus" in text_lower or "ebv" in text_lower:
519
+ # disease_label = "Epstein-Barr Virus Infection"
520
+ # elif "mononucleosis" in text_lower or "mono" in text_lower:
521
+ # disease_label = "Mononucleosis"
522
+ # else:
523
+ # disease_label = "Unknown"
524
+
525
  return severity_label
526
 
527
  # Links for diseases
 
541
  4. The patient reported no chest pain or signs of heart disease.
542
  5. Overall, there is no evidence of tumor recurrence at this time."""
543
  print(detect_past_diseases(sample_text, threshold=90))
 
544
  print(extract_non_negated_keywords(sample_text, threshold=80))
545
+ print(analyze_measurements(sample_text, df))
546
 
547
+