Spaces:
Sleeping
Sleeping
Update bert.py
Browse files
bert.py
CHANGED
|
@@ -13,16 +13,116 @@ from spacy.util import filter_spans
|
|
| 13 |
from spacy.matcher import Matcher
|
| 14 |
import pandas as pd
|
| 15 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
import google.generativeai as genai
|
| 18 |
-
genai.configure(api_key=
|
| 19 |
model = genai.GenerativeModel('gemini-2.5-flash-lite')
|
| 20 |
|
| 21 |
non_negated_diseases = []
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
if platform.system() == "Darwin":
|
| 24 |
-
|
| 25 |
-
pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'
|
| 26 |
elif platform.system() == "Windows":
|
| 27 |
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
| 28 |
|
|
@@ -30,6 +130,23 @@ df = pd.read_csv("measurement.csv")
|
|
| 30 |
df.columns = df.columns.str.lower()
|
| 31 |
df['measurement'] = df['measurement'].str.lower()
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
def extract_number(text):
|
| 34 |
match = re.search(r'(\d+\.?\d*)', text)
|
| 35 |
return float(match.group(1)) if match else None
|
|
@@ -37,20 +154,30 @@ def extract_number(text):
|
|
| 37 |
def analyze_measurements(text, df):
|
| 38 |
results = []
|
| 39 |
final_numbers = []
|
| 40 |
-
|
| 41 |
for measurement in df["measurement"].unique():
|
| 42 |
pattern = rf"{measurement}[^0-9]*([\d\.]+)"
|
| 43 |
matches = re.findall(pattern, text, re.IGNORECASE)
|
|
|
|
| 44 |
for match in matches:
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
for _, row in df[df["measurement"].str.lower() == measurement.lower()].iterrows():
|
| 49 |
Condition = row['condition']
|
| 50 |
if row['low'] <= value <= row['high']:
|
| 51 |
results.append({
|
| 52 |
"Condition" : Condition,
|
| 53 |
-
"Measurement":
|
|
|
|
| 54 |
"Value": value,
|
| 55 |
"severity": row["severity"],
|
| 56 |
"Range": f"{row['low']} to {row['high']} {row['unit']}"
|
|
@@ -59,12 +186,14 @@ def analyze_measurements(text, df):
|
|
| 59 |
print (results)
|
| 60 |
|
| 61 |
for res in results:
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
print("analyze measurements res:", final_numbers)
|
| 66 |
return final_numbers
|
| 67 |
|
|
|
|
| 68 |
nlp = spacy.load("en_core_web_sm")
|
| 69 |
nlp.add_pipe("negex", config={"ent_types": ["DISEASE"]}, last=True)
|
| 70 |
matcher = Matcher(nlp.vocab)
|
|
@@ -132,7 +261,6 @@ def extract_non_negated_keywords(text, threshold=80):
|
|
| 132 |
for disease_term in diseases:
|
| 133 |
disease_term_lower = disease_term.lower()
|
| 134 |
match_score = fuzz.partial_ratio(disease_term_lower, sent_text)
|
| 135 |
-
print(f"Trying to match '{disease_term_lower}' in sentence: '{sent_text.strip()}' — Match score: {match_score}")
|
| 136 |
|
| 137 |
if match_score >= threshold:
|
| 138 |
start = sent_text.find(disease_term_lower)
|
|
@@ -210,17 +338,190 @@ def classify_disease_and_severity(disease):
|
|
| 210 |
print(f"Response: {numerical_response}")
|
| 211 |
|
| 212 |
if 0 <= numerical_response <= 3:
|
| 213 |
-
severity_label = (f"Low Risk
|
| 214 |
elif 3 < numerical_response <= 7:
|
| 215 |
-
severity_label = (f"Mild Risk
|
| 216 |
elif 7 < numerical_response <= 10:
|
| 217 |
-
severity_label = (f"Severe Risk
|
| 218 |
else:
|
| 219 |
-
severity_label = (f"Invalid Range
|
| 220 |
|
| 221 |
except (ValueError, AttributeError):
|
| 222 |
severity_label = "Null: We cannot give a clear severity label"
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
return severity_label
|
| 225 |
|
| 226 |
# Links for diseases
|
|
@@ -240,7 +541,7 @@ if __name__ == '__main__':
|
|
| 240 |
4. The patient reported no chest pain or signs of heart disease.
|
| 241 |
5. Overall, there is no evidence of tumor recurrence at this time."""
|
| 242 |
print(detect_past_diseases(sample_text, threshold=90))
|
| 243 |
-
print(analyze_measurements(sample_text, df))
|
| 244 |
print(extract_non_negated_keywords(sample_text, threshold=80))
|
|
|
|
| 245 |
|
| 246 |
-
|
|
|
|
| 13 |
from spacy.matcher import Matcher
|
| 14 |
import pandas as pd
|
| 15 |
import re
|
| 16 |
+
import difflib
|
| 17 |
+
|
| 18 |
+
from api_key import GEMINI_API_KEY
|
| 19 |
+
|
| 20 |
+
hba1c = ["hbaic", "hdate", ""]
|
| 21 |
|
| 22 |
import google.generativeai as genai
|
| 23 |
+
genai.configure(api_key=GEMINI_API_KEY)
|
| 24 |
model = genai.GenerativeModel('gemini-2.5-flash-lite')
|
| 25 |
|
| 26 |
non_negated_diseases = []
|
| 27 |
|
| 28 |
+
# Canonical measurement name -> every spelling that should normalise to it.
# Each alias list starts with the canonical name itself. Known OCR
# misreadings (e.g. "hbaic" for "hba1c", "fevi" for "fev1") are listed as
# aliases of the real measurement rather than as keys of their own, so that
# normalize_term() maps them back to the canonical spelling.
synonyms = {
    # Glucose / diabetes
    "hba1c": ["hba1c", "hbaic", "hdate", "a1c", "hemoglobin a1c", "glycated hemoglobin", "hba", "hda", "hbic"],
    "fasting glucose": ["fasting glucose", "fasting-glucose", "fasting blood sugar", "fbs"],
    "ogtt": ["ogtt", "oral glucose tolerance test", "glucose tolerance test"],

    # Lipids
    "ldl": ["ldl", "ldl-c", "low density lipoprotein", "bad cholesterol"],
    "hdl": ["hdl", "hdl-c", "high density lipoprotein", "good cholesterol"],
    "triglycerides": ["triglycerides", "trigs", "tg"],
    "total cholesterol": ["total cholesterol", "cholesterol total", "chol", "tc"],
    "non-hdl": ["non-hdl", "non hdl", "nonhdl"],

    # Thyroid
    "tsh": ["tsh", "thyroid stimulating hormone"],
    "free t4": ["free t4", "free-t4", "ft4", "free thyroxine"],
    "free t3": ["free t3", "free-t3", "ft3", "free triiodothyronine"],

    # Inflammation
    "crp": ["crp", "c-reactive protein"],
    "esr": ["esr", "erythrocyte sedimentation rate"],

    # Vitamins
    "vitamin-b12": ["vitamin-b12", "vitamin b12", "b12", "vit b12", "cobalamin"],
    "vitamin-d": ["vitamin-d", "vitamin d", "vit d", "25-oh d", "25-hydroxy vitamin d"],
    "vitamin-a": ["vitamin-a", "vitamin a", "vit a"],
    "vitamin-e": ["vitamin-e", "vitamin e", "vit e"],

    # Electrolytes
    "sodium": ["sodium", "na"],
    "potassium": ["potassium", "k"],
    "calcium": ["calcium", "ca"],
    "magnesium": ["magnesium", "mg"],

    # Blood Pressure
    "systolic": ["systolic", "sbp"],
    "diastolic": ["diastolic", "dbp"],

    # CBC
    "wbc": ["wbc", "white blood cells", "white cell count"],
    "rbc": ["rbc", "red blood cells", "red cell count"],
    "hemoglobin": ["hemoglobin", "hb", "hgb"],
    "hematocrit": ["hematocrit", "hct"],
    "platelets": ["platelets", "plt"],

    # Iron
    "serum iron": ["serum iron", "iron"],
    "ferritin": ["ferritin"],
    "tibc": ["tibc", "total iron binding capacity"],
    "transferrin saturation": ["transferrin saturation", "tsat"],

    # Liver
    "alt": ["alt", "sgpt"],
    "ast": ["ast", "sgot"],
    "alp": ["alp", "alkaline phosphatase"],
    "bilirubin total": ["bilirubin total", "total bilirubin"],
    "albumin": ["albumin"],

    # Kidney
    "creatinine": ["creatinine"],
    "bun": ["bun", "blood urea nitrogen"],
    "egfr": ["egfr", "estimated gfr"],
    "urine protein": ["urine protein", "proteinuria"],
    "urine albumin": ["urine albumin", "microalbumin"],

    # Respiratory
    "spo2": ["spo2", "oxygen saturation", "o2 sat"],
    "pco2": ["pco2", "carbon dioxide partial pressure"],
    "po2": ["po2", "oxygen partial pressure"],
    # "fevi" is a common OCR misreading of "fev1"; it is an alias here so it
    # normalises to the real measurement name instead of to itself.
    "fev1": ["fev1", "fevi", "forced expiratory volume"],

    # Coagulation
    "inr": ["inr"],
    "pt": ["pt", "prothrombin time"],
    "aptt": ["aptt", "partial thromboplastin time"],
    "fibrinogen": ["fibrinogen"],

    # Hormones
    "cortisol": ["cortisol"],
    "testosterone": ["testosterone"],
    "estradiol": ["estradiol", "estrogen"],
    "progesterone": ["progesterone"],

    # Infection
    "procalcitonin": ["procalcitonin"],
    "lactate": ["lactate"],

    # Cardiac extras
    "troponin": ["troponin", "trop"],

    # Vitals
    "temperature": ["temperature", "temp", "body temp"],
    "heart rate": ["heart rate", "pulse", "hr"],
    # NOTE(review): these aliases duplicate the "spo2" entry above. Because
    # normalize_term() returns the first key whose list contains the term,
    # they resolve to "spo2"; this entry is kept only so direct
    # synonyms["oxygen saturation"] lookups keep working.
    "oxygen saturation": ["oxygen saturation", "spo2", "o2 sat"],
}
|
| 122 |
+
|
| 123 |
+
|
| 124 |
if platform.system() == "Darwin":
|
| 125 |
+
pytesseract.pytesseract.tesseract_cmd = '/usr/local/bin/tesseract'
|
|
|
|
| 126 |
elif platform.system() == "Windows":
|
| 127 |
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
| 128 |
|
|
|
|
| 130 |
df.columns = df.columns.str.lower()
|
| 131 |
df['measurement'] = df['measurement'].str.lower()
|
| 132 |
|
| 133 |
+
def normalize_term(term: str) -> str:
    """Map a measurement name to its canonical key in ``synonyms``.

    The term is lower-cased and stripped, then resolved in two stages:

    1. Exact match: the first ``synonyms`` entry (in insertion order) whose
       alias list contains the cleaned term wins.
    2. Fuzzy match (for OCR typos): the single closest alias according to
       ``difflib.get_close_matches`` with a 0.75 similarity cutoff is looked
       up the same way.

    Returns the canonical key, or the cleaned term unchanged when neither
    stage finds a match.
    """
    cleaned = term.lower().strip()

    # Stage 1: exact alias lookup.
    exact = next(
        (canonical for canonical, aliases in synonyms.items() if cleaned in aliases),
        None,
    )
    if exact is not None:
        return exact

    # Stage 2: fuzzy matching for OCR typos against every known alias.
    candidates = [alias for aliases in synonyms.values() for alias in aliases]
    nearest = difflib.get_close_matches(cleaned, candidates, n=1, cutoff=0.75)
    if not nearest:
        return cleaned

    best_alias = nearest[0]
    for canonical, aliases in synonyms.items():
        if best_alias in aliases:
            return canonical

    return cleaned
|
| 149 |
+
|
| 150 |
def extract_number(text):
|
| 151 |
match = re.search(r'(\d+\.?\d*)', text)
|
| 152 |
return float(match.group(1)) if match else None
|
|
|
|
| 154 |
def analyze_measurements(text, df):
|
| 155 |
results = []
|
| 156 |
final_numbers = []
|
| 157 |
+
final_version = ()
|
| 158 |
for measurement in df["measurement"].unique():
|
| 159 |
pattern = rf"{measurement}[^0-9]*([\d\.]+)"
|
| 160 |
matches = re.findall(pattern, text, re.IGNORECASE)
|
| 161 |
+
|
| 162 |
for match in matches:
|
| 163 |
+
# Clean non-numeric characters like % or units
|
| 164 |
+
cleaned = re.sub(r"[^0-9.]", "", match)
|
| 165 |
+
if cleaned == "" or cleaned == ".":
|
| 166 |
+
continue # skip invalid
|
| 167 |
+
try:
|
| 168 |
+
value = float(cleaned)
|
| 169 |
+
except ValueError:
|
| 170 |
+
continue
|
| 171 |
+
|
| 172 |
+
normalized = normalize_term(measurement)
|
| 173 |
+
|
| 174 |
for _, row in df[df["measurement"].str.lower() == measurement.lower()].iterrows():
|
| 175 |
Condition = row['condition']
|
| 176 |
if row['low'] <= value <= row['high']:
|
| 177 |
results.append({
|
| 178 |
"Condition" : Condition,
|
| 179 |
+
"Measurement": normalized,
|
| 180 |
+
"unit": row['unit'],
|
| 181 |
"Value": value,
|
| 182 |
"severity": row["severity"],
|
| 183 |
"Range": f"{row['low']} to {row['high']} {row['unit']}"
|
|
|
|
| 186 |
print (results)
|
| 187 |
|
| 188 |
for res in results:
|
| 189 |
+
final = [res['Condition'], res['Measurement'], res['unit'], res['severity'], res['Value'], res['Range']]
|
| 190 |
+
# final_numbers.append(f"Condition In Concern: {res['Condition']}. Measurement: {res['Measurement']} ({res['severity']}) — {res['Value']} "
|
| 191 |
+
# f"(Range: {res['Range']})")
|
| 192 |
+
final_numbers.append(final)
|
| 193 |
print("analyze measurements res:", final_numbers)
|
| 194 |
return final_numbers
|
| 195 |
|
| 196 |
+
|
| 197 |
nlp = spacy.load("en_core_web_sm")
|
| 198 |
nlp.add_pipe("negex", config={"ent_types": ["DISEASE"]}, last=True)
|
| 199 |
matcher = Matcher(nlp.vocab)
|
|
|
|
| 261 |
for disease_term in diseases:
|
| 262 |
disease_term_lower = disease_term.lower()
|
| 263 |
match_score = fuzz.partial_ratio(disease_term_lower, sent_text)
|
|
|
|
| 264 |
|
| 265 |
if match_score >= threshold:
|
| 266 |
start = sent_text.find(disease_term_lower)
|
|
|
|
| 338 |
print(f"Response: {numerical_response}")
|
| 339 |
|
| 340 |
if 0 <= numerical_response <= 3:
|
| 341 |
+
severity_label = (f"Low Risk")
|
| 342 |
elif 3 < numerical_response <= 7:
|
| 343 |
+
severity_label = (f"Mild Risk")
|
| 344 |
elif 7 < numerical_response <= 10:
|
| 345 |
+
severity_label = (f"Severe Risk")
|
| 346 |
else:
|
| 347 |
+
severity_label = (f"Invalid Range")
|
| 348 |
|
| 349 |
except (ValueError, AttributeError):
|
| 350 |
severity_label = "Null: We cannot give a clear severity label"
|
| 351 |
|
| 352 |
+
|
| 353 |
+
# inputs = clinical_bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=1200)
|
| 354 |
+
# with torch.no_grad():
|
| 355 |
+
# outputs = clinical_bert_model(**inputs)
|
| 356 |
+
|
| 357 |
+
# logits = outputs.logits
|
| 358 |
+
# predicted_class = torch.argmax(logits, dim=-1).item()
|
| 359 |
+
|
| 360 |
+
# print(f"Bert model response: {predicted_class}") # Debugging line
|
| 361 |
+
|
| 362 |
+
# severity_label = "Mild" if predicted_class == 0 else "Severe"
|
| 363 |
+
|
| 364 |
+
# text_lower = text.lower()
|
| 365 |
+
|
| 366 |
+
# if "heart" in text_lower or "cardiac" in text_lower or "myocardial" in text_lower:
|
| 367 |
+
# disease_label = "Heart Disease"
|
| 368 |
+
# elif "cancer" in text_lower or "tumor" in text_lower or "carcinoma" in text_lower or "neoplasm" in text_lower or "malignancy" in text_lower:
|
| 369 |
+
# disease_label = "Cancer"
|
| 370 |
+
# elif "diabetes" in text_lower or "hba1c" in text_lower or "blood sugar" in text_lower or "hyperglycemia" in text_lower:
|
| 371 |
+
# disease_label = "Diabetes"
|
| 372 |
+
# elif "asthma" in text_lower:
|
| 373 |
+
# disease_label = "Asthma"
|
| 374 |
+
# elif "arthritis" in text_lower or "rheumatoid arthritis" in text_lower or "osteoarthritis" in text_lower or "ra " in text_lower:
|
| 375 |
+
# disease_label = "Arthritis"
|
| 376 |
+
# elif "stroke" in text_lower or "cerebrovascular accident" in text_lower or "cva" in text_lower:
|
| 377 |
+
# disease_label = "Stroke"
|
| 378 |
+
# elif "allergy" in text_lower or "allergic" in text_lower or "hypersensitivity" in text_lower:
|
| 379 |
+
# disease_label = "Allergy"
|
| 380 |
+
# elif "hypertension" in text_lower or "high blood pressure" in text_lower or "hbp" in text_lower:
|
| 381 |
+
# disease_label = "Hypertension"
|
| 382 |
+
# elif "dengue" in text_lower:
|
| 383 |
+
# disease_label = "Dengue"
|
| 384 |
+
# elif "malaria" in text_lower:
|
| 385 |
+
# disease_label = "Malaria"
|
| 386 |
+
# elif "tuberculosis" in text_lower or "tb " in text_lower:
|
| 387 |
+
# disease_label = "Tuberculosis"
|
| 388 |
+
# elif "bronchitis" in text_lower or "chronic bronchitis" in text_lower:
|
| 389 |
+
# disease_label = "Bronchitis"
|
| 390 |
+
# elif "pneumonia" in text_lower:
|
| 391 |
+
# disease_label = "Pneumonia"
|
| 392 |
+
# elif "obesity" in text_lower or "overweight" in text_lower:
|
| 393 |
+
# disease_label = "Obesity"
|
| 394 |
+
# elif "epilepsy" in text_lower or "seizure" in text_lower or "convulsion" in text_lower:
|
| 395 |
+
# disease_label = "Epilepsy"
|
| 396 |
+
# elif "dementia" in text_lower or "alzheimer" in text_lower or "memory loss" in text_lower:
|
| 397 |
+
# disease_label = "Dementia"
|
| 398 |
+
# elif "autism" in text_lower or "asd" in text_lower:
|
| 399 |
+
# disease_label = "Autism Spectrum Disorder"
|
| 400 |
+
# elif "parkinson" in text_lower or "parkinson's disease" in text_lower:
|
| 401 |
+
# disease_label = "Parkinson's Disease"
|
| 402 |
+
# elif "leukemia" in text_lower or "blood cancer" in text_lower:
|
| 403 |
+
# disease_label = "Leukemia"
|
| 404 |
+
# elif "lymphoma" in text_lower:
|
| 405 |
+
# disease_label = "Lymphoma"
|
| 406 |
+
# elif "glaucoma" in text_lower:
|
| 407 |
+
# disease_label = "Glaucoma"
|
| 408 |
+
# elif "hepatitis" in text_lower or "liver inflammation" in text_lower:
|
| 409 |
+
# disease_label = "Hepatitis"
|
| 410 |
+
# elif "cirrhosis" in text_lower or "liver failure" in text_lower:
|
| 411 |
+
# disease_label = "Liver Cirrhosis"
|
| 412 |
+
# elif "kidney" in text_lower or "renal" in text_lower or "nephropathy" in text_lower or "ckd" in text_lower:
|
| 413 |
+
# disease_label = "Kidney Disease"
|
| 414 |
+
# elif "thyroid" in text_lower or "hyperthyroidism" in text_lower or "hypothyroidism" in text_lower:
|
| 415 |
+
# disease_label = "Thyroid Disorder"
|
| 416 |
+
# elif "hiv" in text_lower or "aids" in text_lower:
|
| 417 |
+
# disease_label = "HIV/AIDS"
|
| 418 |
+
# elif "anemia" in text_lower or "low hemoglobin" in text_lower or "iron deficiency" in text_lower:
|
| 419 |
+
# disease_label = "Anemia"
|
| 420 |
+
# elif "migraine" in text_lower or "headache" in text_lower:
|
| 421 |
+
# disease_label = "Migraine"
|
| 422 |
+
# elif "psoriasis" in text_lower:
|
| 423 |
+
# disease_label = "Psoriasis"
|
| 424 |
+
# elif "eczema" in text_lower or "atopic dermatitis" in text_lower:
|
| 425 |
+
# disease_label = "Eczema"
|
| 426 |
+
# elif "vitiligo" in text_lower:
|
| 427 |
+
# disease_label = "Vitiligo"
|
| 428 |
+
# elif "cholera" in text_lower:
|
| 429 |
+
# disease_label = "Cholera"
|
| 430 |
+
# elif "typhoid" in text_lower:
|
| 431 |
+
# disease_label = "Typhoid"
|
| 432 |
+
# elif "meningitis" in text_lower:
|
| 433 |
+
# disease_label = "Meningitis"
|
| 434 |
+
# elif "insomnia" in text_lower:
|
| 435 |
+
# disease_label = "Insomnia"
|
| 436 |
+
# elif "sleep apnea" in text_lower or "obstructive sleep apnea" in text_lower or "osa" in text_lower:
|
| 437 |
+
# disease_label = "Sleep Apnea"
|
| 438 |
+
# elif "fibromyalgia" in text_lower:
|
| 439 |
+
# disease_label = "Fibromyalgia"
|
| 440 |
+
# elif "lupus" in text_lower or "systemic lupus erythematosus" in text_lower or "sle" in text_lower:
|
| 441 |
+
# disease_label = "Lupus"
|
| 442 |
+
# elif "sclerosis" in text_lower or "multiple sclerosis" in text_lower or "ms " in text_lower:
|
| 443 |
+
# disease_label = "Multiple Sclerosis"
|
| 444 |
+
# elif "shingles" in text_lower or "herpes zoster" in text_lower:
|
| 445 |
+
# disease_label = "Shingles"
|
| 446 |
+
# elif "chickenpox" in text_lower or "varicella" in text_lower:
|
| 447 |
+
# disease_label = "Chickenpox"
|
| 448 |
+
# elif "covid" in text_lower or "corona" in text_lower or "sars-cov-2" in text_lower:
|
| 449 |
+
# disease_label = "COVID-19"
|
| 450 |
+
# elif "influenza" in text_lower or "flu" in text_lower:
|
| 451 |
+
# disease_label = "Influenza"
|
| 452 |
+
# elif "smallpox" in text_lower:
|
| 453 |
+
# disease_label = "Smallpox"
|
| 454 |
+
# elif "measles" in text_lower:
|
| 455 |
+
# disease_label = "Measles"
|
| 456 |
+
# elif "polio" in text_lower or "poliomyelitis" in text_lower:
|
| 457 |
+
# disease_label = "Polio"
|
| 458 |
+
# elif "botulism" in text_lower:
|
| 459 |
+
# disease_label = "Botulism"
|
| 460 |
+
# elif "lyme disease" in text_lower or "borreliosis" in text_lower:
|
| 461 |
+
# disease_label = "Lyme Disease"
|
| 462 |
+
# elif "zika virus" in text_lower or "zika" in text_lower:
|
| 463 |
+
# disease_label = "Zika Virus"
|
| 464 |
+
# elif "ebola" in text_lower:
|
| 465 |
+
# disease_label = "Ebola"
|
| 466 |
+
# elif "marburg virus" in text_lower:
|
| 467 |
+
# disease_label = "Marburg Virus"
|
| 468 |
+
# elif "west nile virus" in text_lower or "west nile" in text_lower:
|
| 469 |
+
# disease_label = "West Nile Virus"
|
| 470 |
+
# elif "sars" in text_lower:
|
| 471 |
+
# disease_label = "SARS"
|
| 472 |
+
# elif "mers" in text_lower:
|
| 473 |
+
# disease_label = "MERS"
|
| 474 |
+
# elif "e. coli infection" in text_lower or "ecoli" in text_lower:
|
| 475 |
+
# disease_label = "E. coli Infection"
|
| 476 |
+
# elif "salmonella" in text_lower:
|
| 477 |
+
# disease_label = "Salmonella"
|
| 478 |
+
# elif "hepatitis a" in text_lower:
|
| 479 |
+
# disease_label = "Hepatitis A"
|
| 480 |
+
# elif "hepatitis b" in text_lower:
|
| 481 |
+
# disease_label = "Hepatitis B"
|
| 482 |
+
# elif "hepatitis c" in text_lower:
|
| 483 |
+
# disease_label = "Hepatitis C"
|
| 484 |
+
# elif "rheumatoid arthritis" in text_lower:
|
| 485 |
+
# disease_label = "Rheumatoid Arthritis"
|
| 486 |
+
# elif "osteoporosis" in text_lower:
|
| 487 |
+
# disease_label = "Osteoporosis"
|
| 488 |
+
# elif "gout" in text_lower:
|
| 489 |
+
# disease_label = "Gout"
|
| 490 |
+
# elif "scleroderma" in text_lower:
|
| 491 |
+
# disease_label = "Scleroderma"
|
| 492 |
+
# elif "amyotrophic lateral sclerosis" in text_lower or "als" in text_lower:
|
| 493 |
+
# disease_label = "Amyotrophic Lateral Sclerosis"
|
| 494 |
+
# elif "muscular dystrophy" in text_lower:
|
| 495 |
+
# disease_label = "Muscular Dystrophy"
|
| 496 |
+
# elif "huntington's disease" in text_lower:
|
| 497 |
+
# disease_label = "Huntington's Disease"
|
| 498 |
+
# elif "alzheimers disease" in text_lower or "alzheimer's disease" in text_lower:
|
| 499 |
+
# disease_label = "Alzheimer's Disease"
|
| 500 |
+
# elif "chronic kidney disease" in text_lower or "ckd" in text_lower:
|
| 501 |
+
# disease_label = "Chronic Kidney Disease"
|
| 502 |
+
# elif "chronic obstructive pulmonary disease" in text_lower or "copd" in text_lower:
|
| 503 |
+
# disease_label = "Chronic Obstructive Pulmonary Disease"
|
| 504 |
+
# elif "addison's disease" in text_lower:
|
| 505 |
+
# disease_label = "Addison's Disease"
|
| 506 |
+
# elif "cushing's syndrome" in text_lower or "cushings syndrome" in text_lower:
|
| 507 |
+
# disease_label = "Cushing's Syndrome"
|
| 508 |
+
# elif "graves' disease" in text_lower or "graves disease" in text_lower:
|
| 509 |
+
# disease_label = "Graves' Disease"
|
| 510 |
+
# elif "hashimoto's thyroiditis" in text_lower or "hashimoto's disease" in text_lower:
|
| 511 |
+
# disease_label = "Hashimoto's Thyroiditis"
|
| 512 |
+
# elif "sarcoidosis" in text_lower:
|
| 513 |
+
# disease_label = "Sarcoidosis"
|
| 514 |
+
# elif "histoplasmosis" in text_lower:
|
| 515 |
+
# disease_label = "Histoplasmosis"
|
| 516 |
+
# elif "cystic fibrosis" in text_lower:
|
| 517 |
+
# disease_label = "Cystic Fibrosis"
|
| 518 |
+
# elif "epstein-barr virus" in text_lower or "ebv" in text_lower:
|
| 519 |
+
# disease_label = "Epstein-Barr Virus Infection"
|
| 520 |
+
# elif "mononucleosis" in text_lower or "mono" in text_lower:
|
| 521 |
+
# disease_label = "Mononucleosis"
|
| 522 |
+
# else:
|
| 523 |
+
# disease_label = "Unknown"
|
| 524 |
+
|
| 525 |
return severity_label
|
| 526 |
|
| 527 |
# Links for diseases
|
|
|
|
| 541 |
4. The patient reported no chest pain or signs of heart disease.
|
| 542 |
5. Overall, there is no evidence of tumor recurrence at this time."""
|
| 543 |
print(detect_past_diseases(sample_text, threshold=90))
|
|
|
|
| 544 |
print(extract_non_negated_keywords(sample_text, threshold=80))
|
| 545 |
+
print(analyze_measurements(sample_text, df))
|
| 546 |
|
| 547 |
+
|