|
import gradio as gr |
|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
|
from PIL import Image |
|
import os |
|
import re |
|
|
|
|
|
# Single source of truth for the Hugging Face checkpoint so the processor and
# the model are always loaded from the same weights.
# NOTE(review): "stage1" appears to be the pre-training-only TrOCR checkpoint;
# confirm whether a fine-tuned variant was intended for real lab results.
_TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"

# Both objects are created once at import time and reused by every OCR call.
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

# Folder that holds one "<name>_records.txt" file per patient; it is created
# on import if it does not exist yet.
PATIENT_RECORDS_DIR = "records"
os.makedirs(PATIENT_RECORDS_DIR, exist_ok=True)
|
|
|
|
|
def extract_patient_name(file_name):
    """Return the patient name encoded at the start of an image file name.

    The expected layout is ``<Name>_<anything>.<ext>``, where ``<Name>`` is a
    run of ASCII letters and ``<ext>`` is ``jpg``, ``jpeg`` or ``png`` in any
    letter case, e.g. ``JuanDelaCruz_2025-06-13.jpg``.

    Args:
        file_name: Base name of the file (no directory component).

    Returns:
        The leading alphabetic name, or ``None`` when the file name does not
        follow the expected layout.
    """
    # IGNORECASE lets ".JPG"/".PNG" (common from cameras and phones) through;
    # the name group already spells out both letter cases, so it is unaffected.
    match = re.match(
        r"([A-Za-z]+)_.*\.(?:jpg|jpeg|png)$",
        file_name,
        re.IGNORECASE,
    )
    return match.group(1) if match else None
|
|
|
|
|
def perform_ocr(image_path):
    """Run the TrOCR model on an image file and return the recognised text.

    Args:
        image_path: Path to an image file that Pillow can open.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        skipped and surrounding whitespace stripped.
    """
    # The context manager releases the file handle deterministically, even if
    # decoding fails. ``convert`` hands back a separate image object, so it
    # stays usable after the ``with`` block closes the source file.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)

    # Only one image was passed in, so take the first decoded sequence.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0].strip()
|
|
|
|
|
def save_record(patient_name, ocr_text):
    """Append an OCR result to the patient's record file.

    The record is stored at
    ``<PATIENT_RECORDS_DIR>/<patient_name>_records.txt``; opening in append
    mode creates the file on first use and preserves earlier entries.

    Args:
        patient_name: Name used as the record file prefix.
        ocr_text: Recognised text to append after the entry header.
    """
    file_path = os.path.join(PATIENT_RECORDS_DIR, f"{patient_name}_records.txt")

    # Explicit UTF-8: recognised text may contain characters that the platform
    # default encoding cannot represent, which would otherwise raise on write.
    with open(file_path, "a", encoding="utf-8") as f:
        f.write(f"\n\n===== New Lab Result =====\n{ocr_text}")
|
|
|
|
|
def process_lab_result(image_path):
    """Handle one uploaded lab result: OCR it and file it under the patient.

    The patient name is taken from the uploaded file's base name, the image is
    run through ``perform_ocr`` and the text is appended via ``save_record``.

    Args:
        image_path: Path of the uploaded file; may be ``None`` or empty when
            nothing was uploaded.

    Returns:
        A human-readable status message: an error when the upload is missing
        or the file name is not in the expected format, otherwise a
        confirmation followed by the extracted text.
    """
    # Status icons are written as escapes because the literal emoji had been
    # mangled by an encoding round-trip. U+274C (cross mark) and U+1F4C4 (page)
    # are best-guess restorations -- TODO confirm against the original.

    # Guard against a submit with no file; os.path.basename(None) would raise.
    if not image_path:
        return "\u274c No file uploaded. Use format: JuanDelaCruz_2025-06-13.jpg"

    patient_name = extract_patient_name(os.path.basename(image_path))
    if not patient_name:
        return (
            "\u274c Cannot extract patient name from filename. "
            "Use format: JuanDelaCruz_2025-06-13.jpg"
        )

    ocr_text = perform_ocr(image_path)
    save_record(patient_name, ocr_text)

    # U+2705 is the check mark whose stray byte had split this message in two.
    return (
        f"\u2705 OCR completed. Lab result saved for `{patient_name}`."
        f"\n\n\U0001F4C4 Extracted Text:\n\n{ocr_text}"
    )
|
|
|
|
|
# Web UI: one file-upload input wired to process_lab_result, with the returned
# status message rendered as plain text.
iface = gr.Interface(
    fn=process_lab_result,
    inputs=gr.File(label="Upload Lab Result (.jpg/.png)", type="filepath"),
    outputs="text",
    # U+1FA7A (stethoscope), written as an escape because the literal emoji
    # had been garbled by an encoding round-trip.
    title="\U0001FA7A Lab Result OCR",
    description=(
        "Upload a lab result image named like `JuanDelaCruz_2025-06-13.jpg`. "
        "The text will be extracted and saved to the patient's record."
    ),
)


if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    iface.launch()
|
|