Spaces:

Sanjayraju30
/

logger

Sleeping

Sanjayraju30 committed on Jun 30

Commit

d22d28e

verified ·

1 Parent(s): ed22ec8

Update ocr_engine.py

Files changed (1) hide show

ocr_engine.py CHANGED Viewed

@@ -1,33 +1,30 @@
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
-# Load OCR model once
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
 model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
 def extract_weight(image: Image.Image) -> str:
-    # Ensure image is in RGB
     image = image.convert("RGB")
-    # Process with Hugging Face OCR
     pixel_values = processor(images=image, return_tensors="pt").pixel_values
     generated_ids = model.generate(pixel_values)
     full_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    # Normalize text
-    full_text_cleaned = full_text.lower().replace(" ", "")
     # Detect unit
-    if "kg" in full_text_cleaned:
         unit = "kg"
-    elif "g" in full_text_cleaned or "gram" in full_text_cleaned:
         unit = "grams"
     else:
-        unit = "grams"  # default to grams if not clear
-    # Extract number (includes decimals)
-    import re
-    match = re.search(r"(\d+(\.\d+)?)", full_text_cleaned)
     if match:
         weight = match.group(1)
         return f"{weight} {unit}"

 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
+import re
+# Load model + processor
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
 model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
 def extract_weight(image: Image.Image) -> str:
     image = image.convert("RGB")
     pixel_values = processor(images=image, return_tensors="pt").pixel_values
     generated_ids = model.generate(pixel_values)
     full_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    # Lowercase text but don't strip spacing before kg detection
+    full_text_lower = full_text.lower()
     # Detect unit
+    if "kg" in full_text_lower.replace(" ", ""):
         unit = "kg"
+    elif "g" in full_text_lower.replace(" ", "") or "gram" in full_text_lower:
         unit = "grams"
     else:
+        unit = "grams"  # default
+    # Extract number using regex
+    match = re.search(r"(\d+(\.\d+)?)", full_text)
     if match:
         weight = match.group(1)
         return f"{weight} {unit}"