AutoWeightLogger1

Sleeping

App Files Files Community

Sanjayraju30 committed on Jun 25

Commit

25aa654

verified ·

1 Parent(s): 5699ebb

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +392 -20

ocr_engine.py CHANGED Viewed

@@ -3,36 +3,408 @@ import numpy as np
 import cv2
 import re
 import logging
 from PIL import Image
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 def preprocess_image(img):
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    resized = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
-    blurred = cv2.GaussianBlur(resized, (3, 3), 0)
-    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                   cv2.THRESH_BINARY_INV, 11, 2)
-    return thresh
 def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        processed = preprocess_image(img)
-        config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.kg'
-        raw_text = pytesseract.image_to_string(processed, config=config)
-        logging.info(f"OCR Raw Output: {raw_text}")
-        cleaned = raw_text.replace(" ", "").replace("\n", "")
-        match = re.search(r"(\d+\.?\d*)", cleaned)
-        if match:
-            value = float(match.group(1))
-            if 0 < value <= 5000:
-                return str(value), 90.0
-        return "Not detected", 0.0
-    except Exception as e:
-        logging.error(f"OCR error: {e}")
         return "Not detected", 0.0

 import cv2
 import re
 import logging
+from datetime import datetime
+import os
 from PIL import Image
+# Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Directory for debug images
+DEBUG_DIR = "debug_images"
+os.makedirs(DEBUG_DIR, exist_ok=True)
+def save_debug_image(img, filename_suffix, prefix=""):
+    """Save image to debug directory with timestamp."""
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
+    if isinstance(img, Image.Image):
+        img.save(filename)
+    elif len(img.shape) == 3:
+        cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+    else:
+        cv2.imwrite(filename, img)
+    logging.info(f"Saved debug image: {filename}")
+def estimate_brightness(img):
+    """Estimate image brightness."""
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    return np.mean(gray)
 def preprocess_image(img):
+    """Preprocess image with simplified, robust contrast enhancement."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    brightness = estimate_brightness(img)
+    # Apply mild CLAHE for contrast
+    clahe_clip = 8.0 if brightness < 90 else 4.0
+    clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
+    enhanced = clahe.apply(gray)
+    save_debug_image(enhanced, "01_preprocess_clahe")
+    # Light blur to reduce noise
+    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)
+    save_debug_image(blurred, "02_preprocess_blur")
+    # Dynamic thresholding with larger block size for small displays
+    block_size = max(7, min(31, int(img.shape[0] / 20) * 2 + 1))
+    thresh = cv2.adaptiveThreshold(
+        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY_INV, block_size, 3
+    )
+    # Minimal morphological operations
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
+    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
+    save_debug_image(thresh, "03_preprocess_morph")
+    return thresh, enhanced
+def correct_rotation(img):
+    """Correct image rotation using edge detection."""
+    try:
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 30, 100, apertureSize=3)
+        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=25, minLineLength=15, maxLineGap=10)
+        if lines is not None:
+            angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
+            angle = np.median(angles)
+            if abs(angle) > 0.3:
+                h, w = img.shape[:2]
+                center = (w // 2, h // 2)
+                M = cv2.getRotationMatrix2D(center, angle, 1.0)
+                img = cv2.warpAffine(img, M, (w, h))
+                save_debug_image(img, "00_rotated_image")
+                logging.info(f"Applied rotation: {angle:.2f} degrees")
+        return img
+    except Exception as e:
+        logging.error(f"Rotation correction failed: {str(e)}")
+        return img
+def detect_roi(img):
+    """Detect region of interest with broader contour analysis."""
+    try:
+        save_debug_image(img, "04_original")
+        thresh, enhanced = preprocess_image(img)
+        brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        block_sizes = [max(7, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [5, 10, 20]]
+        valid_contours = []
+        img_area = img.shape[0] * img.shape[1]
+        for block_size in block_sizes:
+            temp_thresh = cv2.adaptiveThreshold(
+                enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                cv2.THRESH_BINARY_INV, block_size, 3
+            )
+            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
+            temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
+            save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
+            contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            for c in contours:
+                area = cv2.contourArea(c)
+                x, y, w, h = cv2.boundingRect(c)
+                roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
+                aspect_ratio = w / h
+                if (50 < area < (img_area * 0.95) and
+                    0.05 <= aspect_ratio <= 20.0 and w > 20 and h > 8 and roi_brightness > 15):
+                    valid_contours.append((c, area * roi_brightness))
+                    logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
+        if valid_contours:
+            contour, _ = max(valid_contours, key=lambda x: x[1])
+            x, y, w, h = cv2.boundingRect(contour)
+            padding = max(5, min(20, int(min(w, h) * 0.4)))
+            x, y = max(0, x - padding), max(0, y - padding)
+            w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
+            roi_img = img[y:y+h, x:x+w]
+            save_debug_image(roi_img, "06_detected_roi")
+            logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
+            return roi_img, (x, y, w, h)
+        logging.info("No ROI found, using full image.")
+        save_debug_image(img, "06_no_roi_fallback")
+        return img, None
+    except Exception as e:
+        logging.error(f"ROI detection failed: {str(e)}")
+        save_debug_image(img, "06_roi_error_fallback")
+        return img, None
+def detect_digit_template(digit_img, brightness):
+    """Digit recognition with expanded template matching."""
+    try:
+        h, w = digit_img.shape
+        if h < 5 or w < 2:
+            logging.debug("Digit image too small for template matching.")
+            return None
+        # Expanded digit templates for seven-segment display variations
+        digit_templates = {
+            '0': [
+                np.array([[1, 1, 1, 1, 1],
+                          [1, 0, 0, 0, 1],
+                          [1, 0, 0, 0, 1],
+                          [1, 0, 0, 0, 1],
+                          [1, 1, 1, 1, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [1, 0, 0, 1],
+                          [1, 0, 0, 1],
+                          [1, 0, 0, 1],
+                          [1, 1, 1, 1]], dtype=np.float32)
+            ],
+            '1': [
+                np.array([[0, 0, 1, 0, 0],
+                          [0, 0, 1, 0, 0],
+                          [0, 0, 1, 0, 0],
+                          [0, 0, 1, 0, 0],
+                          [0, 0, 1, 0, 0]], dtype=np.float32),
+                np.array([[0, 1, 0],
+                          [0, 1, 0],
+                          [0, 1, 0],
+                          [0, 1, 0],
+                          [0, 1, 0]], dtype=np.float32)
+            ],
+            '2': [
+                np.array([[1, 1, 1, 1, 1],
+                          [0, 0, 0, 1, 1],
+                          [1, 1, 1, 1, 1],
+                          [1, 1, 0, 0, 0],
+                          [1, 1, 1, 1, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [0, 0, 1, 1],
+                          [1, 1, 1, 1],
+                          [1, 1, 0, 0],
+                          [1, 1, 1, 1]], dtype=np.float32)
+            ],
+            '3': [
+                np.array([[1, 1, 1, 1, 1],
+                          [0, 0, 0, 1, 1],
+                          [1, 1, 1, 1, 1],
+                          [0, 0, 0, 1, 1],
+                          [1, 1, 1, 1, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [0, 0, 1, 1],
+                          [1, 1, 1, 1],
+                          [0, 0, 1, 1],
+                          [1, 1, 1, 1]], dtype=np.float32)
+            ],
+            '4': [
+                np.array([[1, 1, 0, 0, 1],
+                          [1, 1, 0, 0, 1],
+                          [1, 1, 1, 1, 1],
+                          [0, 0, 0, 0, 1],
+                          [0, 0, 0, 0, 1]], dtype=np.float32),
+                np.array([[1, 0, 0, 1],
+                          [1, 0, 0, 1],
+                          [1, 1, 1, 1],
+                          [0, 0, 0, 1],
+                          [0, 0, 0, 1]], dtype=np.float32)
+            ],
+            '5': [
+                np.array([[1, 1, 1, 1, 1],
+                          [1, 1, 0, 0, 0],
+                          [1, 1, 1, 1, 1],
+                          [0, 0, 0, 1, 1],
+                          [1, 1, 1, 1, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [1, 1, 0, 0],
+                          [1, 1, 1, 1],
+                          [0, 0, 1, 1],
+                          [1, 1, 1, 1]], dtype=np.float32)
+            ],
+            '6': [
+                np.array([[1, 1, 1, 1, 1],
+                          [1, 1, 0, 0, 0],
+                          [1, 1, 1, 1, 1],
+                          [1, 0, 0, 1, 1],
+                          [1, 1, 1, 1, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [1, 1, 0, 0],
+                          [1, 1, 1, 1],
+                          [1, 0, 1, 1],
+                          [1, 1, 1, 1]], dtype=np.float32)
+            ],
+            '7': [
+                np.array([[1, 1, 1, 1, 1],
+                          [0, 0, 0, 0, 1],
+                          [0, 0, 0, 0, 1],
+                          [0, 0, 0, 0, 1],
+                          [0, 0, 0, 0, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [0, 0, 0, 1],
+                          [0, 0, 0, 1],
+                          [0, 0, 0, 1],
+                          [0, 0, 0, 1]], dtype=np.float32)
+            ],
+            '8': [
+                np.array([[1, 1, 1, 1, 1],
+                          [1, 0, 0, 0, 1],
+                          [1, 1, 1, 1, 1],
+                          [1, 0, 0, 0, 1],
+                          [1, 1, 1, 1, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [1, 0, 0, 1],
+                          [1, 1, 1, 1],
+                          [1, 0, 0, 1],
+                          [1, 1, 1, 1]], dtype=np.float32)
+            ],
+            '9': [
+                np.array([[1, 1, 1, 1, 1],
+                          [1, 0, 0, 0, 1],
+                          [1, 1, 1, 1, 1],
+                          [0, 0, 0, 1, 1],
+                          [1, 1, 1, 1, 1]], dtype=np.float32),
+                np.array([[1, 1, 1, 1],
+                          [1, 0, 0, 1],
+                          [1, 1, 1, 1],
+                          [0, 0, 1, 1],
+                          [1, 1, 1, 1]], dtype=np.float32)
+            ],
+            '.': [
+                np.array([[0, 0, 0],
+                          [0, 1, 0],
+                          [0, 0, 0]], dtype=np.float32),
+                np.array([[0, 0],
+                          [1, 0],
+                          [0, 0]], dtype=np.float32)
+            ]
+        }
+        # Try multiple sizes for digit image
+        sizes = [(5, 5), (4, 4), (3, 3)] if h > w else [(3, 3), (2, 2)]
+        best_match, best_score = None, -1
+        for size in sizes:
+            digit_img_resized = cv2.resize(digit_img, size, interpolation=cv2.INTER_AREA)
+            digit_img_resized = (digit_img_resized > 100).astype(np.float32)  # Binarize
+            for digit, templates in digit_templates.items():
+                for template in templates:
+                    if digit == '.' and size[0] > 3:
+                        continue
+                    if digit != '.' and size[0] <= 3:
+                        continue
+                    if template.shape[0] != size[0] or template.shape[1] != size[1]:
+                        continue
+                    result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
+                    _, max_val, _, _ = cv2.minMaxLoc(result)
+                    if max_val > 0.55 and max_val > best_score:  # Further lowered threshold
+                        best_score = max_val
+                        best_match = digit
+        logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
+        return best_match if best_score > 0.55 else None
+    except Exception as e:
+        logging.error(f"Template digit detection failed: {str(e)}")
+        return None
+def perform_ocr(img, roi_bbox):
+    """Perform OCR with Tesseract and robust template fallback."""
+    try:
+        thresh, enhanced = preprocess_image(img)
+        brightness = estimate_brightness(img)
+        pil_img = Image.fromarray(enhanced)
+        save_debug_image(pil_img, "07_ocr_input")
+        # Try multiple Tesseract configurations
+        configs = [
+            r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
+            r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'   # Block of text
+        ]
+        for config in configs:
+            text = pytesseract.image_to_string(pil_img, config=config)
+            logging.info(f"Tesseract raw output (config {config}): {text}")
+            text = re.sub(r"[^\d\.]", "", text)
+            if text.count('.') > 1:
+                text = text.replace('.', '', text.count('.') - 1)
+            text = text.strip('.')
+            if text and re.fullmatch(r"^\d*\.?\d*$", text):
+                text = text.lstrip('0') or '0'
+                confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
+                logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
+                return text, confidence
+        # Fallback to template-based detection
+        logging.info("Tesseract failed, using template-based detection.")
+        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        digits_info = []
+        for c in contours:
+            x, y, w, h = cv2.boundingRect(c)
+            if w > 4 and h > 5 and 0.03 <= w/h <= 4.0:
+                digits_info.append((x, x+w, y, y+h))
+        if digits_info:
+            digits_info.sort(key=lambda x: x[0])
+            recognized_text = ""
+            prev_x_max = -float('inf')
+            for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
+                x_min, y_min = max(0, x_min), max(0, y_min)
+                x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
+                if x_max <= x_min or y_max <= y_min:
+                    continue
+                digit_crop = thresh[y_min:y_max, x_min:x_max]
+                save_debug_image(digit_crop, f"08_digit_crop_{idx}")
+                digit = detect_digit_template(digit_crop, brightness)
+                if digit:
+                    recognized_text += digit
+                elif x_min - prev_x_max < 10 and prev_x_max != -float('inf'):
+                    recognized_text += '.'
+                prev_x_max = x_max
+            text = re.sub(r"[^\d\.]", "", recognized_text)
+            if text.count('.') > 1:
+                text = text.replace('.', '', text.count('.') - 1)
+            text = text.strip('.')
+            if text and re.fullmatch(r"^\d*\.?\d*$", text):
+                text = text.lstrip('0') or '0'
+                confidence = 90.0 if len(text.replace('.', '')) >= 3 else 85.0
+                logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
+                return text, confidence
+        logging.info("No valid digits detected.")
+        return None, 0.0
+    except Exception as e:
+        logging.error(f"OCR failed: {str(e)}")
+        return None, 0.0
 def extract_weight_from_image(pil_img):
+    """Extract weight from any digital scale image."""
     try:
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        save_debug_image(img, "00_input_image")
+        img = correct_rotation(img)
+        brightness = estimate_brightness(img)
+        conf_threshold = 0.65 if brightness > 70 else 0.45
+        # Try ROI-based detection
+        roi_img, roi_bbox = detect_roi(img)
+        if roi_bbox:
+            conf_threshold *= 1.15 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.05) else 1.0
+        result, confidence = perform_ocr(roi_img, roi_bbox)
+        if result and confidence >= conf_threshold * 100:
+            try:
+                weight = float(result)
+                if 0.001 <= weight <= 5000:
+                    logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
+                    return result, confidence
+                logging.warning(f"Weight {result} out of range.")
+            except ValueError:
+                logging.warning(f"Invalid weight format: {result}")
+        # Full image fallback
+        logging.info("Primary OCR failed, using full image fallback.")
+        result, confidence = perform_ocr(img, None)
+        if result and confidence >= conf_threshold * 0.85 * 100:
+            try:
+                weight = float(result)
+                if 0.001 <= weight <= 5000:
+                    logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
+                    return result, confidence
+                logging.warning(f"Full image weight {result} out of range.")
+            except ValueError:
+                logging.warning(f"Invalid full image weight format: {result}")
+        logging.info("No valid weight detected.")
         return "Not detected", 0.0
+    except Exception as e:
+        logging.error(f"Weight extraction failed: {str(e)}")
+        return "Not detected", 0.0