Spaces:
Sleeping
Sleeping
| import os | |
| import numpy as np | |
| import pytesseract | |
| from PIL import Image | |
def _mean_confidence(raw_confidences):
    """Return the mean of the usable OCR confidences, or 0 if there are none.

    Values may arrive as ints, floats, or strings depending on the
    pytesseract version, so each one is parsed with ``float``. Entries that
    cannot be parsed, and negative entries (Tesseract reports ``-1`` for
    rows that carry no recognised word), are ignored.
    """
    usable = []
    for raw in raw_confidences:
        try:
            value = float(raw)
        except (TypeError, ValueError):
            continue
        if value >= 0:
            usable.append(value)
    return sum(usable) / len(usable) if usable else 0


def extract_weight_from_image(pil_img):
    """Run digit-only OCR on an image and return the text with its confidence.

    The image is converted to grayscale, lightly blurred, and binarised with
    Otsu's threshold before being passed to Tesseract as a single text line
    restricted to the characters ``0123456789.``.

    Args:
        pil_img: Image object exposing ``convert("RGB")`` (presumably a
            ``PIL.Image.Image`` -- confirm against callers).

    Returns:
        A ``(text, avg_conf)`` tuple. ``text`` contains only digits and dots
        (possibly empty); ``avg_conf`` is the mean of the non-negative
        confidences reported by Tesseract, or ``0`` when there are none.

    Raises:
        ImportError: If OpenCV still cannot be imported after the libGL
            install fallback has run.
    """
    # OpenCV needs libGL at import time. Only attempt the (slow, root-only)
    # system install when the import actually fails, instead of running
    # apt-get on every single call.
    try:
        import cv2
    except ImportError:
        os.system("apt-get update && apt-get install -y libgl1-mesa-glx")
        import cv2

    # Pre-processing: grayscale -> 3x3 Gaussian blur -> Otsu binarisation.
    image = np.array(pil_img.convert("RGB"))
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    _, thresh = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # --psm 7 treats the image as a single line of text.
    config = '--psm 7 -c tessedit_char_whitelist=0123456789.'
    data = pytesseract.image_to_data(
        thresh, config=config, output_type=pytesseract.Output.DICT
    )

    # The whitelist is advisory for some Tesseract engines, so filter again.
    allowed = set('0123456789.')
    extracted_text = ''.join(
        ch for ch in ''.join(data['text']) if ch in allowed
    )

    return extracted_text, _mean_confidence(data['conf'])