Spaces:

mindee
/

doctr

Running

App Files Files Community

odulcy-mindee committed on Oct 8, 2024

Commit

93d0893

verified ·

1 Parent(s): 92af12f

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

README.md +10 -2
app.py +33 -4
backend/pytorch.py +12 -0
backend/tensorflow.py +101 -0
packages.txt +1 -1

README.md CHANGED Viewed

@@ -4,13 +4,13 @@ emoji: 📑
 colorFrom: purple
 colorTo: pink
 sdk: streamlit
-sdk_version: 1.30.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
-# Configuration
 `title`: _string_
 Display title for the Space
@@ -37,3 +37,11 @@ Path is relative to the root of the repository.
 `pinned`: _boolean_
 Whether the Space stays on top of your list.

 colorFrom: purple
 colorTo: pink
 sdk: streamlit
+sdk_version: 1.39.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
+## Configuration
 `title`: _string_
 Display title for the Space
 `pinned`: _boolean_
 Whether the Space stays on top of your list.
+## Run the demo locally
+```bash
+cd demo
+pip install -r pt-requirements.txt
+streamlit run app.py
+```

app.py CHANGED Viewed

@@ -7,14 +7,25 @@ import cv2
 import matplotlib.pyplot as plt
 import numpy as np
 import streamlit as st
-import torch
 from doctr.io import DocumentFile
 from doctr.utils.visualization import visualize_page
-from backend.pytorch import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
-forward_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 def main(det_archs, reco_archs):
@@ -51,6 +62,7 @@ def main(det_archs, reco_archs):
     # Model selection
     st.sidebar.title("Model selection")
     det_arch = st.sidebar.selectbox("Text detection model", det_archs)
     reco_arch = st.sidebar.selectbox("Text recognition model", reco_archs)
@@ -60,12 +72,21 @@ def main(det_archs, reco_archs):
     st.sidebar.title("Parameters")
     assume_straight_pages = st.sidebar.checkbox("Assume straight pages", value=True)
     st.sidebar.write("\n")
     # Straighten pages
     straighten_pages = st.sidebar.checkbox("Straighten pages", value=False)
     st.sidebar.write("\n")
     # Binarization threshold
     bin_thresh = st.sidebar.slider("Binarization threshold", min_value=0.1, max_value=0.9, value=0.3, step=0.1)
     st.sidebar.write("\n")
     if st.sidebar.button("Analyze page"):
         if uploaded_file is None:
@@ -74,7 +95,15 @@ def main(det_archs, reco_archs):
         else:
             with st.spinner("Loading model..."):
                 predictor = load_predictor(
-                    det_arch, reco_arch, assume_straight_pages, straighten_pages, bin_thresh, forward_device
                 )
             with st.spinner("Analyzing..."):

 import matplotlib.pyplot as plt
 import numpy as np
 import streamlit as st
+from doctr.file_utils import is_tf_available
 from doctr.io import DocumentFile
 from doctr.utils.visualization import visualize_page
+if is_tf_available():
+    import tensorflow as tf
+    from backend.tensorflow import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
+    if any(tf.config.experimental.list_physical_devices("gpu")):
+        forward_device = tf.device("/gpu:0")
+    else:
+        forward_device = tf.device("/cpu:0")
+else:
+    import torch
+    from backend.pytorch import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
+    forward_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 def main(det_archs, reco_archs):
     # Model selection
     st.sidebar.title("Model selection")
+    st.sidebar.markdown("**Backend**: " + ("TensorFlow" if is_tf_available() else "PyTorch"))
     det_arch = st.sidebar.selectbox("Text detection model", det_archs)
     reco_arch = st.sidebar.selectbox("Text recognition model", reco_archs)
     st.sidebar.title("Parameters")
     assume_straight_pages = st.sidebar.checkbox("Assume straight pages", value=True)
     st.sidebar.write("\n")
+    # Disable page orientation detection
+    disable_page_orientation = st.sidebar.checkbox("Disable page orientation detection", value=False)
+    st.sidebar.write("\n")
+    # Disable crop orientation detection
+    disable_crop_orientation = st.sidebar.checkbox("Disable crop orientation detection", value=False)
+    st.sidebar.write("\n")
     # Straighten pages
     straighten_pages = st.sidebar.checkbox("Straighten pages", value=False)
     st.sidebar.write("\n")
     # Binarization threshold
     bin_thresh = st.sidebar.slider("Binarization threshold", min_value=0.1, max_value=0.9, value=0.3, step=0.1)
     st.sidebar.write("\n")
+    # Box threshold
+    box_thresh = st.sidebar.slider("Box threshold", min_value=0.1, max_value=0.9, value=0.1, step=0.1)
+    st.sidebar.write("\n")
     if st.sidebar.button("Analyze page"):
         if uploaded_file is None:
         else:
             with st.spinner("Loading model..."):
                 predictor = load_predictor(
+                    det_arch,
+                    reco_arch,
+                    assume_straight_pages,
+                    straighten_pages,
+                    disable_page_orientation,
+                    disable_crop_orientation,
+                    bin_thresh,
+                    box_thresh,
+                    forward_device,
                 )
             with st.spinner("Analyzing..."):

backend/pytorch.py CHANGED Viewed

@@ -10,6 +10,9 @@ from doctr.models import ocr_predictor
 from doctr.models.predictor import OCRPredictor
 DET_ARCHS = [
     "db_resnet50",
     "db_resnet34",
     "db_mobilenet_v3_large",
@@ -34,7 +37,10 @@ def load_predictor(
     reco_arch: str,
     assume_straight_pages: bool,
     straighten_pages: bool,
     bin_thresh: float,
     device: torch.device,
 ) -> OCRPredictor:
     """Load a predictor from doctr.models
@@ -45,7 +51,10 @@ def load_predictor(
         reco_arch: recognition architecture
         assume_straight_pages: whether to assume straight pages or not
         straighten_pages: whether to straighten rotated pages or not
         bin_thresh: binarization threshold for the segmentation map
         device: torch.device, the device to load the predictor on
     Returns:
@@ -60,8 +69,11 @@ def load_predictor(
         straighten_pages=straighten_pages,
         export_as_straight_boxes=straighten_pages,
         detect_orientation=not assume_straight_pages,
     ).to(device)
     predictor.det_predictor.model.postprocessor.bin_thresh = bin_thresh
     return predictor

 from doctr.models.predictor import OCRPredictor
 DET_ARCHS = [
+    "fast_base",
+    "fast_small",
+    "fast_tiny",
     "db_resnet50",
     "db_resnet34",
     "db_mobilenet_v3_large",
     reco_arch: str,
     assume_straight_pages: bool,
     straighten_pages: bool,
+    disable_page_orientation: bool,
+    disable_crop_orientation: bool,
     bin_thresh: float,
+    box_thresh: float,
     device: torch.device,
 ) -> OCRPredictor:
     """Load a predictor from doctr.models
         reco_arch: recognition architecture
         assume_straight_pages: whether to assume straight pages or not
         straighten_pages: whether to straighten rotated pages or not
+        disable_page_orientation: whether to disable page orientation or not
+        disable_crop_orientation: whether to disable crop orientation or not
         bin_thresh: binarization threshold for the segmentation map
+        box_thresh: minimal objectness score to consider a box
         device: torch.device, the device to load the predictor on
     Returns:
         straighten_pages=straighten_pages,
         export_as_straight_boxes=straighten_pages,
         detect_orientation=not assume_straight_pages,
+        disable_page_orientation=disable_page_orientation,
+        disable_crop_orientation=disable_crop_orientation,
     ).to(device)
     predictor.det_predictor.model.postprocessor.bin_thresh = bin_thresh
+    predictor.det_predictor.model.postprocessor.box_thresh = box_thresh
     return predictor

backend/tensorflow.py ADDED Viewed

	@@ -0,0 +1,101 @@

+# Copyright (C) 2021-2024, Mindee.
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import numpy as np
+import tensorflow as tf
+from doctr.models import ocr_predictor
+from doctr.models.predictor import OCRPredictor
+# Text detection architecture names exported by this backend (app.py imports this list
+# when the TensorFlow backend is selected). Order is kept as authored.
+DET_ARCHS = [
+    "fast_base",
+    "fast_small",
+    "fast_tiny",
+    "db_resnet50",
+    "db_mobilenet_v3_large",
+    "linknet_resnet18",
+    "linknet_resnet34",
+    "linknet_resnet50",
+]
+# Text recognition architecture names exported by this backend (imported by app.py
+# alongside DET_ARCHS).
+RECO_ARCHS = [
+    "crnn_vgg16_bn",
+    "crnn_mobilenet_v3_small",
+    "crnn_mobilenet_v3_large",
+    "master",
+    "sar_resnet31",
+    "vitstr_small",
+    "vitstr_base",
+    "parseq",
+]
+def load_predictor(
+    det_arch: str,
+    reco_arch: str,
+    assume_straight_pages: bool,
+    straighten_pages: bool,
+    disable_page_orientation: bool,
+    disable_crop_orientation: bool,
+    bin_thresh: float,
+    box_thresh: float,
+    device: tf.device,
+) -> OCRPredictor:
+    """Load a predictor from doctr.models
+    Args:
+    ----
+        det_arch: detection architecture
+        reco_arch: recognition architecture
+        assume_straight_pages: whether to assume straight pages or not
+        straighten_pages: whether to straighten rotated pages or not
+        disable_page_orientation: whether to disable page orientation or not
+        disable_crop_orientation: whether to disable crop orientation or not
+        bin_thresh: binarization threshold for the segmentation map
+        box_thresh: threshold for the detection boxes
+        device: tf.device, the device to load the predictor on
+    Returns:
+    -------
+        instance of OCRPredictor
+    """
+    with device:
+        predictor = ocr_predictor(
+            det_arch,
+            reco_arch,
+            pretrained=True,
+            assume_straight_pages=assume_straight_pages,
+            straighten_pages=straighten_pages,
+            export_as_straight_boxes=straighten_pages,
+            detect_orientation=not assume_straight_pages,
+            disable_page_orientation=disable_page_orientation,
+            disable_crop_orientation=disable_crop_orientation,
+        )
+        predictor.det_predictor.model.postprocessor.bin_thresh = bin_thresh
+        predictor.det_predictor.model.postprocessor.box_thresh = box_thresh
+    return predictor
+def forward_image(predictor: OCRPredictor, image: np.ndarray, device: tf.device) -> np.ndarray:
+    """Forward an image through the predictor
+    Args:
+    ----
+        predictor: instance of OCRPredictor
+        image: image to process as numpy array
+        device: tf.device, the device to process the image on
+    Returns:
+    -------
+        segmentation map
+    """
+    with device:
+        processed_batches = predictor.det_predictor.pre_processor([image])
+        out = predictor.det_predictor.model(processed_batches[0], return_model_output=True)
+        seg_map = out["out_map"]
+    with tf.device("/cpu:0"):
+        seg_map = tf.identity(seg_map).numpy()
+    return seg_map

packages.txt CHANGED Viewed

	@@ -1 +1 @@
1	- python3-opencv


1	+ python3-opencv