Spaces:

wjbmattingly
/

docyolo-demo

Sleeping

App Files Files Community

wjbmattingly committed on Mar 26

Commit

d39c80d

verified ·

1 Parent(s): b7fdefb

Create app.py

Browse files

Files changed (1) hide show

app.py +110 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import os
+os.environ["GRADIO_TEMP_DIR"] = "./tmp"
+import sys
+import spaces
+import torch
+import torchvision
+import gradio as gr
+import numpy as np
+from PIL import Image
+from huggingface_hub import snapshot_download
+from visualization import visualize_bbox
+# == download weights ==
+model_dir = snapshot_download('juliozhao/DocLayout-YOLO-DocStructBench', local_dir='./models/DocLayout-YOLO-DocStructBench')
+# == select device ==
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+id_to_names = {
+    0: 'title',
+    1: 'plain text',
+    2: 'abandon',
+    3: 'figure',
+    4: 'figure_caption',
+    5: 'table',
+    6: 'table_caption',
+    7: 'table_footnote',
+    8: 'isolate_formula',
+    9: 'formula_caption'
+}
+@spaces.GPU
+def recognize_image(input_img, conf_threshold, iou_threshold):
+    det_res = model.predict(
+        input_img,
+        imgsz=1024,
+        conf=conf_threshold,
+        device=device,
+    )[0]
+    boxes = det_res.__dict__['boxes'].xyxy
+    classes = det_res.__dict__['boxes'].cls
+    scores = det_res.__dict__['boxes'].conf
+    indices = torchvision.ops.nms(boxes=torch.Tensor(boxes), scores=torch.Tensor(scores),iou_threshold=iou_threshold)
+    boxes, scores, classes = boxes[indices], scores[indices], classes[indices]
+    if len(boxes.shape) == 1:
+        boxes = np.expand_dims(boxes, 0)
+        scores = np.expand_dims(scores, 0)
+        classes = np.expand_dims(classes, 0)
+    vis_result = visualize_bbox(input_img, boxes, classes, scores, id_to_names)
+    return vis_result
+def gradio_reset():
+    return gr.update(value=None), gr.update(value=None)
+if __name__ == "__main__":
+    root_path = os.path.abspath(os.getcwd())
+    # == load model ==
+    from doclayout_yolo import YOLOv10
+    print(f"Using device: {device}")
+    model = YOLOv10(os.path.join(os.path.dirname(__file__), "models", "DocLayout-YOLO-DocStructBench", "doclayout_yolo_docstructbench_imgsz1024.pt"))  # load an official model
+    with open("header.html", "r") as file:
+        header = file.read()
+    with gr.Blocks() as demo:
+        gr.HTML(header)
+        with gr.Row():
+            with gr.Column():
+                input_img = gr.Image(label=" ", interactive=True)
+                with gr.Row():
+                    clear = gr.Button(value="Clear")
+                    predict = gr.Button(value="Detect", interactive=True, variant="primary")
+                with gr.Row():
+                    conf_threshold = gr.Slider(
+                        label="Confidence Threshold",
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.05,
+                        value=0.25,
+                    )
+                with gr.Row():
+                    iou_threshold = gr.Slider(
+                        label="NMS IOU Threshold",
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.05,
+                        value=0.45,
+                    )
+                with gr.Accordion("Examples:"):
+                    example_root = os.path.join(os.path.dirname(__file__), "assets", "example")
+                    gr.Examples(
+                        examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
+                                    _.endswith("jpg")],
+                        inputs=[input_img],
+                    )
+            with gr.Column():
+                gr.Button(value="Predict Result:", interactive=False)
+                output_img = gr.Image(label=" ", interactive=False)
+        clear.click(gradio_reset, inputs=None, outputs=[input_img, output_img])
+        predict.click(recognize_image, inputs=[input_img,conf_threshold,iou_threshold], outputs=[output_img])
+    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)