Update app.py
app.py CHANGED
@@ -3,6 +3,12 @@ from transformers import AutoTokenizer, VisionEncoderDecoderModel, AutoImageProc
 from PIL import Image
 from torchvision.transforms.functional import crop
 import gradio as gr
+import json
+import base64
+import io
+from huggingface_hub import hf_hub_download
+import zipfile
+import os
 
 # Global variables for models
 object_detection_model = None
@@ -17,8 +23,29 @@ def init():
     # Step 1: Load the YOLOv5 model from Hugging Face
     try:
         print("Loading YOLOv5 model...")
-
-
+        # Get Hugging Face auth token from environment variable
+        auth_token = os.getenv("HF_AUTH_TOKEN")
+        if not auth_token:
+            print("Error: HF_AUTH_TOKEN environment variable not set.")
+            object_detection_model = None
+        else:
+            # Download the zip file from Hugging Face
+            zip_path = hf_hub_download(repo_id='Mexbow/Yolov5_object_detection', filename='yolov5.zip', use_auth_token=auth_token)
+
+            # Extract the YOLOv5 model
+            extract_path = './yolov5_model' # Specify extraction path
+            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                os.makedirs(extract_path, exist_ok=True)
+                zip_ref.extractall(extract_path)
+
+            # Load the YOLOv5 model
+            model_path = os.path.join(extract_path, 'yolov5/weights/best14.pt')
+            if not os.path.exists(model_path):
+                print(f"Error: YOLOv5 model file not found at {model_path}")
+                object_detection_model = None
+            else:
+                object_detection_model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path, trust_repo=True)
+                print("YOLOv5 model loaded successfully.")
     except Exception as e:
         print(f"Error loading YOLOv5 model: {e}")
         object_detection_model = None
@@ -38,7 +65,8 @@ def init():
 def crop_objects(image, boxes):
     cropped_images = []
     for box in boxes:
-
+        left, top, right, bottom = box
+        cropped_image = image.crop((left, top, right, bottom))
         cropped_images.append(cropped_image)
     return cropped_images
 
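Note (not part of this commit): crop_objects expects xyxy pixel boxes, matching PIL's (left, top, right, bottom) crop convention. A minimal, hypothetical sketch of how the detection step elsewhere in process_image presumably produces such boxes, assuming the standard ultralytics/yolov5 torch.hub results API:

# Hypothetical sketch, not from this diff; assumes the standard ultralytics/yolov5 results API.
results = object_detection_model(image)            # image is a PIL.Image
detections = results.xyxy[0]                       # rows of [x1, y1, x2, y2, confidence, class]
boxes = [row[:4].tolist() for row in detections]   # xyxy boxes in pixel coordinates
scores = [float(row[4]) for row in detections]     # confidence scores
crops = crop_objects(image, boxes)                 # one cropped PIL image per detection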
@@ -83,8 +111,11 @@ def process_image(image):
                 "confidence_score": float(score) # Convert to float
             })
 
+        # Render image with bounding boxes
+        result_image = results.render()[0]
+
         # Return the image with detections and the caption
-        return
+        return result_image, detection_results, original_caption
 
     except Exception as e:
         return None, {"error": str(e)}, None
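Note (not part of this commit): on ultralytics/yolov5 results objects, results.render() annotates the detections and returns a list of NumPy arrays, one per input image, so result_image here is an ndarray rather than a PIL image. If the installed Gradio version insists on PIL for an output declared with type="pil", a small conversion would be needed; a hypothetical one-liner:

# Hypothetical adjustment, only needed if the Gradio version in use rejects NumPy arrays here.
result_image = Image.fromarray(results.render()[0])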
@@ -96,9 +127,11 @@ init()
 interface = gr.Interface(
     fn=process_image, # Function to run
     inputs=gr.Image(type="pil"), # Input: Image upload
-    outputs=[
-
-
+    outputs=[
+        gr.Image(type="pil", label="Detected Objects"), # Output 1: Image with bounding boxes
+        gr.JSON(label="Object Captions & Bounding Boxes"), # Output 2: JSON results for each object
+        gr.Textbox(label="Whole Image Caption") # Output 3: Caption for the whole image
+    ],
     live=True
 )
 
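Operational note: with this change, init() requires the HF_AUTH_TOKEN environment variable and skips loading the YOLOv5 model when it is missing. A hypothetical local smoke test (the token value and image path are placeholders; on a Space the token would normally be configured as a repository secret):

# Hypothetical local smoke test, not part of this commit.
import os
os.environ["HF_AUTH_TOKEN"] = "hf_xxx"   # placeholder; must be set before app.py runs init()

from PIL import Image
import app                               # importing app.py runs init() at module level

annotated, objects_json, caption = app.process_image(Image.open("example.jpg"))
print(caption)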