Abdur123 committed
Commit 65b8939 · Parent: 7673bbf

Add application file

Files changed (5)
  1. .gitignore +201 -0
  2. app.py +145 -0
  3. img/example.jpg +0 -0
  4. inference.py +315 -0
  5. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,201 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ Pipfile.lock
+
+ # poetry
+ poetry.lock
+
+ # pdm
+ .pdm.toml
+
+ # PEP 582
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ .idea/
+
+ # VS Code
+ .vscode/
+
+ # Temporary files
+ *.tmp
+ *.temp
+ *.swp
+ *.swo
+ *~
+
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # User-specific directories and files
+ jlwkkuvw7a/
+ *.coco.json
+
+ # Model files (these will be downloaded automatically)
+ *.pt
+ *.pth
+ *.onnx
+ *.safetensors
+
+ # Lock files
+ uv.lock
+ poetry.lock
+ Pipfile.lock
+
+ # Project configuration files
+ pyproject.toml
+ setup.py
+ setup.cfg
+
+ # Logs
+ *.log
+ logs/
+
+ # Cache directories
+ .cache/
+ cache/
+
+ # Temporary directories
+ tmp/
+ temp/
+
+ # Large files that shouldn't be in git
+ *.zip
+ *.tar.gz
+ *.rar
+ *.7z
+
+ # Keep only essential files for deployment
+ # The following files are essential and should NOT be ignored:
+ # - app.py
+ # - inference.py
+ # - requirements.txt
+ # - README.md
+ # - img/example.jpg
+ # - .gitattributes
app.py ADDED
@@ -0,0 +1,145 @@
+ import gradio as gr
+ from pathlib import Path
+ import secrets
+ import shutil
+ from inference import detector, detector_processor, segment_predictor, ModelInference
+
+ current_dir = Path(__file__).parent
+
+
+ def process_images(image_path, files, slider_value, request: gr.Request):
+
+     user_dir: Path = current_dir / str(request.session_hash)
+     user_dir.mkdir(exist_ok=True)
+
+     annotation_path = user_dir / f"{secrets.token_hex(nbytes=8)}_annotations.coco.json"
+     class_names = list(inferencer.id2label.values())
+
+     if image_path:
+         print(f"Processing image: {image_path}")
+         seg_detections, annotated_frame = inferencer.predict_one(image_path)
+         if seg_detections is not None:  # predict_one returns (None, None) when nothing is detected
+             inferencer.save_annotations([image_path], [seg_detections], class_names, annotation_path)
+     elif files:
+         print(f"Processing files: {files}")
+         print(f"Batch size: {slider_value}")
+         all_image_paths, all_results, annotated_frame, detector_failed_list, segmentor_failed_list = inferencer.predict_folder(files, slider_value)
+
+         print(f"Detector failed list: {detector_failed_list}")
+         print(f"Segmentor failed list: {segmentor_failed_list}")
+
+         inferencer.save_annotations(all_image_paths, all_results, class_names, annotation_path)
+
+     return [
+         gr.UploadButton(visible=False),
+         gr.Button("Run", visible=False),
+         gr.DownloadButton("Download annotation results", value=annotation_path, visible=True),
+         gr.Image(value=annotated_frame, label="Annotated Image", visible=True),
+     ]
+
+
+ def upload_file():
+
+     return [
+         None,
+         gr.UploadButton(visible=False),
+         gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=True),
+         gr.Button("Run", visible=True),
+         gr.DownloadButton(visible=False),
+         gr.Image(value=None, label="Annotated Image", visible=True),
+     ]
+
+
+ def upload_image(image_path):
+
+     return [
+         gr.UploadButton(visible=False),
+         gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=False),
+         gr.Button("Run", visible=True),
+         gr.DownloadButton(visible=False),
+         gr.Image(value=None, label="Annotated Image", visible=True),
+     ]
+
+
+ def download_file():
+     return [
+         gr.Image(value=None),
+         gr.UploadButton(visible=True),
+         gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=False),
+         gr.Button("Run", visible=False),
+         gr.DownloadButton(visible=True),
+         gr.Image(value=None, visible=False),
+     ]
+
+
+ def delete_directory(request: gr.Request):
+     """Delete the user-specific directory when the user's session ends."""
+     user_dir = current_dir / str(request.session_hash)
+     if user_dir.exists():
+         shutil.rmtree(user_dir)
+
+
+ def create_gradio_interface():
+     with gr.Blocks(theme=gr.themes.Monochrome(), delete_cache=(60, 3600)) as demo:
+         gr.HTML("""
+         <div style="text-align: center;">
+             <h1>Satellite Image Roofs Auto Annotation</h1>
+             <p>Powered by a <a href="https://huggingface.co/Yifeng-Liu/rt-detr-finetuned-for-satellite-image-roofs-detection" target="_blank">fine-tuned RT-DETR model</a> and a FastSAM model.</p>
+             <p>📤 Upload an image or a folder containing images.</p>
+             <p>🖼️ Images are saved in a user-specific directory and are deleted when the user closes the page.</p>
+             <p>⚙️ Uploads are limited to 200 MB per request.</p>
+             <p>🏷️ Annotation results are saved in COCO format for download.</p>
+             <p>🔧 TODO: Enhance model inference using Intel OpenVINO.</p>
+         </div>
+         """)
+         with gr.Row():
+             with gr.Column(scale=1):
+                 img_input = gr.Image(
+                     interactive=True,
+                     sources=["upload", "clipboard"],
+                     show_share_button=True,
+                     type='filepath',
+                     label="Upload a single image",
+                 )
+                 upload_button = gr.UploadButton("Upload a folder", file_count="directory")
+                 batch_slider = gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=False)
+                 run_button = gr.Button("Run", visible=False)
+             with gr.Column(scale=1):
+                 img_output = gr.Image(label="Annotated Image", visible=False)
+                 download_button = gr.DownloadButton("Download annotation results", visible=False)
+
+         with gr.Row():
+             examples = gr.Examples(
+                 examples=[["./img/example.jpg"]],
+                 inputs=[img_input],
+                 outputs=[upload_button, batch_slider, run_button, download_button, img_output],
+                 fn=upload_image,
+                 run_on_click=True,
+             )
+
+         upload_button.upload(upload_file, None, [img_input, upload_button, batch_slider, run_button, download_button, img_output])
+
+         download_button.click(download_file, None, [img_input, upload_button, batch_slider, run_button, download_button, img_output])
+
+         run_button.click(process_images,
+                          [img_input, upload_button, batch_slider],
+                          [upload_button, run_button, download_button, img_output])
+
+         img_input.upload(upload_image, img_input, [upload_button, batch_slider, run_button, download_button, img_output])
+
+         demo.unload(delete_directory)
+
+     return demo
+
+
+ def inferencer_init():
+     id2label = {0: 'building'}
+     CONFIDENCE_THRESHOLD = 0.5
+     return ModelInference(detector, detector_processor, segment_predictor, id2label, CONFIDENCE_THRESHOLD)
+
+
+ inferencer = inferencer_init()
+
+ if __name__ == "__main__":
+     demo = create_gradio_interface()
+     demo.launch(max_file_size=200 * gr.FileSize.MB)
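
For reference, a minimal sketch of driving the pipeline without the Gradio UI. This script is an illustration, not part of the commit; it assumes the RT-DETR and FastSAM weights can be downloaded at import time and that img/example.jpg exists, and "annotations.coco.json" is a placeholder output path:

    # Hypothetical driver script (not in this commit).
    from app import inferencer  # module import builds the ModelInference instance

    # predict_one returns (None, None) when no roof is detected or segmented
    detections, annotated = inferencer.predict_one("img/example.jpg")
    if detections is not None:
        inferencer.save_annotations(
            ["img/example.jpg"], [detections],
            list(inferencer.id2label.values()),
            "annotations.coco.json",  # placeholder path
        )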
img/example.jpg ADDED
inference.py ADDED
@@ -0,0 +1,315 @@
+ from transformers import AutoModelForObjectDetection, AutoImageProcessor
+ from torch.utils.data import Dataset, DataLoader
+ import os
+ from tqdm import tqdm
+ from PIL import Image
+ from pathlib import Path
+ from ultralytics.models.fastsam import FastSAMPredictor
+ import supervision as sv
+ import torch
+ import numpy as np
+ import cv2
+ from typing import List, Tuple, Dict, Any, Optional
+ from supervision.dataset.utils import approximate_mask_with_polygons
+ from supervision.detection.utils import (
+     contains_holes,
+     contains_multiple_segments,
+ )
+
+ detector = AutoModelForObjectDetection.from_pretrained("Yifeng-Liu/rt-detr-finetuned-for-satellite-image-roofs-detection")
+ detector_processor = AutoImageProcessor.from_pretrained("Yifeng-Liu/rt-detr-finetuned-for-satellite-image-roofs-detection")
+
+
+ overrides = dict(conf=0.25, task="segment", mode="predict", model="FastSAM-x.pt", save=False)
+ segment_predictor = FastSAMPredictor(overrides=overrides)
+
+ # IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm"}  # image suffixes
+
+
+ class ImageInferenceDataset(Dataset):
+     def __init__(self, image_paths: List[Path], image_processor):
+         """
+         A custom dataset class for image inference without annotations or masks.
+
+         Args:
+             image_paths (List[Path]): Paths of the images to run inference on.
+             image_processor: A callable that turns a PIL image into model-ready
+                 tensors (typically a transformers image processor).
+         """
+         self.image_processor = image_processor
+         # Paths are supplied by the caller; no directory scanning or format filtering happens here
+         self.image_files = image_paths
+
+     def __len__(self) -> int:
+         return len(self.image_files)
+
+     def __getitem__(self, idx: int) -> Tuple[torch.Tensor, str, Tuple[int, int]]:
+         """
+         Get an image from the dataset at the specified index.
+
+         Args:
+             idx (int): The index of the image.
+
+         Returns:
+             Tuple[torch.Tensor, str, Tuple[int, int]]: The processed image tensor, the image file path, and the original (height, width).
+         """
+         image_path = self.image_files[idx]
+         # Open image using PIL and process it using the provided image processor
+         with Image.open(image_path) as img:
+             orig_size = img.size[::-1]  # PIL gives (width, height); detection post-processing expects (height, width)
+             img = img.convert("RGB")  # Ensure all images are in RGB format for consistency
+             processed_img = self.image_processor(images=img, return_tensors="pt")["pixel_values"].squeeze(0)
+
+         return processed_img, str(image_path), orig_size
+
+
+ def collate_fn_inference(batch: List[Tuple[torch.Tensor, str, Tuple[int, int]]]) -> dict:
+     """
+     Collate function for batching images for inference.
+
+     Args:
+         batch (List[Tuple[torch.Tensor, str, Tuple[int, int]]]): A list of tuples, each containing
+             the processed image tensor, the image path, and the original (height, width).
+
+     Returns:
+         dict: A dictionary containing the batched image tensors, padding masks, image file paths, and original sizes.
+     """
+     pixel_values = [item[0] for item in batch]  # Extract processed images
+     image_paths = [item[1] for item in batch]  # Extract image paths
+     orig_sizes = [item[2] for item in batch]
+
+     # Pad the images to match the largest image in the batch
+     encoding = detector_processor.pad(pixel_values, return_tensors="pt")
+
+     return {
+         'pixel_values': encoding['pixel_values'],
+         'pixel_mask': encoding['pixel_mask'],  # Padding mask (if needed by the model)
+         'image_paths': image_paths,
+         'orig_sizes': orig_sizes
+     }
+
+
+ class ModelInference:
+     def __init__(self, detector, detector_processor, segment_predictor, id2label, CONFIDENCE_THRESHOLD):
+         self.detector = detector
+         self.detector_processor = detector_processor
+         self.segment_predictor = segment_predictor
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD
+         self.id2label = id2label
+         self.mask_annotator = sv.MaskAnnotator()
+         self.detector.to(self.device)
+
+     def predict_one(self, image_path):
+         image = cv2.imread(image_path)
+         with torch.no_grad():
+
+             # load image and predict
+             inputs = self.detector_processor(images=image, return_tensors='pt').to(self.device)
+             outputs = self.detector(**inputs)
+
+             # post-process
+             target_sizes = torch.tensor([image.shape[:2]]).to(self.device)
+             results = self.detector_processor.post_process_object_detection(
+                 outputs=outputs,
+                 threshold=self.CONFIDENCE_THRESHOLD,
+                 target_sizes=target_sizes
+             )[0]
+         if results['boxes'].numel() == 0:
+             print("No bounding box detected")
+             return None, None
+         else:
+             det_detections = sv.Detections.from_transformers(transformers_results=results).with_nms(threshold=0.5)
+
+             everything_results = self.segment_predictor(image)
+             if everything_results[0].masks is not None:
+                 bbox_results = self.segment_predictor.prompt(everything_results, det_detections.xyxy.tolist())[0]
+                 seg_detections = sv.Detections.from_ultralytics(bbox_results)
+                 seg_detections = self.filter_small_masks(seg_detections)
+
+                 max_length = max(len(name) for name in self.id2label.values())
+
+                 # Create a new NumPy array with the appropriate dtype based on the longest string
+                 seg_detections.data['class_name'] = np.array(seg_detections.data['class_name'], dtype=f'<U{max_length}')
+
+                 for idx, class_name in enumerate(seg_detections.data['class_name']):
+                     if class_name == 'object':
+                         seg_detections.data['class_name'][idx] = self.id2label[seg_detections.class_id[idx]]
+
+                 annotated_frame = image.copy()
+                 annotated_frame = self.mask_annotator.annotate(scene=annotated_frame, detections=seg_detections)
+
+                 return seg_detections, annotated_frame
+             else:
+                 print("No segmentation mask generated")
+                 return None, None
+
+     def predict_folder(self, image_paths, batch_size=4):
+         dataset = ImageInferenceDataset(image_paths=image_paths, image_processor=self.detector_processor)
+
+         # Create DataLoader instance with the custom collate function
+         dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=collate_fn_inference)
+
+         detector_failed_list = []
+         segmentor_failed_list = []
+
+         id2label = self.id2label
+         max_length = max(len(name) for name in id2label.values())
+
+         all_image_paths = []
+
+         all_results = []
+
+         for batch in tqdm(dataloader):
+             pixel_values = batch["pixel_values"].to(self.device)
+             pixel_mask = batch["pixel_mask"].to(self.device)
+             batch_paths = batch["image_paths"]  # distinct name so the function argument is not clobbered
+             orig_sizes = batch["orig_sizes"]
+
+             orig_target_sizes = torch.tensor(orig_sizes, device=self.device)
+
+             with torch.no_grad():
+                 outputs = self.detector(
+                     pixel_values=pixel_values, pixel_mask=pixel_mask)
+
+             detector_results = self.detector_processor.post_process_object_detection(
+                 outputs,
+                 threshold=self.CONFIDENCE_THRESHOLD,
+                 target_sizes=orig_target_sizes)
+
+             detector_detections = []
+             detector_to_remove = []
+
+             for idx, detector_result in enumerate(detector_results):
+                 if detector_result['boxes'].numel() == 0:
+                     # The tensor is empty
+                     detector_to_remove.append(idx)
+                 else:
+                     detector_detections.append(sv.Detections.from_transformers(transformers_results=detector_result))
+
+             if detector_to_remove:
+                 # Remove failed items in reverse index order to avoid index shifting
+                 for idx in sorted(detector_to_remove, reverse=True):
+                     detector_failed_list.append(batch_paths[idx])
+                     del batch_paths[idx]
+
+             images_raw = [cv2.imread(image_path) for image_path in batch_paths]
+
+             boxes = [detections.xyxy.tolist() for detections in detector_detections]
+
+             results = []
+
+             to_remove_seg = []
+
+             for idx, (image_path, image, box) in enumerate(zip(batch_paths, images_raw, boxes)):
+                 try:
+                     with torch.no_grad():
+                         # segmentation_result = segment_model(image, bboxes=box)[0]
+                         everything_results = self.segment_predictor(image)
+
+                         if everything_results[0].masks is not None:
+                             bbox_results = self.segment_predictor.prompt(everything_results, box)[0]
+                             seg_detections = sv.Detections.from_ultralytics(bbox_results)
+                             seg_detections = self.filter_small_masks(seg_detections)
+                             seg_detections.data['class_name'] = np.array(seg_detections.data['class_name'], dtype=f'<U{max_length}')
+                             for name_idx, class_name in enumerate(seg_detections.data['class_name']):
+                                 if class_name == 'object':
+                                     seg_detections.data['class_name'][name_idx] = id2label[seg_detections.class_id[name_idx]]
+                             results.append(seg_detections)
+                         else:
+                             to_remove_seg.append(idx)
+                 except Exception as e:
+                     print(f"An error occurred: {e}")
+                     print(f"box: {box}")
+                     print(f"image id: {image_path}")
+                     # result = sv.Detections.from_ultralytics(segmentation_result)
+                     # results.append(result)
+
+             if to_remove_seg:
+                 for idx in sorted(to_remove_seg, reverse=True):
+                     segmentor_failed_list.append(batch_paths[idx])
+                     del batch_paths[idx]
+
+             if len(results) != len(batch_paths):
+                 print(f"Length of results ({len(results)}) does not match the number of images ({len(batch_paths)})")
+                 continue
+
+             all_image_paths.extend(batch_paths)
+             all_results.extend(results)
+
+         annotated_frame = None
+         if all_image_paths:  # build a preview from the first successfully annotated image
+             annotated_frame = self.mask_annotator.annotate(scene=cv2.imread(all_image_paths[0]), detections=all_results[0])
+
+         return all_image_paths, all_results, annotated_frame, detector_failed_list, segmentor_failed_list
+
+     def filter_small_masks(self, detections: sv.Detections) -> sv.Detections:
+         valid_indices = []
+         min_image_area_percentage = 0.002
+         max_image_area_percentage = 0.80
+         approximation_percentage = 0.75
+         for i, mask in enumerate(detections.mask):
+
+             # Check for structural issues in the mask
+             if not (contains_holes(mask) or contains_multiple_segments(mask)):
+                 # Check if the mask can be approximated to a polygon successfully
+                 if not approximate_mask_with_polygons(mask=mask,
+                                                       min_image_area_percentage=min_image_area_percentage,
+                                                       max_image_area_percentage=max_image_area_percentage,
+                                                       approximation_percentage=approximation_percentage,
+                                                       ):
+                     print(f"Skipping mask {i}: no valid polygon approximation")
+                     continue
+
+                 # If all checks pass, add index to valid_indices
+                 valid_indices.append(i)
+
+         filtered_xyxy = detections.xyxy[valid_indices]
+         filtered_mask = detections.mask[valid_indices]
+         filtered_confidence = detections.confidence[valid_indices]
+         filtered_class_id = detections.class_id[valid_indices]
+         filtered_class_name = detections.data['class_name'][valid_indices]
+
+         detections.xyxy = filtered_xyxy
+         detections.mask = filtered_mask
+         detections.confidence = filtered_confidence
+         detections.class_id = filtered_class_id
+         detections.data['class_name'] = filtered_class_name
+         return detections
+
+     def get_dict(
+         self,
+         image_paths: List[Any],
+         detections: List[Any]
+     ) -> Dict[str, Any]:
+
+         detections_dict = {}
+
+         for idx, image_path in enumerate(image_paths):
+             detections_dict[image_path] = detections[idx]
+
+         return detections_dict
+
+     def save_annotations(self,
+                          image_paths,
+                          detections,
+                          class_names,
+                          annotation_path,
+                          MIN_IMAGE_AREA_PERCENTAGE=0.002,
+                          MAX_IMAGE_AREA_PERCENTAGE=0.80,
+                          APPROXIMATION_PERCENTAGE=0.75):
+         # image_dir = annotation_path.parent
+         detections_dict = self.get_dict(image_paths, detections)
+         sv.DetectionDataset(
+             classes=class_names,
+             images=image_paths,
+             annotations=detections_dict
+         ).as_coco(
+             images_directory_path=None,
+             annotations_path=annotation_path,
+             min_image_area_percentage=MIN_IMAGE_AREA_PERCENTAGE,
+             max_image_area_percentage=MAX_IMAGE_AREA_PERCENTAGE,
+             approximation_percentage=APPROXIMATION_PERCENTAGE
+         )
+
+         return
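
Since save_annotations delegates to supervision's DetectionDataset.as_coco, the output follows the standard COCO layout (images, annotations, categories). A quick sanity check of a downloaded file might look like this sketch, where "annotations.coco.json" is a placeholder path:

    import json

    with open("annotations.coco.json") as f:  # placeholder path
        coco = json.load(f)
    print(len(coco["images"]), "images,", len(coco["annotations"]), "annotations")
    print("categories:", [c["name"] for c in coco["categories"]])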
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio==5.1.0
+ opencv-python==4.10.0.84
+ torch==2.4.0
+ supervision==0.23.0
+ tqdm==4.66.5
+ transformers==4.44.2
+ ultralytics==8.2.85
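
Installing with pip install -r requirements.txt should reproduce the environment; a small import check (a sketch, not part of the commit) confirms the pins resolved:

    # Report the installed versions of the pinned packages.
    import cv2, gradio, supervision, torch, tqdm, transformers, ultralytics

    for mod in (gradio, cv2, torch, supervision, tqdm, transformers, ultralytics):
        print(mod.__name__, getattr(mod, "__version__", "unknown"))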