"""Gradio demo: YOLOv3 object detection on PASCAL VOC classes with a GradCAM overlay."""

import itertools

import torch
import torchvision
from torchvision import transforms
import numpy as np
import gradio as gr
from PIL import Image
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
from pytorch_grad_cam.utils.image import show_cam_on_image

import config as config
from model import YOLOv3
from loss import YoloLoss
from utils import get_loaders
import utils
import grad_cam_func as gcf

# Number of prediction scales read from the model output (outputs[0..2]).
NUM_SCALES = 3
# Size the input image is resized to before the CAM heat-map is blended on it.
CAM_IMAGE_SIZE = (416, 416)
# Fixed NMS settings used only to pick the GradCAM target categories.
GRADCAM_IOU_THRESHOLD = 0.5
GRADCAM_CONF_THRESHOLD = 0.4

# The checkpoint keys carry a 'model.' component that the bare YOLOv3 module
# does not use; strip it so that strict loading succeeds.
state_dict = torch.load('results/Yolov3_Lavanya.pth', map_location=torch.device('cpu'))
new_state_dict = {
    key.replace('model.', ''): value for key, value in state_dict.items()
}

model = YOLOv3(in_channels=3, num_classes=config.NUM_CLASSES)
model.load_state_dict(new_state_dict, strict=True)
model.eval()

classes = ("aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
           "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
           "pottedplant", "sheep", "sofa", "train", "tvmonitor")


def _boxes_for_first_image(outputs, anchors_per_scale):
    """Decode every prediction scale and concatenate the boxes of batch item 0.

    Args:
        outputs: Indexable of ``NUM_SCALES`` model outputs; each is unpacked as
            a 5-dimensional tensor whose third dimension is the grid size.
        anchors_per_scale: Indexable giving the anchors to use for each scale.

    Returns:
        A single list with the boxes of the first image across all scales.
    """
    boxes = []
    for scale_idx in range(NUM_SCALES):
        scale_output = outputs[scale_idx]
        _, _, grid_size, _, _ = scale_output.shape
        boxes_per_image = utils.cells_to_bboxes(
            scale_output, anchors_per_scale[scale_idx], S=grid_size, is_preds=True
        )
        # The app always runs with a batch of one image, so only item 0 matters.
        boxes += boxes_per_image[0]
    return boxes


def _figure_to_rgb(fig):
    """Render ``fig`` and return its pixels as a (height, width, 3) uint8 array."""
    canvas = fig.canvas
    canvas.draw()
    if hasattr(canvas, "buffer_rgba"):
        # Preferred path: tostring_rgb() is deprecated in recent Matplotlib.
        return np.asarray(canvas.buffer_rgba())[..., :3].copy()
    flat = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
    return flat.reshape(canvas.get_width_height()[::-1] + (3,))


def _draw_detections(image, boxes):
    """Plot predicted bounding boxes on the image and return the rendered figure.

    Args:
        image: Image convertible with ``np.array`` to a (height, width, channels)
            array.
        boxes: Iterable of 6-element boxes laid out as
            ``[class pred, confidence, x, y, width, height]`` where x/y are the
            box midpoint and all four coordinates are fractions of the image size.

    Returns:
        The rendered figure as an RGB uint8 array.
    """
    cmap = plt.get_cmap("tab20b")
    class_labels = config.PASCAL_CLASSES
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
    im = np.array(image)
    height, width, _ = im.shape

    fig, ax = plt.subplots(1)
    try:
        ax.imshow(im)
        for box in boxes:
            assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
            class_idx = int(box[0])
            x_mid, y_mid, box_w, box_h = box[2:]
            upper_left_x = x_mid - box_w / 2
            upper_left_y = y_mid - box_h / 2
            ax.add_patch(
                patches.Rectangle(
                    (upper_left_x * width, upper_left_y * height),
                    box_w * width,
                    box_h * height,
                    linewidth=2,
                    edgecolor=colors[class_idx],
                    facecolor="none",
                )
            )
            # Use the Axes API rather than pyplot's global "current axes" so that
            # overlapping requests cannot draw onto each other's figure.
            ax.text(
                upper_left_x * width,
                upper_left_y * height,
                s=class_labels[class_idx],
                color="white",
                verticalalignment="top",
                bbox={"color": colors[class_idx], "pad": 0},
            )
        ax.axis('off')
        return _figure_to_rgb(fig)
    finally:
        # Always close the figure, even if drawing fails, to avoid leaking it.
        plt.close(fig)


def inference(input_img=None, iou_threshold=0.6, conf_threshold=0.5, gc_trans=0.3):
    """Run YOLOv3 on one image and build the detection plot and GradCAM overlay.

    Args:
        input_img: Input image as passed by the Gradio image component, or
            ``None`` when no image was supplied.
        iou_threshold: IoU threshold forwarded to non-max suppression for the
            plotted detections.
        conf_threshold: Confidence threshold forwarded to non-max suppression
            for the plotted detections.
        gc_trans: ``image_weight`` passed to ``show_cam_on_image``.

    Returns:
        A tuple ``(detections, gradcam)``: the rendered detection figure as an
        RGB array and the image returned by ``show_cam_on_image``. Both are
        ``None`` when ``input_img`` is ``None``.
    """
    if input_img is None:
        return None, None

    # The transforms return a dict; 'image' holds the tensor. unsqueeze(0) adds
    # the batch dimension the model is called with.
    transform_img = config.infer_transforms(image=input_img)['image'].unsqueeze(0)
    transform_img_visual = config.infer_transforms_visualization(image=input_img)['image']

    # --- Detection pass -----------------------------------------------------
    with torch.no_grad():
        outputs = model(transform_img)

    detection_anchors = [
        np.array(config.SCALED_ANCHORS[i]) for i in range(NUM_SCALES)
    ]
    nms_boxes = utils.non_max_suppression(
        _boxes_for_first_image(outputs, detection_anchors),
        iou_threshold=iou_threshold,
        threshold=conf_threshold,
        box_format="midpoint",
    )
    # permute(1, 2, 0) moves the first axis last, giving the (H, W, C) layout
    # that the plotting helper unpacks.
    outputs_inference_bb = _draw_detections(
        transform_img_visual.permute(1, 2, 0), nms_boxes
    )

    # --- GradCAM pass -------------------------------------------------------
    target_layer = [model.layers[-2]]
    # NOTE(review): gcf.BaseCAM is project code; if it registers hooks on the
    # model like pytorch_grad_cam's BaseCAM does, it may also need releasing.
    cam = gcf.BaseCAM(model, target_layer)
    AnG = ActivationsAndGradients(model, target_layer, None)
    try:
        cam_outputs = AnG(transform_img)
        gradcam_anchors = [config.SCALED_ANCHORS[i] for i in range(NUM_SCALES)]
        gradcam_boxes = utils.non_max_suppression(
            _boxes_for_first_image(cam_outputs, gradcam_anchors),
            iou_threshold=GRADCAM_IOU_THRESHOLD,
            threshold=GRADCAM_CONF_THRESHOLD,
            box_format="midpoint",
        )
        targets = [ClassifierOutputTarget(box[0]) for box in gradcam_boxes]
        per_layer_cams = cam.compute_cam_per_layer(transform_img, targets, False)
        output_gc = cam.aggregate_multi_layers(per_layer_cams)[0, :, :]
    finally:
        # The wrapper registers hooks on the shared global model; without this
        # every request would leave another set of hooks attached.
        AnG.release()

    img = np.float32(cv2.resize(input_img, CAM_IMAGE_SIZE)) / 255
    outputs_inference_gc = show_cam_on_image(
        img, output_gc, use_rgb=True, image_weight=gc_trans
    )

    return outputs_inference_bb, outputs_inference_gc


title = "PASCAL VOC trained on Yolov3"
description = "A simple Gradio interface to infer on Yolov3 model, and get GradCAM results. PASCAL VOC has the following object classes: aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor"
examples = [[f'examples/test_{i}.jpg', 0.6, 0.5, 0.3] for i in range(10)]

# NOTE(review): the slider defaults below (0.4 / 0.5) differ from the function
# and example defaults (0.5 / 0.3) - confirm which values are intended.
demo = gr.Interface(
    inference,
    inputs=[
        gr.Image(label="Input image"),
        gr.Slider(0, 1, value=0.6, label="IOU Threshold"),
        gr.Slider(0, 1, value=0.4, label="Threshold"),
        gr.Slider(0, 1, value=0.5, label="GradCAM Transparency"),
    ],
    outputs=[
        gr.Image(label="Yolov3 Prediction"),
        gr.Image(label="GradCAM Output"),
    ],
    title=title,
    description=description,
    examples=examples,
)
demo.launch()