|
import torch, torchvision |
|
from torchvision import transforms |
|
import numpy as np |
|
import gradio as gr |
|
from PIL import Image |
|
|
|
from torch.utils.data import DataLoader |
|
import itertools |
|
import matplotlib.pyplot as plt |
|
import matplotlib.patches as patches |
|
import cv2 |
|
|
|
import config as config |
|
from model import YOLOv3 |
|
from loss import YoloLoss |
|
from utils import get_loaders |
|
import utils |
|
|
|
# Load the trained weights onto the CPU and rebuild the network for inference.
# NOTE(review): torch.load unpickles the file, so only point this at a
# checkpoint you trust.
state_dict = torch.load('results/Yolov3_Lavanya.pth', map_location=torch.device('cpu'))

# The saved keys carry a leading "model." (presumably from a wrapper module
# that held the network as an attribute -- confirm against the training code).
# Strip it only where it is a prefix: a blanket str.replace would also rewrite
# any key that merely contains "model." further along its name.
_prefix = 'model.'
new_state_dict = {
    (key[len(_prefix):] if key.startswith(_prefix) else key): value
    for key, value in state_dict.items()
}

model = YOLOv3(in_channels=3, num_classes=config.NUM_CLASSES)
# strict=True makes any remaining key mismatch fail loudly at startup.
model.load_state_dict(new_state_dict, strict=True)
model.eval()
|
|
|
# PASCAL VOC category names (the same 20 that the interface description lists).
# NOTE(review): the labels drawn by inference() come from config.PASCAL_CLASSES,
# not from this tuple.
classes = (
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
)
|
|
|
|
|
import grad_cam_func as gcf |
|
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget |
|
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients |
|
from pytorch_grad_cam.utils.image import show_cam_on_image |
|
|
|
def _decode_boxes(outputs, anchors_as_numpy):
    """Flatten the three prediction scales into one candidate-box list.

    Args:
        outputs: Indexable per-scale model outputs; entry ``i`` is paired with
            ``config.SCALED_ANCHORS[i]`` and its third dimension is passed to
            ``utils.cells_to_bboxes`` as the grid size ``S``.
        anchors_as_numpy: When true, each scale's anchors are wrapped in
            ``np.array`` before the call; otherwise they are passed exactly as
            stored in ``config``. Both call sites keep the form they used
            before this helper existed.

    Returns:
        The boxes of the first (and only) image in the batch, concatenated
        across the three scales.
    """
    candidates = []
    for scale_idx in range(3):
        head = outputs[scale_idx]
        grid_size = head.shape[2]
        anchor = config.SCALED_ANCHORS[scale_idx]
        if anchors_as_numpy:
            anchor = np.array(anchor)
        per_image = utils.cells_to_bboxes(head, anchor, S=grid_size, is_preds=True)
        # The batch always holds a single image, so only index 0 is relevant.
        candidates += per_image[0]
    return candidates


def _render_detections(image, boxes):
    """Draw the predicted boxes and class labels on ``image``.

    Args:
        image: Height x width x channel image (converted with ``np.array``).
        boxes: Iterable of 6-element boxes
            ``[class pred, confidence, x, y, width, height]`` in midpoint
            format, with coordinates relative to the image size.

    Returns:
        The rendered figure as an RGB ``uint8`` array.

    Raises:
        ValueError: If a box does not have exactly six entries.
    """
    cmap = plt.get_cmap("tab20b")
    class_labels = config.PASCAL_CLASSES
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]

    im = np.array(image)
    height, width, _ = im.shape

    fig, ax = plt.subplots(1)
    try:
        ax.imshow(im)
        for box in boxes:
            if len(box) != 6:
                raise ValueError(
                    "box should contain class pred, confidence, x, y, width, height"
                )
            class_idx = int(box[0])
            x_mid, y_mid, box_w, box_h = box[2:]
            left = x_mid - box_w / 2
            top = y_mid - box_h / 2
            ax.add_patch(
                patches.Rectangle(
                    (left * width, top * height),
                    box_w * width,
                    box_h * height,
                    linewidth=2,
                    edgecolor=colors[class_idx],
                    facecolor="none",
                )
            )
            # Axes-level calls avoid pyplot's global "current figure" state,
            # which is shared between concurrent requests.
            ax.text(
                left * width,
                top * height,
                s=class_labels[class_idx],
                color="white",
                verticalalignment="top",
                bbox={"color": colors[class_idx], "pad": 0},
            )
        ax.axis('off')

        fig.canvas.draw()
        # tostring_rgb() was removed from recent Matplotlib releases;
        # buffer_rgba() exposes the rendered pixels with their real shape.
        # Copy the RGB channels out before the figure is closed.
        rendered = np.ascontiguousarray(np.asarray(fig.canvas.buffer_rgba())[..., :3])
    finally:
        # Always close the figure, even when drawing fails part-way.
        plt.close(fig)
    return rendered


def _render_gradcam(input_img, transform_img, gc_trans):
    """Compute the GradCAM heatmap and blend it over the input image.

    Args:
        input_img: Original image (resized with ``cv2.resize`` and scaled by
            1/255; presumably an RGB array with 0-255 values -- confirm
            against the UI component).
        transform_img: Batched model input produced from ``input_img``.
        gc_trans: Passed to ``show_cam_on_image`` as ``image_weight``.

    Returns:
        The blended image returned by ``show_cam_on_image``.
    """
    target_layer = [model.layers[-2]]
    cam = gcf.BaseCAM(model, target_layer)

    hooks = ActivationsAndGradients(model, target_layer, None)
    try:
        outputs = hooks(transform_img)
        candidates = _decode_boxes(outputs, anchors_as_numpy=False)
        # These thresholds are fixed and independent of the UI sliders,
        # exactly as in the original code.
        kept = utils.non_max_suppression(
            candidates, iou_threshold=0.5, threshold=0.4, box_format="midpoint",
        )
        targets = [ClassifierOutputTarget(box[0]) for box in kept]

        per_layer = cam.compute_cam_per_layer(transform_img, targets, False)
        heatmap = cam.aggregate_multi_layers(per_layer)[0, :, :]
    finally:
        # Remove the hooks registered on the shared global model; otherwise
        # every request would leave another set attached to it.
        # NOTE(review): gcf.BaseCAM may register hooks of its own -- check
        # whether it needs an explicit release as well.
        hooks.release()

    # Resize the image to the heatmap's own size so the blend cannot hit a
    # shape mismatch (this equals the former fixed 416x416 when the heatmap
    # has that size).
    heat_h, heat_w = heatmap.shape[:2]
    base = np.float32(cv2.resize(input_img, (heat_w, heat_h))) / 255
    return show_cam_on_image(base, heatmap, use_rgb=True, image_weight=gc_trans)


def inference(input_img=None, iou_threshold=0.6, conf_threshold=0.5, gc_trans=0.3):
    """Run YOLOv3 detection and GradCAM on one image.

    Args:
        input_img: Image supplied by the interface, or ``None``.
        iou_threshold: IoU threshold passed to non-max suppression for the
            drawn detections.
        conf_threshold: Confidence threshold passed to non-max suppression
            for the drawn detections.
        gc_trans: Weight of the original image in the GradCAM overlay.

    Returns:
        ``(detections_image, gradcam_image)``, or ``(None, None)`` when no
        image was given.
    """
    if input_img is None:
        return None, None

    transformed = config.infer_transforms(image=input_img)
    transform_img = transformed['image'].unsqueeze(0)
    transform_img_visual = config.infer_transforms_visualization(image=input_img)['image']

    # Plain detection pass: no gradients are needed for the drawn boxes.
    with torch.no_grad():
        outputs = model(transform_img)
        candidates = _decode_boxes(outputs, anchors_as_numpy=True)

    nms_boxes = utils.non_max_suppression(
        candidates, iou_threshold=iou_threshold,
        threshold=conf_threshold, box_format="midpoint",
    )

    outputs_inference_bb = _render_detections(
        transform_img_visual.permute(1, 2, 0), nms_boxes
    )
    outputs_inference_gc = _render_gradcam(input_img, transform_img, gc_trans)

    return outputs_inference_bb, outputs_inference_gc
|
|
|
|
|
|
|
# Static text shown on the demo page.
title = "PASCAL VOC trained on Yolov3"
description = (
    "A simple Gradio interface to infer on Yolov3 model, and get GradCAM results. "
    "PASCAL VOC has the following object classes: aeroplane, bicycle, bird, boat, "
    "bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, "
    "pottedplant, sheep, sofa, train, tvmonitor"
)

# One row per sample image, in the same order as the interface inputs:
# image path, IOU threshold, confidence threshold, GradCAM transparency.
examples = [[f'examples/test_{i}.jpg', 0.6, 0.5, 0.3] for i in range(10)]

# NOTE(review): the slider defaults below (0.4 threshold, 0.5 transparency)
# differ from the example rows and from inference()'s own defaults (0.5, 0.3);
# confirm which values are intended.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Image(label="Input image"),
        gr.Slider(0, 1, value=0.6, label="IOU Threshold"),
        gr.Slider(0, 1, value=0.4, label="Threshold"),
        gr.Slider(0, 1, value=0.5, label="GradCAM Transparency"),
    ],
    outputs=[
        gr.Image(label="Yolov3 Prediction"),
        gr.Image(label="GradCAM Output"),
    ],
    title=title,
    description=description,
    examples=examples,
)

# Start serving the interface.
demo.launch()