Return and show bounding box confidence
- rtmo_demo.py        +2 -2
- rtmo_demo_batch.py  +6 -4
- rtmo_gpu.py         +44 -17
rtmo_demo.py CHANGED

@@ -36,7 +36,7 @@ if __name__ == "__main__":
         if not success:
             break
 
-        frame_out, bboxes, keypoints, scores = body(frame)
+        frame_out, bboxes, bboxes_scores, keypoints, scores = body(frame)
 
         if keypoints is not None:
             if frame_idx % args.batch_size == 0 and frame_idx:
@@ -56,7 +56,7 @@ if __name__ == "__main__":
                 scores,
                 kpt_thr=0.3,
                 line_width=2)
-            img_show = draw_bbox(img_show, bboxes)
+            img_show = draw_bbox(img_show, bboxes, bboxes_scores)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
             cv2.imshow(f'{model}', img_show)
rtmo_demo_batch.py CHANGED

@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
         # Process the batch when it's full
         if len(batch_frames) == batch_size:
             s = time.time()
-            batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+            batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
             det_time = time.time() - s
             fps = round(batch_size / det_time, 1)
             print(f'Batch det: {fps} FPS')
@@ -33,9 +33,10 @@ def process_video(video_path, body_estimator, batch_size=4):
                 scores = batch_scores[i]
                 frame = batch_frames[i]
                 bboxes = batch_bboxes[i]
+                bboxes_scores = batch_bboxes_scores[i]
                 img_show = frame.copy()
                 img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
-                img_show = draw_bbox(img_show, bboxes)
+                img_show = draw_bbox(img_show, bboxes, bboxes_scores)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{video_path}', img_show)
@@ -54,14 +55,15 @@ def process_video(video_path, body_estimator, batch_size=4):
 
         # Option 2: Duplicate the last frame
         batch_frames.append(batch_frames[-1])
-        batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+        batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
         for i, keypoints in enumerate(batch_keypoints):
             scores = batch_scores[i]
             frame = batch_frames[i]
             bboxes = batch_bboxes[i]
+            bboxes_scores = batch_bboxes_scores[i]
             img_show = frame.copy()
             img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
-            img_show = draw_bbox(img_show, bboxes)
+            img_show = draw_bbox(img_show, bboxes, bboxes_scores)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.imshow(f'{video_path}', img_show)
             #cv2.waitKey(10)
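
__batch_call__ now returns four parallel lists (boxes, box scores, keypoints, keypoint scores), one entry per frame in the batch. A minimal consumption sketch, not part of the commit, where body_estimator and frames stand in for an RTMO_GPU_Batch instance and a list of batch_size BGR images:

# Hypothetical usage sketch (not part of this commit).
# `body_estimator` and `frames` are placeholders.
batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = \
    body_estimator.__batch_call__(frames)
for bboxes, bboxes_scores in zip(batch_bboxes, batch_bboxes_scores):
    # one confidence value per detected person in each frame
    for bbox, score in zip(bboxes, bboxes_scores):
        print(f'box {bbox.astype(int)} confidence {score:.2f}')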
rtmo_gpu.py CHANGED

@@ -207,12 +207,32 @@ def draw_mmpose(img,
 
     return img
 
-def draw_bbox(img, bboxes, color=(0, 255, 0)):
-    for bbox in bboxes:
-        img = cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
-                            (int(bbox[2]), int(bbox[3])), color, 2)
-    return img
-
+def draw_bbox(img, bboxes, bboxes_scores=None, color=None):
+    for i, bbox in enumerate(bboxes):
+        # Determine the color based on the score if no color is given
+        if color is None and bboxes_scores is not None:
+            # Scale the score to a color range (green to red)
+            score = bboxes_scores[i]
+            green = int((1 - score) * 255)
+            red = int(score * 255)
+            box_color = (0, green, red)
+        else:
+            box_color = color if color is not None else (0, 255, 0)
+
+        # Draw the bounding box
+        img = cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
+                            (int(bbox[2]), int(bbox[3])), box_color, 1)
+
+        # Display the score at the top-right corner of the bounding box
+        if bboxes_scores is not None:
+            score_text = f'{bboxes_scores[i]:.2f}'
+            text_size, _ = cv2.getTextSize(score_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+            text_x = int(bbox[2]) - text_size[0]
+            text_y = int(bbox[1]) + text_size[1]
+            img = cv2.putText(img, score_text, (text_x, text_y),
+                              cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1, cv2.LINE_AA)
+    return img
+
 # with simplification to use onnxruntime only
 def draw_skeleton(img,
                   keypoints,
@@ -333,6 +353,8 @@ class RTMO_GPU(object):
         tuple:
             - final_boxes (np.ndarray): Final bounding boxes.
             - final_scores (np.ndarray): Final scores.
+            - final keypoints
+            - final keypoints scores
         """
 
         if not self.is_yolo_nas_pose:
@@ -346,6 +368,7 @@ class RTMO_GPU(object):
             isscore = final_scores > 0.3
             isbbox = [i for i in isscore]
            final_boxes = final_boxes[isbbox]
+            final_boxes_scores = final_scores[isbbox]
 
             # decode pose outputs
             keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
@@ -359,14 +382,15 @@ class RTMO_GPU(object):
             if flat_predictions.shape[0] > 0: # at least one person found
                 mask = flat_predictions[:, 0] == 0
                 final_boxes = flat_predictions[mask, 1:5]
+                final_boxes_scores = flat_predictions[mask, 5]
                 pred_joints = flat_predictions[mask, 6:].reshape((len(final_boxes), -1, 3))
                 keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
                 keypoints = keypoints / ratio
                 final_boxes = final_boxes / ratio
             else: # no detection
-                final_boxes, keypoints, scores = np.zeros((0, 4)),np.zeros((0, 17, 2)), np.zeros((0, 17))
+                final_boxes, final_boxes_scores, keypoints, scores = np.zeros((0, 4)),np.zeros((0, 1)),np.zeros((0, 17, 2)), np.zeros((0, 17))
 
-        return final_boxes, keypoints, scores
+        return final_boxes, final_boxes_scores, keypoints, scores
 
     def inference(self, img: np.ndarray):
         """Inference model.
@@ -425,9 +449,9 @@ class RTMO_GPU(object):
 
         outputs = self.inference(image)
 
-        bboxes, keypoints, scores = self.postprocess(outputs, ratio)
+        bboxes, bboxes_scores, keypoints, scores = self.postprocess(outputs, ratio)
 
-        return bboxes, keypoints, scores
+        return bboxes, bboxes_scores, keypoints, scores
 
     def __init__(self,
                  model: str = None,
@@ -569,22 +593,24 @@ class RTMO_GPU_Batch(RTMO_GPU):
         batch_keypoints = []
         batch_scores = []
         batch_bboxes = []
+        batch_bboxes_scores = []
 
         b_dets, b_keypoints = outputs
         for i, ratio in enumerate(ratios):
             output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i],axis=0)]
-            bboxes, keypoints, scores = super().postprocess(output, ratio)
+            bboxes, bboxes_scores, keypoints, scores = super().postprocess(output, ratio)
             batch_keypoints.append(keypoints)
             batch_scores.append(scores)
             batch_bboxes.append(bboxes)
+            batch_bboxes_scores.append(bboxes_scores)
 
-        return batch_bboxes, batch_keypoints, batch_scores
+        return batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores
 
     def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
-        bboxes, keypoints, scores = self.postprocess_batch(outputs, ratios)
-        return bboxes, keypoints, scores
+        bboxes, bboxes_scores, keypoints, scores = self.postprocess_batch(outputs, ratios)
+        return bboxes, bboxes_scores, keypoints, scores
 
     def __call__(self, image: np.array, camera_id = 0):
 
@@ -600,18 +626,19 @@ class RTMO_GPU_Batch(RTMO_GPU):
         in_queue.put(image)
 
         if len(self.buffers[camera_id]) == self.batch_size:
-            b_bboxes, b_keypoints, b_scores = self.__batch_call__(self.buffers[camera_id])
+            b_bboxes, b_bboxes_scores, b_keypoints, b_scores = self.__batch_call__(self.buffers[camera_id])
             for i, (keypoints, scores) in enumerate(zip(b_keypoints, b_scores)):
                 bboxes = b_bboxes[i]
-                out_queue.put((bboxes, keypoints, scores))
+                bboxes_scores = b_bboxes_scores[i]
+                out_queue.put((bboxes, bboxes_scores, keypoints, scores))
             self.buffers[camera_id] = []
 
-        frame, bboxes, keypoints, scores = None, None, None, None
+        frame, bboxes, bboxes_scores, keypoints, scores = None, None, None, None, None
        if not out_queue.empty():
-            bboxes, keypoints, scores = out_queue.get()
+            bboxes, bboxes_scores, keypoints, scores = out_queue.get()
             frame = in_queue.get()
 
-        return frame, bboxes, keypoints, scores
+        return frame, bboxes, bboxes_scores, keypoints, scores
 
 
     def __init__(self,