"""Download images listed in a JSONL file and render their grounded phrases.

Each JSONL record is expected to provide the keys read below: ``url``,
``key``, ``caption`` and ``ref_exps``.  For every record the image is
downloaded, one box per referring expression is drawn on it, and the
annotated picture is saved next to the raw download with an ``_exp`` suffix.
"""

import ast
import base64
import io
import json
import os
import sys
import textwrap
from pathlib import Path
from urllib.parse import urlparse

import cv2
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt
import numpy as np
import requests
from PIL import Image
from requests.exceptions import HTTPError

pylab.rcParams['figure.figsize'] = 20, 12

# Seconds to wait on a server before abandoning a download; without a timeout
# a single stalled host would block the whole run indefinitely.
REQUEST_TIMEOUT = 30


def download_images_from_jsonl(jsonl_path, output_folder):
    """Visualize every record of a JSONL file.

    Args:
        jsonl_path: Path of the JSONL file, one JSON object per line.
        output_folder: Existing directory that receives the output images.
    """
    with open(jsonl_path, 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            vis_image(json.loads(line), output_folder)


def download_image(url, output_folder):
    """Download ``url`` into ``output_folder`` under the URL's base name.

    Failures (HTTP errors, connection problems, timeouts) are reported on
    stdout and the function returns without writing anything.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error while downloading {url}: {e}")
        return

    file_name = os.path.basename(urlparse(url).path)
    output_path = os.path.join(output_folder, file_name)
    with open(output_path, 'wb') as file:
        file.write(response.content)


def imshow(img, file_name="tmp.jpg", caption='test'):
    """Save ``img`` with ``caption`` rendered underneath it.

    Args:
        img: H x W x 3 array; the channel order is reversed before display
            (the caller in this file passes a BGR image).
        file_name: Destination path; the extension selects the output format.
        caption: Text wrapped at 120 characters and placed below the image.
    """
    fig, ax = plt.subplots()
    try:
        ax.imshow(img[:, :, [2, 1, 0]])
        ax.set_axis_off()
        ax.text(
            0.5,
            -0.2,
            '\n'.join(textwrap.wrap(caption, 120)),
            ha='center',
            transform=ax.transAxes,
            fontsize=18,
        )
        fig.savefig(file_name, bbox_inches='tight')
    finally:
        # Close even when saving fails, otherwise figures pile up in memory
        # across the many images processed in one run.
        plt.close(fig)


def _is_overlapping(rect1, rect2):
    """Return True when two (x1, y1, x2, y2) rectangles intersect or touch."""
    x1, y1, x2, y2 = rect1
    x3, y3, x4, y4 = rect2
    return not (x2 < x3 or x1 > x4 or y2 < y3 or y1 > y4)


def _blend_region(image, x1, y1, x2, y2, color, alpha=0.5):
    """Blend ``color`` into ``image[y1:y2, x1:x2]`` in place.

    The rectangle is clipped to the image on all four sides, so coordinates
    that fall outside the image are ignored instead of wrapping around to the
    opposite edge through negative indexing.
    """
    image_h, image_w = image.shape[:2]
    top, bottom = max(y1, 0), min(y2, image_h)
    left, right = max(x1, 0), min(x2, image_w)
    if top >= bottom or left >= right:
        return
    region = image[top:bottom, left:right]
    blended = alpha * region + (1 - alpha) * np.array(color)
    image[top:bottom, left:right] = blended.astype(np.uint8)


def vis_image(json_obj, output_folder):
    """Download one record's image and save a copy with its phrases drawn.

    Args:
        json_obj: Record with ``url``, ``key``, ``caption`` and ``ref_exps``.
            Each ``ref_exps`` entry is a 7-item sequence
            ``(phrase_s, phrase_e, x1, y1, x2, y2, score)``: the phrase is
            ``caption[phrase_s:phrase_e]`` and the box coordinates are
            multiplied by the image width/height to obtain pixels.
        output_folder: Existing directory for ``<key><ext>`` (raw download)
            and ``<key>_exp<ext>`` (annotated image).

    Download, decode and save failures are reported on stdout and the record
    is skipped, so one bad record never aborts the whole run.
    """
    url = json_obj['url']
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        file_name = os.path.basename(urlparse(url).path)
        extension = os.path.splitext(file_name)[1]
        output_path = os.path.join(output_folder, json_obj['key'] + extension)
    except Exception as e:
        print(f"Error while downloading {url}: {e}")
        return

    with open(output_path, 'wb') as file:
        file.write(response.content)

    try:
        with Image.open(output_path) as source_img:
            pil_img = source_img.convert("RGB")
    except Exception as e:
        # The payload is not a decodable image (HTML error page, truncated
        # file, unsupported format, ...): skip this record.
        print(f"Error while decoding {output_path}: {e}")
        return

    image_h = pil_img.height
    image_w = pil_img.width
    caption = json_obj['caption']
    grounding_list = json_obj['ref_exps']

    # OpenCV draws on BGR images; the copy makes the array C-contiguous.
    new_image = np.array(pil_img)[:, :, [2, 1, 0]].copy()

    previous_locations = []
    previous_bboxes = []
    text_offset = 10
    text_offset_original = 4
    # Scale font size and stroke widths with the shorter image side.
    short_side = min(image_h, image_w)
    text_size = max(0.07 * short_side / 100, 0.5)
    text_line = int(max(1 * short_side / 512, 1))
    box_line = int(max(2 * short_side / 512, 2))
    # Until the first label is measured, assume a label is text_offset tall.
    text_height = text_offset

    for (phrase_s, phrase_e, x1_norm, y1_norm, x2_norm, y2_norm, score) in grounding_list:
        # Offsets may be serialized as floats; slicing requires ints.
        phrase = caption[int(phrase_s):int(phrase_e)]
        x1, y1 = int(x1_norm * image_w), int(y1_norm * image_h)
        x2, y2 = int(x2_norm * image_w), int(y2_norm * image_h)
        print(f"Decode results: {phrase} - {[x1, y1, x2, y2]}")

        # Draw the box in a random color.
        color = tuple(np.random.randint(0, 255, size=3).tolist())
        new_image = cv2.rectangle(new_image, (x1, y1), (x2, y2), color, box_line)

        # Nudge the label down when it would start next to an earlier one.
        for x_prev, y_prev in previous_locations:
            if abs(x1 - x_prev) < abs(text_offset) and abs(y1 - y_prev) < abs(text_offset):
                y1 += text_height

        # Keep labels of boxes near the top edge inside the image.
        if y1 < 2 * text_offset:
            y1 += text_offset + text_offset_original

        (text_width, text_height), _ = cv2.getTextSize(
            phrase, cv2.FONT_HERSHEY_SIMPLEX, text_size, text_line
        )
        text_bg_x1 = x1
        text_bg_y1 = y1 - text_height - text_offset_original
        text_bg_x2 = x1 + text_width
        text_bg_y2 = y1

        # Slide the label down until it no longer covers an earlier label,
        # pinning it to the bottom edge if it would leave the image.
        for prev_bbox in previous_bboxes:
            while _is_overlapping((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), prev_bbox):
                text_bg_y1 += text_offset
                text_bg_y2 += text_offset
                y1 += text_offset

                if text_bg_y2 >= image_h:
                    text_bg_y1 = max(0, image_h - text_height - text_offset_original)
                    text_bg_y2 = image_h
                    y1 = max(0, image_h - text_height - text_offset_original + text_offset)
                    break

        # Semi-transparent background in the box color behind the label.
        _blend_region(new_image, text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2, color, alpha=0.5)

        cv2.putText(
            new_image,
            phrase,
            (x1, y1 - text_offset_original),
            cv2.FONT_HERSHEY_SIMPLEX,
            text_size,
            (0, 0, 0),
            text_line,
            cv2.LINE_AA,
        )
        previous_locations.append((x1, y1))
        previous_bboxes.append((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2))

    try:
        file_key_name = json_obj['key'] + '_exp' + extension
        output_path = os.path.join(output_folder, file_key_name)
        imshow(new_image, file_name=output_path, caption=caption)
    except Exception as e:
        # Typically an extension matplotlib cannot write (supported formats:
        # eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg, svgz, tif, tiff,
        # webp); skip the record rather than abort the run.
        print(f"Error while saving visualization for {url}: {e}")
        return


if __name__ == '__main__':
    # The JSONL file must be downloaded before running this script.
    jsonl_path = '/tmp/grit_coyo.jsonl'
    output_folder = './output/vis_grit'
    os.makedirs(output_folder, exist_ok=True)
    download_images_from_jsonl(jsonl_path, output_folder)