Grounded-Segment-Anything

Runtime error

App Files Files Community

liuyizhang committed on Sep 7, 2023

Commit

2a71ebd

1 Parent(s): 5c28041

add time cost by step (ms)

Browse files

Files changed (3) hide show

app.py +40 -12
kosmos_utils.py +1 -1
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -519,24 +519,42 @@ def relate_anything(input_image, k):
 mask_source_draw = "draw a mask on input image"
 mask_source_segment = "type what to detect below"
 def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
             iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input, cleaner_size_limit=1080):
     if (task_type == 'Kosmos-2'):
         global kosmos_model, kosmos_processor
         kosmos_image, kosmos_text, kosmos_entities = kosmos_generate_predictions(input_image, kosmos_input, kosmos_model, kosmos_processor)
-        return None, None, kosmos_image, kosmos_text, kosmos_entities
     if (task_type == 'relate anything'):
         output_images = relate_anything(input_image['image'], num_relation)
-        return output_images, gr.Gallery.update(label='relate images'), None, None, None
     text_prompt = text_prompt.strip()
     if not ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_draw):
         if text_prompt == '':
-            return [], gr.Gallery.update(label='Detection prompt is not found!😂😂😂😂'), None, None, None
     if input_image is None:
-            return [], gr.Gallery.update(label='Please upload a image!😂😂😂😂'), None, None, None
     file_temp = int(time.time())
     logger.info(f'run_anything_task_002/{device}_[{file_temp}]_{task_type}/{inpaint_mode}/[{mask_source_radio}]/{remove_mode}/{remove_mask_extend}_[{text_prompt}]/[{inpaint_prompt}]___1_')
@@ -552,10 +570,12 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
         image_pil, image = load_image(input_image['image'].convert("RGB"))
         input_img = input_image['image']
         output_images.append(input_image['image'])
     else:
         image_pil, image = load_image(input_image.convert("RGB"))
         input_img = input_image
         output_images.append(input_image)
     size = image_pil.size
@@ -576,7 +596,7 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
         )
         if boxes_filt.size(0) == 0:
             logger.info(f'run_anything_task_[{file_temp}]_{task_type}_[{text_prompt}]_1___{groundingdino_device}/[No objects detected, please try others.]_')
-            return [], gr.Gallery.update(label='No objects detected, please try others.😂😂😂😂'), None, None, None
         boxes_filt_ori = copy.deepcopy(boxes_filt)
         pred_dict = {
@@ -587,6 +607,7 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
         image_with_box = plot_boxes_to_image(copy.deepcopy(image_pil), pred_dict)[0]
         output_images.append(image_with_box)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
     if task_type == 'segment' or ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_segment):
@@ -622,12 +643,13 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
         plt.savefig(image_path, bbox_inches="tight")
         segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
         os.remove(image_path)
-        output_images.append(segment_image_result)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_3_')
     if task_type == 'detection' or task_type == 'segment':
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
-        return output_images, gr.Gallery.update(label='result images'), None, None, None
     elif task_type == 'inpainting' or task_type == 'remove':
         if inpaint_prompt.strip() == '' and mask_source_radio == mask_source_segment:
             task_type = 'remove'
@@ -644,6 +666,7 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
             mask = masks[0][0].cpu().numpy()
             mask_pil = Image.fromarray(mask)
         output_images.append(mask_pil.convert("RGB"))
         if task_type == 'inpainting':
             # inpainting pipeline
@@ -682,21 +705,24 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
                                         extend_pixels=remove_mask_extend, useRectangle=useRectangle)
                         mask_imgs.append(mask_pil_exp)
                 mask_pil = mix_masks(mask_imgs)
-                output_images.append(mask_pil.convert("RGB"))
             logger.info(f'run_anything_task_[{file_temp}]_{task_type}_6_')
             image_inpainting = lama_cleaner_process(np.array(image_pil), np.array(mask_pil.convert("L")), cleaner_size_limit)
             # output_images.append(image_inpainting)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_7_')
         image_inpainting = image_inpainting.resize((image_pil.size[0], image_pil.size[1]))
         output_images.append(image_inpainting)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
-        return output_images, gr.Gallery.update(label='result images'), None, None, None
     else:
         logger.info(f"task_type:{task_type} error!")
     logger.info(f'run_anything_task_[{file_temp}]_9_9_')
-    return output_images, gr.Gallery.update(label='result images'), None, None, None
 def change_radio_display(task_type, mask_source_radio):
     text_prompt_visible = True
@@ -828,7 +854,9 @@ if __name__ == "__main__":
             with gr.Column():
                 image_gallery = gr.Gallery(label="result images", show_label=True, elem_id="gallery", visible=True
-                    ).style(preview=True, columns=[5], object_fit="scale-down", height="auto")
                 kosmos_output = gr.Image(type="pil", label="result images", visible=False)
                 kosmos_text_output = gr.HighlightedText(
                                     label="Generated Description",
@@ -860,7 +888,7 @@ if __name__ == "__main__":
             run_button.click(fn=run_anything_task, inputs=[
                             input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
                             iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input],
-                            outputs=[image_gallery, image_gallery, kosmos_output, kosmos_text_output, entity_output], show_progress=True, queue=True)
             mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio],
                             outputs=[text_prompt, inpaint_prompt, mask_source_radio, num_relation])

 mask_source_draw = "draw a mask on input image"
 mask_source_segment = "type what to detect below"
+def get_time_cost(run_task_time, time_cost_str):
+    """Record one timing step and return the updated timer state.
+
+    Args:
+        run_task_time: Millisecond timestamp returned by the previous call,
+            or 0 to begin a new timing sequence.
+        time_cost_str: Step costs accumulated so far, joined by '-->'.
+
+    Returns:
+        A ``(run_task_time, time_cost_str)`` tuple: the current timestamp in
+        milliseconds, and the string extended with the milliseconds elapsed
+        since the incoming ``run_task_time`` (or reset to 'start' when the
+        incoming ``run_task_time`` was 0).
+    """
+    # Current time from time.time(), truncated to whole milliseconds.
+    now_time = int(time.time()*1000)
+    if run_task_time == 0:
+        # 0 marks the first call; any incoming string is replaced.
+        time_cost_str = 'start'
+    else:
+        # Separate this step from earlier entries, then append its cost in ms.
+        if time_cost_str != '':
+            time_cost_str += f'-->'
+        time_cost_str += f'{now_time - run_task_time}'
+    # The caller passes this back in as the reference point for the next step.
+    run_task_time = now_time
+    return run_task_time, time_cost_str
 def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
             iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input, cleaner_size_limit=1080):
+    run_task_time = 0
+    time_cost_str = ''
+    run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     if (task_type == 'Kosmos-2'):
         global kosmos_model, kosmos_processor
         kosmos_image, kosmos_text, kosmos_entities = kosmos_generate_predictions(input_image, kosmos_input, kosmos_model, kosmos_processor)
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        return None, None, time_cost_str, kosmos_image, gr.Textbox.update(visible=(time_cost_str !='')), kosmos_text, kosmos_entities
     if (task_type == 'relate anything'):
         output_images = relate_anything(input_image['image'], num_relation)
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        return output_images, gr.Gallery.update(label='relate images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     text_prompt = text_prompt.strip()
     if not ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_draw):
         if text_prompt == '':
+            return [], gr.Gallery.update(label='Detection prompt is not found!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     if input_image is None:
+            return [], gr.Gallery.update(label='Please upload a image!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     file_temp = int(time.time())
     logger.info(f'run_anything_task_002/{device}_[{file_temp}]_{task_type}/{inpaint_mode}/[{mask_source_radio}]/{remove_mode}/{remove_mask_extend}_[{text_prompt}]/[{inpaint_prompt}]___1_')
         image_pil, image = load_image(input_image['image'].convert("RGB"))
         input_img = input_image['image']
         output_images.append(input_image['image'])
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     else:
         image_pil, image = load_image(input_image.convert("RGB"))
         input_img = input_image
         output_images.append(input_image)
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     size = image_pil.size
         )
         if boxes_filt.size(0) == 0:
             logger.info(f'run_anything_task_[{file_temp}]_{task_type}_[{text_prompt}]_1___{groundingdino_device}/[No objects detected, please try others.]_')
+            return [], gr.Gallery.update(label='No objects detected, please try others.😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
         boxes_filt_ori = copy.deepcopy(boxes_filt)
         pred_dict = {
         image_with_box = plot_boxes_to_image(copy.deepcopy(image_pil), pred_dict)[0]
         output_images.append(image_with_box)
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
     if task_type == 'segment' or ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_segment):
         plt.savefig(image_path, bbox_inches="tight")
         segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
         os.remove(image_path)
+        output_images.append(segment_image_result)
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_3_')
     if task_type == 'detection' or task_type == 'segment':
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
+        return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     elif task_type == 'inpainting' or task_type == 'remove':
         if inpaint_prompt.strip() == '' and mask_source_radio == mask_source_segment:
             task_type = 'remove'
             mask = masks[0][0].cpu().numpy()
             mask_pil = Image.fromarray(mask)
         output_images.append(mask_pil.convert("RGB"))
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         if task_type == 'inpainting':
             # inpainting pipeline
                                         extend_pixels=remove_mask_extend, useRectangle=useRectangle)
                         mask_imgs.append(mask_pil_exp)
                 mask_pil = mix_masks(mask_imgs)
+                output_images.append(mask_pil.convert("RGB"))
+                run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
             logger.info(f'run_anything_task_[{file_temp}]_{task_type}_6_')
             image_inpainting = lama_cleaner_process(np.array(image_pil), np.array(mask_pil.convert("L")), cleaner_size_limit)
             # output_images.append(image_inpainting)
+            # run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_7_')
         image_inpainting = image_inpainting.resize((image_pil.size[0], image_pil.size[1]))
         output_images.append(image_inpainting)
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
+        return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     else:
         logger.info(f"task_type:{task_type} error!")
     logger.info(f'run_anything_task_[{file_temp}]_9_9_')
+    return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
 def change_radio_display(task_type, mask_source_radio):
     text_prompt_visible = True
             with gr.Column():
                 image_gallery = gr.Gallery(label="result images", show_label=True, elem_id="gallery", visible=True
+                    ).style(preview=True, columns=[5], object_fit="scale-down", height="auto")
+                time_cost = gr.Textbox(label="Time cost by step (ms):", visible=False, interactive=False)
                 kosmos_output = gr.Image(type="pil", label="result images", visible=False)
                 kosmos_text_output = gr.HighlightedText(
                                     label="Generated Description",
             run_button.click(fn=run_anything_task, inputs=[
                             input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
                             iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input],
+                            outputs=[image_gallery, image_gallery, time_cost, time_cost, kosmos_output, kosmos_text_output, entity_output], show_progress=True, queue=True)
             mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio],
                             outputs=[text_prompt, inpaint_prompt, mask_source_radio, num_relation])

kosmos_utils.py CHANGED Viewed

@@ -230,4 +230,4 @@ def kosmos_generate_predictions(image_input, text_input, kosmos_model, kosmos_pr
     if end < len(processed_text):
         colored_text.append((processed_text[end:len(processed_text)], None))
-    return annotated_image, colored_text, str(filtered_entities)

     if end < len(processed_text):
         colored_text.append((processed_text[end:len(processed_text)], None))
+    return annotated_image, colored_text, str(filtered_entities)

requirements.txt CHANGED Viewed

@@ -17,7 +17,7 @@ termcolor
 timm
 torch
 torchvision
-transformers
 yapf
 numba
 scipy

 timm
 torch
 torchvision
+transformers==4.27.4
 yapf
 numba
 scipy