y22ma
/

Kosmos2-endpoint

Model card Files Files and versions Community

Yan committed on Aug 31, 2023

Commit

226d5bb

1 Parent(s): e17a909

removed debug print

Browse files

Files changed (1) hide show

handler.py +0 -9

handler.py CHANGED Viewed

@@ -182,25 +182,20 @@ class EndpointHandler():
         :param data: A dictionary contains `inputs` and optional `image` field.
         :return: A dictionary with `image` field contains image in base64.
         """
-        print("Entered endpoint")
         image = data.pop("image", None)
         image_input = self.decode_base64_image(image)
-        print("Image decoded")
         # Save the image and load it again to match the original Kosmos-2 demo.
         # (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
         user_image_path = "/tmp/user_input_test_image.jpg"
         image_input.save(user_image_path)
-        print("Image saved")
         # This might give different results from the original argument `image_input`
         image_input = Image.open(user_image_path)
         text_input = "<grounding>Describe this image in detail:"
         #text_input = f"<grounding>{text_input}"
-        print("Image loaded again")
         inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
-        print("Processed")
         generated_ids = self.model.generate(
             pixel_values=inputs["pixel_values"].to("cuda"),
@@ -211,13 +206,10 @@ class EndpointHandler():
             use_cache=True,
             max_new_tokens=128,
         )
-        print("Generated")
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        print("Generated text")
         # By default, the generated  text is cleanup and the entities are extracted.
         processed_text, entities = self.processor.post_process_generation(generated_text)
-        print("Generated text processed")
         annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
@@ -247,7 +239,6 @@ class EndpointHandler():
         if end < len(processed_text):
             colored_text.append((processed_text[end:len(processed_text)], None))
-        print("Outputs prepared")
         return annotated_image, colored_text, str(filtered_entities)

         :param data: A dictionary contains `inputs` and optional `image` field.
         :return: A dictionary with `image` field contains image in base64.
         """
         image = data.pop("image", None)
         image_input = self.decode_base64_image(image)
         # Save the image and load it again to match the original Kosmos-2 demo.
         # (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
         user_image_path = "/tmp/user_input_test_image.jpg"
         image_input.save(user_image_path)
         # This might give different results from the original argument `image_input`
         image_input = Image.open(user_image_path)
         text_input = "<grounding>Describe this image in detail:"
         #text_input = f"<grounding>{text_input}"
         inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
         generated_ids = self.model.generate(
             pixel_values=inputs["pixel_values"].to("cuda"),
             use_cache=True,
             max_new_tokens=128,
         )
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
         # By default, the generated  text is cleanup and the entities are extracted.
         processed_text, entities = self.processor.post_process_generation(generated_text)
         annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
         if end < len(processed_text):
             colored_text.append((processed_text[end:len(processed_text)], None))
         return annotated_image, colored_text, str(filtered_entities)