y22ma
/

Kosmos2-endpoint

Inference Endpoints

Model card Files Files and versions Community

Yan committed on Aug 25, 2023

Commit

4985728

1 Parent(s): c02a097

added debug print to see where the failure is coming from

Browse files

Files changed (1) hide show

handler.py +9 -0

handler.py CHANGED Viewed

@@ -180,20 +180,25 @@ class EndpointHandler():
         :param data: A dictionary contains `inputs` and optional `image` field.
         :return: A dictionary with `image` field contains image in base64.
         """
         image = data.pop("image", None)
         image_input = self.decode_base64_image(image)
         # Save the image and load it again to match the original Kosmos-2 demo.
         # (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
         user_image_path = "/tmp/user_input_test_image.jpg"
         image_input.save(user_image_path)
         # This might give different results from the original argument `image_input`
         image_input = Image.open(user_image_path)
         text_input = "<grounding>Describe this image in detail:"
         #text_input = f"<grounding>{text_input}"
         inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
         generated_ids = self.model.generate(
             pixel_values=inputs["pixel_values"].to("cuda"),
@@ -204,10 +209,13 @@ class EndpointHandler():
             use_cache=True,
             max_new_tokens=128,
         )
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
         # By default, the generated  text is cleanup and the entities are extracted.
         processed_text, entities = self.processor.post_process_generation(generated_text)
         annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
@@ -237,6 +245,7 @@ class EndpointHandler():
         if end < len(processed_text):
             colored_text.append((processed_text[end:len(processed_text)], None))
         return annotated_image, colored_text, str(filtered_entities)

         :param data: A dictionary contains `inputs` and optional `image` field.
         :return: A dictionary with `image` field contains image in base64.
         """
+        print("Entered endpoint")
         image = data.pop("image", None)
         image_input = self.decode_base64_image(image)
+        print("Image decoded")
         # Save the image and load it again to match the original Kosmos-2 demo.
         # (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
         user_image_path = "/tmp/user_input_test_image.jpg"
         image_input.save(user_image_path)
+        print("Image saved")
         # This might give different results from the original argument `image_input`
         image_input = Image.open(user_image_path)
         text_input = "<grounding>Describe this image in detail:"
         #text_input = f"<grounding>{text_input}"
+        print("Image loaded again")
         inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
+        print("Processed")
         generated_ids = self.model.generate(
             pixel_values=inputs["pixel_values"].to("cuda"),
             use_cache=True,
             max_new_tokens=128,
         )
+        print("Generated")
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        print("Generated text")
         # By default, the generated  text is cleanup and the entities are extracted.
         processed_text, entities = self.processor.post_process_generation(generated_text)
+        print("Generated text processed")
         annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
         if end < len(processed_text):
             colored_text.append((processed_text[end:len(processed_text)], None))
+        print("Outputs prepared")
         return annotated_image, colored_text, str(filtered_entities)