Yan
committed on
Commit
·
4985728
1
Parent(s):
c02a097
Added debug prints to see where the failure is coming from
Browse files
- handler.py +9 -0
handler.py
CHANGED
@@ -180,20 +180,25 @@ class EndpointHandler():
|
|
180 |
:param data: A dictionary contains `inputs` and optional `image` field.
|
181 |
:return: A dictionary with `image` field contains image in base64.
|
182 |
"""
|
|
|
183 |
image = data.pop("image", None)
|
184 |
image_input = self.decode_base64_image(image)
|
|
|
185 |
|
186 |
# Save the image and load it again to match the original Kosmos-2 demo.
|
187 |
# (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
|
188 |
user_image_path = "/tmp/user_input_test_image.jpg"
|
189 |
image_input.save(user_image_path)
|
|
|
190 |
|
191 |
# This might give different results from the original argument `image_input`
|
192 |
image_input = Image.open(user_image_path)
|
193 |
text_input = "<grounding>Describe this image in detail:"
|
194 |
#text_input = f"<grounding>{text_input}"
|
|
|
195 |
|
196 |
inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
|
|
|
197 |
|
198 |
generated_ids = self.model.generate(
|
199 |
pixel_values=inputs["pixel_values"].to("cuda"),
|
@@ -204,10 +209,13 @@ class EndpointHandler():
|
|
204 |
use_cache=True,
|
205 |
max_new_tokens=128,
|
206 |
)
|
|
|
207 |
generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
208 |
|
209 |
# By default, the generated text is cleanup and the entities are extracted.
|
210 |
processed_text, entities = self.processor.post_process_generation(generated_text)
|
|
|
211 |
|
212 |
annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
|
213 |
|
@@ -237,6 +245,7 @@ class EndpointHandler():
|
|
237 |
|
238 |
if end < len(processed_text):
|
239 |
colored_text.append((processed_text[end:len(processed_text)], None))
|
|
|
240 |
|
241 |
return annotated_image, colored_text, str(filtered_entities)
|
242 |
|
|
|
180 |
:param data: A dictionary contains `inputs` and optional `image` field.
|
181 |
:return: A dictionary with `image` field contains image in base64.
|
182 |
"""
|
183 |
+
print("Entered endpoint")
|
184 |
image = data.pop("image", None)
|
185 |
image_input = self.decode_base64_image(image)
|
186 |
+
print("Image decoded")
|
187 |
|
188 |
# Save the image and load it again to match the original Kosmos-2 demo.
|
189 |
# (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
|
190 |
user_image_path = "/tmp/user_input_test_image.jpg"
|
191 |
image_input.save(user_image_path)
|
192 |
+
print("Image saved")
|
193 |
|
194 |
# This might give different results from the original argument `image_input`
|
195 |
image_input = Image.open(user_image_path)
|
196 |
text_input = "<grounding>Describe this image in detail:"
|
197 |
#text_input = f"<grounding>{text_input}"
|
198 |
+
print("Image loaded again")
|
199 |
|
200 |
inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
|
201 |
+
print("Processed")
|
202 |
|
203 |
generated_ids = self.model.generate(
|
204 |
pixel_values=inputs["pixel_values"].to("cuda"),
|
|
|
209 |
use_cache=True,
|
210 |
max_new_tokens=128,
|
211 |
)
|
212 |
+
print("Generated")
|
213 |
generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
214 |
+
print("Generated text")
|
215 |
|
216 |
# By default, the generated text is cleanup and the entities are extracted.
|
217 |
processed_text, entities = self.processor.post_process_generation(generated_text)
|
218 |
+
print("Generated text processed")
|
219 |
|
220 |
annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
|
221 |
|
|
|
245 |
|
246 |
if end < len(processed_text):
|
247 |
colored_text.append((processed_text[end:len(processed_text)], None))
|
248 |
+
print("Outputs prepared")
|
249 |
|
250 |
return annotated_image, colored_text, str(filtered_entities)
|
251 |
|