Yan
committed on
Commit
·
226d5bb
1
Parent(s):
e17a909
removed debug print
Browse files- handler.py +0 -9
handler.py
CHANGED
@@ -182,25 +182,20 @@ class EndpointHandler():
|
|
182 |
:param data: A dictionary contains `inputs` and optional `image` field.
|
183 |
:return: A dictionary with `image` field contains image in base64.
|
184 |
"""
|
185 |
-
print("Entered endpoint")
|
186 |
image = data.pop("image", None)
|
187 |
image_input = self.decode_base64_image(image)
|
188 |
-
print("Image decoded")
|
189 |
|
190 |
# Save the image and load it again to match the original Kosmos-2 demo.
|
191 |
# (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
|
192 |
user_image_path = "/tmp/user_input_test_image.jpg"
|
193 |
image_input.save(user_image_path)
|
194 |
-
print("Image saved")
|
195 |
|
196 |
# This might give different results from the original argument `image_input`
|
197 |
image_input = Image.open(user_image_path)
|
198 |
text_input = "<grounding>Describe this image in detail:"
|
199 |
#text_input = f"<grounding>{text_input}"
|
200 |
-
print("Image loaded again")
|
201 |
|
202 |
inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
|
203 |
-
print("Processed")
|
204 |
|
205 |
generated_ids = self.model.generate(
|
206 |
pixel_values=inputs["pixel_values"].to("cuda"),
|
@@ -211,13 +206,10 @@ class EndpointHandler():
|
|
211 |
use_cache=True,
|
212 |
max_new_tokens=128,
|
213 |
)
|
214 |
-
print("Generated")
|
215 |
generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
216 |
-
print("Generated text")
|
217 |
|
218 |
# By default, the generated text is cleanup and the entities are extracted.
|
219 |
processed_text, entities = self.processor.post_process_generation(generated_text)
|
220 |
-
print("Generated text processed")
|
221 |
|
222 |
annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
|
223 |
|
@@ -247,7 +239,6 @@ class EndpointHandler():
|
|
247 |
|
248 |
if end < len(processed_text):
|
249 |
colored_text.append((processed_text[end:len(processed_text)], None))
|
250 |
-
print("Outputs prepared")
|
251 |
|
252 |
return annotated_image, colored_text, str(filtered_entities)
|
253 |
|
|
|
182 |
:param data: A dictionary contains `inputs` and optional `image` field.
|
183 |
:return: A dictionary with `image` field contains image in base64.
|
184 |
"""
|
|
|
185 |
image = data.pop("image", None)
|
186 |
image_input = self.decode_base64_image(image)
|
|
|
187 |
|
188 |
# Save the image and load it again to match the original Kosmos-2 demo.
|
189 |
# (https://github.com/microsoft/unilm/blob/f4695ed0244a275201fff00bee495f76670fbe70/kosmos-2/demo/gradio_app.py#L345-L346)
|
190 |
user_image_path = "/tmp/user_input_test_image.jpg"
|
191 |
image_input.save(user_image_path)
|
|
|
192 |
|
193 |
# This might give different results from the original argument `image_input`
|
194 |
image_input = Image.open(user_image_path)
|
195 |
text_input = "<grounding>Describe this image in detail:"
|
196 |
#text_input = f"<grounding>{text_input}"
|
|
|
197 |
|
198 |
inputs = self.processor(text=text_input, images=image_input, return_tensors="pt")
|
|
|
199 |
|
200 |
generated_ids = self.model.generate(
|
201 |
pixel_values=inputs["pixel_values"].to("cuda"),
|
|
|
206 |
use_cache=True,
|
207 |
max_new_tokens=128,
|
208 |
)
|
|
|
209 |
generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
210 |
|
211 |
# By default, the generated text is cleanup and the entities are extracted.
|
212 |
processed_text, entities = self.processor.post_process_generation(generated_text)
|
|
|
213 |
|
214 |
annotated_image = self.draw_entity_boxes_on_image(image_input, entities, show=False)
|
215 |
|
|
|
239 |
|
240 |
if end < len(processed_text):
|
241 |
colored_text.append((processed_text[end:len(processed_text)], None))
|
|
|
242 |
|
243 |
return annotated_image, colored_text, str(filtered_entities)
|
244 |
|