vikhyatk committed (verified)
Commit 089817d · 1 parent: 7a7521a

Show inference time for both models


Added an extra output that shows inference time for both models. I removed the `@GPU` annotation on the `detect` function because it was adding a ~400 ms hit to the first model's inference time (presumably ZeroGPU initialization that the second model then gets to take advantage of). With the annotation removed, both models take that hit, making for a more apples-to-apples comparison.
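For context on the decorator change, this is roughly the shape `app.py` ends up with: `@GPU` stays on the two per-model functions and comes off the wrapper, so each model pays its own ZeroGPU startup cost instead of the first call warming the GPU for the second. A minimal sketch, assuming the bare `@GPU` comes from Hugging Face's `spaces` package (`from spaces import GPU`); the function bodies are elided:

```python
from spaces import GPU  # assumption: the ZeroGPU decorator used by this Space


@GPU
def detect_qwen(image, prompt):
    # Decorated: this call sets up the GPU for itself.
    ...


@GPU
def detect_moondream(image, prompt, category_input):
    # Also decorated: it no longer starts "warm" at the other model's expense.
    ...


def detect(image, prompt_model_1, prompt_model_2, category_input):
    # No @GPU here any more; decorating this wrapper made the first inner call
    # absorb the ~400 ms initialization that the second call then skipped.
    detect_qwen(image, prompt_model_1)
    detect_moondream(image, prompt_model_2, category_input)
```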

Here's what it looks like:

![Screenshot 2025-07-03 at 3.58.31 AM.png](https://cdn-uploads.huggingface.co/production/uploads/63117568fa95534e218da163/IXm9qmvqOjTMvIgOz0G4I.png)
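The time strings in the screenshot come from the plain `time.perf_counter()` pattern added in the diff below: start a timer before the model call, compute elapsed milliseconds after it, and return a Markdown string alongside the existing outputs. A minimal standalone sketch of that pattern (`run_inference` and `model_name` are hypothetical placeholders; in app.py the timer wraps the Qwen and Moondream calls directly inside `detect_qwen`/`detect_moondream`):

```python
import time


def timed_inference(run_inference, model_name, *args, **kwargs):
    """Call a model and report how long the call took."""
    t0 = time.perf_counter()
    result = run_inference(*args, **kwargs)
    elapsed_ms = (time.perf_counter() - t0) * 1_000

    # Same Markdown format the app feeds into its new gr.Markdown outputs.
    time_taken = f"**Inference time ({model_name}):** {elapsed_ms:.0f} ms"
    return result, time_taken
```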

Files changed (1)
  1. app.py +24 -9
app.py CHANGED
```diff
@@ -2,6 +2,7 @@ import random
 import requests
 import json
 import ast
+import time
 
 import matplotlib.pyplot as plt
 from PIL import Image, ImageDraw, ImageFont
@@ -156,6 +157,7 @@ def detect_qwen(image, prompt):
         }
     ]
 
+    t0 = time.perf_counter()
     text = processor_qwen.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor_qwen(
@@ -173,37 +175,41 @@ def detect_qwen(image, prompt):
     output_text = processor_qwen.batch_decode(
         generated_ids_trimmed, do_sample=True, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
+    elapsed_ms = (time.perf_counter() - t0) * 1_000
 
     input_height = inputs['image_grid_thw'][0][1] * 14
     input_width = inputs['image_grid_thw'][0][2] * 14
 
     annotated_image = create_annotated_image(image, output_text, input_height, input_width)
 
-    return annotated_image, output_text
+    time_taken = f"**Inference time ({model_qwen_name}):** {elapsed_ms:.0f} ms"
+    return annotated_image, output_text, time_taken
 
 
 @GPU
 def detect_moondream(image, prompt, category_input):
+    t0 = time.perf_counter()
     if category_input in ["Object Detection", "Visual Grounding + Object Detection"]:
         output_text = model_moondream.detect(image=image, object=prompt)
     elif category_input == "Visual Grounding + Keypoint Detection":
         output_text = model_moondream.point(image=image, object=prompt)
     else:
         output_text = model_moondream.query(image=image, question=prompt, reasoning=True)
+    elapsed_ms = (time.perf_counter() - t0) * 1_000
 
     annotated_image = create_annotated_image_normalized(image=image, json_data=output_text, label="object", explicit_color=None)
-
-    return annotated_image, output_text
 
-@GPU
+    time_taken = f"**Inference time ({model_moondream_name}):** {elapsed_ms:.0f} ms"
+    return annotated_image, output_text, time_taken
+
 def detect(image, prompt_model_1, prompt_model_2, category_input):
     STANDARD_SIZE = (1024, 1024)
     image.thumbnail(STANDARD_SIZE)
 
-    annotated_image_model_1, output_text_model_1 = detect_qwen(image, prompt_model_1)
-    annotated_image_model_2, output_text_model_2 = detect_moondream(image, prompt_model_2, category_input)
+    annotated_image_model_1, output_text_model_1, timing_1 = detect_qwen(image, prompt_model_1)
+    annotated_image_model_2, output_text_model_2, timing_2 = detect_moondream(image, prompt_model_2, category_input)
 
-    return annotated_image_model_1, output_text_model_1, annotated_image_model_2, output_text_model_2
+    return annotated_image_model_1, output_text_model_1, timing_1, annotated_image_model_2, output_text_model_2, timing_2
 
 css_hide_share = """
 button#gradio-share-link-button-0 {
@@ -253,10 +259,12 @@ with gr.Blocks(theme=Ocean(), css=css_hide_share) as demo:
         with gr.Column(scale=1):
             output_image_model_1 = gr.Image(type="pil", label=f"Annotated image for {model_qwen_name}", height=400)
             output_textbox_model_1 = gr.Textbox(label=f"Model response for {model_qwen_name}", lines=10)
+            output_time_model_1 = gr.Markdown()
 
         with gr.Column(scale=1):
             output_image_model_2 = gr.Image(type="pil", label=f"Annotated image for {model_moondream_name}", height=400)
             output_textbox_model_2 = gr.Textbox(label=f"Model response for {model_moondream_name}", lines=10)
+            output_time_model_2 = gr.Markdown()
 
     gr.Markdown("### Examples")
     example_prompts = [
@@ -276,8 +284,15 @@ with gr.Blocks(theme=Ocean(), css=css_hide_share) as demo:
         label="Click an example to populate the input"
    )
 
-    generate_btn.click(fn=detect, inputs=[image_input, prompt_input_model_1, prompt_input_model_2, category_input], outputs=[output_image_model_1, output_textbox_model_1, output_image_model_2, output_textbox_model_2])
-
+    generate_btn.click(
+        fn=detect,
+        inputs=[image_input, prompt_input_model_1, prompt_input_model_2, category_input],
+        outputs=[
+            output_image_model_1, output_textbox_model_1, output_time_model_1,
+            output_image_model_2, output_textbox_model_2, output_time_model_2
+        ]
+    )
+
 if __name__ == "__main__":
     demo.launch()
 
```