Delete app.py
Browse files
app.py
DELETED
@@ -1,161 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import torch
|
3 |
-
import time
|
4 |
-
from PIL import Image, ImageDraw, ImageFont
|
5 |
-
from transformers import (
|
6 |
-
AutoProcessor,
|
7 |
-
Owlv2ForObjectDetection,
|
8 |
-
Qwen2VLForConditionalGeneration
|
9 |
-
)
|
10 |
-
|
11 |
-
# Initialize models
# Each checkpoint id is named once so that a processor and its model are
# always loaded from the same repository.
_OWLV2_CHECKPOINT = "google/owlv2-base-patch16-ensemble"
_QWEN2_VL_CHECKPOINT = "Qwen/Qwen2-VL-2B-Instruct"

# Object detection: processor (pre/post-processing) and model.
obj_processor = AutoProcessor.from_pretrained(_OWLV2_CHECKPOINT)
obj_model = Owlv2ForObjectDetection.from_pretrained(_OWLV2_CHECKPOINT)

# Chat: processor and vision-language model. dtype and device placement are
# left to the library ("auto").
cbt_processor = AutoProcessor.from_pretrained(_QWEN2_VL_CHECKPOINT)
cbt_model = Qwen2VLForConditionalGeneration.from_pretrained(
    _QWEN2_VL_CHECKPOINT,
    torch_dtype="auto",
    device_map="auto",
)
|
20 |
-
|
21 |
-
# Color palette for bounding boxes
# RGB triples; a detection's label index is taken modulo the palette length
# when a color is picked.
colors = [
    (255, 0, 0),      # red
    (0, 255, 0),      # lime green
    (0, 0, 255),      # blue
    (255, 165, 0),    # orange
    (75, 0, 130),     # indigo
    (255, 255, 0),    # yellow
    (0, 255, 255),    # cyan
    (255, 105, 180),  # hot pink
    (138, 43, 226),   # blue violet
    (0, 128, 0),      # dark green
]
|
26 |
-
|
27 |
-
# Conversation history
# Module-level list of chat messages; each message is a dict with a "role"
# and a list of typed "content" parts. It starts with the system prompt only.
_SYSTEM_PROMPT = "You are a helpful AI assistant."

history = [
    {"role": "system", "content": [{"type": "text", "text": _SYSTEM_PROMPT}]},
]
|
36 |
-
|
37 |
-
# Helper Functions
|
38 |
-
def detect_single_image(image, objects):
    """Process one image for object detection.

    Args:
        image: PIL image to search.
        objects: Comma-separated object names, e.g. ``"cat, dog, car"``.

    Returns:
        A ``(boxes, scores, labels)`` tuple taken from the post-processed
        detection result. Each label is the index of the matching entry in
        the comma-split ``objects`` list.
    """
    # One text query per requested object. Passing the raw comma-separated
    # string as a single query would make every detection carry label 0.
    # Empty entries are kept on purpose so the indices match a plain
    # ``objects.split(",")`` done by the caller when naming the boxes.
    queries = [name.strip() for name in objects.split(",")]

    # The outer list is the batch dimension: one list of queries per image.
    inputs = obj_processor(text=[queries], images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = obj_model(**inputs)

    # PIL reports (width, height); the post-processor expects (height, width).
    target_sizes = torch.Tensor([image.size[::-1]])
    results = obj_processor.post_process_object_detection(
        outputs=outputs, threshold=0.2, target_sizes=target_sizes
    )[0]
    return results["boxes"], results["scores"], results["labels"]
|
48 |
-
|
49 |
-
def annotate_image(image, boxes, scores, labels, objects):
    """Draw labelled bounding boxes on an image.

    The image is drawn on in place and the same object is returned.

    Args:
        image: PIL image to annotate.
        boxes: Iterable of box tensors; each is converted with ``.tolist()``
            and handed to ``ImageDraw.rectangle`` (presumably
            ``[x0, y0, x1, y1]`` in pixels -- confirm against the detector).
        scores: Iterable of confidence scores, one per box.
        labels: Iterable of integer indices into the comma-split ``objects``.
        objects: Comma-separated object names, e.g. ``"cat, dog, car"``.

    Returns:
        The annotated ``image``.
    """
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    object_list = [obj.strip() for obj in objects.split(",")]

    for box, score, label in zip(boxes, scores, labels):
        box = [round(coord, 2) for coord in box.tolist()]
        # Labels and scores may arrive as 0-dim tensors; convert them once to
        # plain Python numbers before indexing and formatting.
        index = int(label)
        color = colors[index % len(colors)]
        # Guard against a label that has no matching name instead of raising
        # IndexError halfway through drawing.
        if 0 <= index < len(object_list):
            name = object_list[index]
        else:
            name = f"object {index}"
        draw.rectangle(box, outline=color, width=3)
        draw.text(
            (box[0], box[1]),
            f"{name}: {float(score):.2f}",
            fill=color,
            font=font,
        )
    return image
|
61 |
-
|
62 |
-
# Core Functions
|
63 |
-
def detect_objects(images, objects):
|
64 |
-
"""Process multiple images for object detection."""
|
65 |
-
if not images:
|
66 |
-
return []
|
67 |
-
|
68 |
-
annotated_images = []
|
69 |
-
for image in images:
|
70 |
-
boxes, scores, labels = detect_single_image(image, objects)
|
71 |
-
annotated_images.append(annotate_image(image, boxes, scores, labels, objects))
|
72 |
-
|
73 |
-
history.append({
|
74 |
-
'role': 'system',
|
75 |
-
'content': [{'type': 'text', 'text': f'Processed {len(images)} images'}]
|
76 |
-
})
|
77 |
-
return annotated_images
|
78 |
-
|
79 |
-
def user(message, chat_history):
    """Handle user message submission.

    Args:
        message: Text the user submitted.
        chat_history: List of ``[user_text, bot_text]`` pairs, or ``None``
            when no conversation exists yet.

    Returns:
        A ``("", new_history)`` tuple: an empty string (used to clear the
        input textbox) and a new list with ``[message, ""]`` appended. The
        list passed in is not modified.
    """
    # Treat a missing history as empty instead of failing on ``None + list``.
    previous_turns = chat_history or []
    return "", previous_turns + [[message, ""]]
|
82 |
-
|
83 |
-
def chat_function(images, chat_history):
    """Generate chatbot response based on images and chat history.

    Args:
        images: Gallery value: ``None`` or a list whose entries are PIL
            images or ``(image, caption)`` pairs.
        chat_history: List of ``[user_text, bot_text]`` pairs; the last pair
            holds the message to answer and receives the reply.

    Returns:
        ``chat_history`` with the last pair's reply filled in. An empty or
        missing ``chat_history`` is returned unchanged.

    Side effects:
        On success, appends the user turn and the assistant reply to the
        module-level ``history``.
    """
    if not chat_history:
        return chat_history

    message = chat_history[-1][0]

    # The gallery may be empty (None) and may deliver (image, caption) pairs.
    pil_images = [
        item[0] if isinstance(item, (tuple, list)) else item
        for item in (images or [])
    ]

    # Image placeholders are attached only to the turn being answered. Storing
    # them in ``history`` would make later prompts contain more placeholders
    # than the images actually passed to the processor.
    text_part = {"type": "text", "text": message}
    prompt_turn = {
        "role": "user",
        "content": [text_part, *[{"type": "image"} for _ in pil_images]],
    }
    prompt = cbt_processor.apply_chat_template(
        [*history, prompt_turn],
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = cbt_processor(
        text=[prompt],
        images=pil_images or None,
        padding=True,
        return_tensors="pt",
    ).to(cbt_model.device)  # follow the model's placement instead of guessing

    output_ids = cbt_model.generate(**inputs, max_new_tokens=1024)

    # ``generate`` returns prompt + continuation; keep only the new tokens so
    # the reply does not echo the system prompt and the user's message.
    prompt_length = inputs["input_ids"].shape[1]
    bot_output = cbt_processor.batch_decode(
        output_ids[:, prompt_length:], skip_special_tokens=True
    )[0]

    # Record the exchange only after generation succeeded, so a failed call
    # does not leave an unanswered user turn in the shared history.
    history.append({"role": "user", "content": [text_part]})
    history.append({"role": "assistant", "content": [{"type": "text", "text": bot_output}]})
    chat_history[-1][1] = bot_output
    return chat_history
|
110 |
-
|
111 |
-
# Gradio Interface
def _reset_history():
    """Drop every stored message except the initial system prompt."""
    # Mutate in place so all functions sharing ``history`` see the reset.
    del history[1:]


with gr.Blocks() as demo:
    with gr.Row():
        # Left column: image upload and object detection.
        with gr.Column(scale=1):
            gr.Markdown("## Image Upload")
            image_input = gr.Gallery(
                label="Upload Images",
                type="pil",
                columns=2,
                height="auto"
            )
            objects_input = gr.Textbox(
                label="Objects to Detect",
                placeholder="cat, dog, car...",
                interactive=True
            )
            detect_button = gr.Button("Detect Objects")
            image_output = gr.Gallery(
                label="Detection Results",
                columns=2,
                height="auto"
            )

        # Right column: chat about the uploaded images.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(
                label="Chat with the AI",
                placeholder="Ask about the images..."
            )
            clear = gr.ClearButton([msg, chatbot, image_input, image_output])

    # Event handlers
    detect_button.click(
        fn=detect_objects,
        inputs=[image_input, objects_input],
        outputs=image_output
    )

    # First echo the user's message into the chat and clear the textbox,
    # then generate the reply.
    msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False
    ).then(
        fn=chat_function,
        inputs=[image_input, chatbot],
        outputs=[chatbot]
    )

    # The clear button resets the widgets itself; the model-side conversation
    # history must be reset too, otherwise a "cleared" chat keeps its context.
    clear.click(fn=_reset_history, queue=False)

if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|