teragron committed on
Commit
a067973
·
verified ·
1 Parent(s): a30b7a5

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +249 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import base64
5
+ import requests
6
+ import json
7
+ import time
8
+ import threading
9
+ from PIL import Image
10
+ import io
11
+
12
class CameraProcessor:
    """Encode camera frames and send them to a chat-completion HTTP endpoint.

    The processor turns a numpy image into a JPEG ``data:`` URL, posts it
    together with a text instruction to ``<base_url>/v1/chat/completions``
    and returns the text of the first choice (or a human-readable error
    string; request failures are reported, never raised).
    """

    # Settings shared by the synchronous and asynchronous request paths.
    MAX_TOKENS = 100
    REQUEST_TIMEOUT_SECONDS = 10
    JPEG_QUALITY = 80

    def __init__(self):
        # State flags; this class only initializes them, the rest of the
        # app reads and toggles ``is_processing``.
        self.is_processing = False
        self.processing_thread = None
        self.stop_event = threading.Event()

    @staticmethod
    def _completions_url(base_url):
        """Return the chat-completions endpoint for *base_url*.

        Surrounding whitespace and trailing slashes are removed so that
        ``"http://host:8080/"`` does not produce ``//v1/chat/completions``.
        """
        return f"{str(base_url).strip().rstrip('/')}/v1/chat/completions"

    @classmethod
    def _build_payload(cls, instruction, image_base64_url):
        """Build the JSON body: one user message holding text plus one image."""
        return {
            "max_tokens": cls.MAX_TOKENS,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_base64_url},
                        },
                    ],
                }
            ],
        }

    def _request_completion(self, instruction, image_base64_url, base_url, *, verbose=False):
        """Post one request and return the reply text or an error string.

        Args:
            instruction: Text prompt sent alongside the image.
            image_base64_url: Image encoded as a ``data:`` URL.
            base_url: Root URL of the API server.
            verbose: When true, print ``DEBUG:`` progress lines to stdout.

        Returns:
            The ``content`` of the first choice on success, otherwise a
            string starting with ``"Server error:"`` or ``"Error:"``.
        """
        def debug(message):
            if verbose:
                print(f"DEBUG: {message}")

        # Broad catch is deliberate: this is the UI boundary, and every
        # failure (network, bad JSON, unexpected schema) is shown as text.
        try:
            payload = self._build_payload(instruction, image_base64_url)

            debug("Making HTTP request...")
            response = requests.post(
                self._completions_url(base_url),
                headers={"Content-Type": "application/json"},
                json=payload,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            debug(f"Response status: {response.status_code}")

            if not response.ok:
                error_msg = f"Server error: {response.status_code} - {response.text}"
                debug(error_msg)
                return error_msg

            data = response.json()
            result = data["choices"][0]["message"]["content"]
            debug(f"Success - got response: {result}")
            return result

        except Exception as e:
            error_msg = f"Error: {str(e)}"
            debug(f"Exception occurred: {error_msg}")
            return error_msg

    def encode_image_to_base64(self, image):
        """Convert a numpy image to a JPEG ``data:`` URL.

        Args:
            image: Numpy array in RGB (3 channels), RGBA (4 channels) or
                grayscale (2-D) layout, or ``None``.

        Returns:
            ``"data:image/jpeg;base64,..."`` on success, or ``None`` when
            *image* is ``None`` or OpenCV cannot encode it.
        """
        if image is None:
            return None

        try:
            # OpenCV expects BGR channel order; grayscale needs no reordering.
            if image.ndim == 2:
                image_bgr = image
            elif image.shape[2] == 4:
                image_bgr = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
            else:
                image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            ok, buffer = cv2.imencode(
                '.jpg', image_bgr, [cv2.IMWRITE_JPEG_QUALITY, self.JPEG_QUALITY]
            )
        except cv2.error as e:
            print(f"DEBUG: Image encoding failed: {e}")
            return None

        # imencode reports failure through its first return value.
        if not ok:
            return None

        image_base64 = base64.b64encode(buffer).decode('utf-8')
        return f"data:image/jpeg;base64,{image_base64}"

    async def send_chat_completion_request(self, instruction, image_base64_url, base_url):
        """Send a chat-completion request without blocking the event loop.

        The blocking ``requests`` call runs in the loop's default executor.

        Returns:
            The reply text, or a ``"Server error: ..."`` / ``"Error: ..."``
            string on failure.
        """
        import asyncio  # local import: only this coroutine needs it

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, self._request_completion, instruction, image_base64_url, base_url
        )

    def process_frame(self, instruction, image, base_url):
        """Encode one frame, send it with *instruction*, and return the reply.

        Args:
            instruction: Text prompt for the model.
            image: Numpy image (see ``encode_image_to_base64``) or ``None``.
            base_url: Root URL of the API server.

        Returns:
            The reply text, or a human-readable error/status string.
        """
        print(f"DEBUG: process_frame called with base_url: {base_url}")

        if image is None:
            print("DEBUG: No image captured")
            return "No image captured"

        image_base64 = self.encode_image_to_base64(image)
        if not image_base64:
            print("DEBUG: Failed to encode image")
            return "Failed to encode image"

        print(f"DEBUG: Sending request to {self._completions_url(base_url)}")
        return self._request_completion(instruction, image_base64, base_url, verbose=True)
126
+
127
+ # Module-level CameraProcessor instance; process_image and toggle_processing read and update it
128
+ processor = CameraProcessor()
129
+
130
def process_image(instruction, image, base_url):
    """Validate the Gradio inputs and hand one captured frame to the processor.

    Args:
        instruction: Text prompt from the instruction textbox.
        image: Captured webcam frame, or ``None`` if nothing was captured.
        base_url: API root URL from the base-URL textbox.

    Returns:
        The processor's reply, or a message telling the user which input
        is missing.
    """
    startup_trace = (
        f"DEBUG: process_image called - is_processing: {processor.is_processing}",
        f"DEBUG: instruction: '{instruction}'",
        f"DEBUG: base_url: '{base_url}'",
        f"DEBUG: image is None: {image is None}",
        f"DEBUG: image type: {type(image)}",
    )
    for trace_line in startup_trace:
        print(trace_line)

    # Always answer with something so it is visible that the callback ran.
    if image is None:
        print("DEBUG: No image from webcam")
        return "No image from webcam - check camera permissions or try a different browser"

    # The is_processing flag is only logged above, not enforced, so the
    # manual "Process Image" button works without pressing Start.
    required_text_inputs = (
        (instruction, "DEBUG: No instruction provided", "Please enter an instruction"),
        (base_url, "DEBUG: No base URL provided", "Please enter a base URL"),
    )
    for text_value, debug_line, user_message in required_text_inputs:
        if not text_value.strip():
            print(debug_line)
            return user_message

    print("DEBUG: Calling process_frame")
    outcome = processor.process_frame(instruction, image, base_url)
    print(f"DEBUG: process_frame result: {outcome}")
    return outcome
160
+
161
def toggle_processing():
    """Flip the shared processor's ``is_processing`` flag.

    Returns:
        A ``(button_label, status_message)`` pair describing the new state.
    """
    now_active = not processor.is_processing
    processor.is_processing = now_active
    print(f"DEBUG: Processing toggled to: {processor.is_processing}")

    if not now_active:
        return "Start", "Processing stopped."
    return "Stop", "Processing started..."
169
+
170
def update_stream_interval(interval):
    """Return a Gradio update that sets ``stream_every`` to *interval*."""
    stream_settings = {"stream_every": interval}
    return gr.update(**stream_settings)
173
+
174
def test_api_connection(base_url):
    """Check whether the API server answers on ``<base_url>/health``.

    Args:
        base_url: Root URL of the API server; surrounding whitespace and
            trailing slashes are ignored.

    Returns:
        ``"API accessible: <status code>"`` when any HTTP response is
        received, ``"Please enter a base URL"`` when *base_url* is empty,
        or ``"API connection failed: <reason>"`` when the request fails.
    """
    # Normalize first so "http://host/" does not become "http://host//health"
    # and an empty field gets a clear message instead of a requests error.
    normalized_url = (base_url or "").strip().rstrip("/")
    if not normalized_url:
        return "Please enter a base URL"

    # Broad catch is deliberate: this is a UI callback and every failure
    # should be shown to the user as text rather than raised.
    try:
        response = requests.get(f"{normalized_url}/health", timeout=5)
    except Exception as e:
        return f"API connection failed: {str(e)}"
    return f"API accessible: {response.status_code}"
181
+
182
# Build the Gradio UI.
# NOTE(review): the layout nesting below was reconstructed from a flattened
# listing - confirm that the Column sits inside the first Row.
with gr.Blocks(theme=gr.themes.Soft(), title="Camera Interaction App") as interface:
    gr.Markdown("# Camera Interaction App")
    gr.Markdown("**Note:** Make sure to grant camera permissions in your browser!")

    with gr.Row():
        # Webcam capture; the webcam is the only enabled image source.
        video_input = gr.Image(
            label="Camera Feed - Click to capture",
            sources=["webcam"],
            height=360,
            width=480,
        )

        with gr.Column():
            # Root URL of the chat-completion server.
            base_url_input = gr.Textbox(
                value="http://localhost:8080",
                label="Base API URL",
                placeholder="Enter API base URL",
            )

            # Prompt sent together with the captured frame.
            instruction_input = gr.Textbox(
                value="What do you see?",
                label="Instruction",
                lines=2,
                placeholder="Enter your instruction",
            )

            # Read-only box; shows usage steps until a reply arrives.
            response_output = gr.Textbox(
                value="1. Grant camera permissions\n2. Capture a photo\n3. Click Process Image",
                label="Response",
                lines=3,
                interactive=False,
            )

    with gr.Row():
        # Sends the captured frame for processing on demand.
        process_button = gr.Button("Process Image", variant="primary")

        # Checks that the API server can be reached.
        test_button = gr.Button("Test API Connection", variant="secondary")

    with gr.Row():
        test_output = gr.Textbox(interactive=False, label="Connection Test")

    # Event wiring: fn, inputs, outputs.
    process_button.click(
        process_image,
        [instruction_input, video_input, base_url_input],
        response_output,
    )

    test_button.click(
        test_api_connection,
        base_url_input,
        test_output,
    )
242
+
243
if __name__ == "__main__":
    # Serve on localhost:7860 with debug mode on and no public share link.
    launch_options = {
        "server_name": "localhost",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    interface.launch(**launch_options)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ opencv-python
3
+ numpy