# teragron's picture
# Upload 2 files
# a067973 verified
import gradio as gr
import cv2
import numpy as np
import base64
import requests
import json
import time
import threading
from PIL import Image
import io
class CameraProcessor:
    """Encode camera frames and send them to a chat-completion HTTP API.

    The instance also carries ``is_processing``, ``processing_thread`` and
    ``stop_event``; inside this class they are only initialised.
    """

    # Settings shared by every request so both entry points stay in sync.
    MAX_TOKENS = 100
    REQUEST_TIMEOUT = 10  # passed to requests as ``timeout``
    JPEG_QUALITY = 80

    def __init__(self):
        """Initialise the processing flag, thread slot and stop event."""
        self.is_processing = False
        self.processing_thread = None
        self.stop_event = threading.Event()

    def encode_image_to_base64(self, image):
        """Convert an RGB numpy image into a base64 JPEG data URL.

        Args:
            image: Image array in RGB channel order, or ``None``.

        Returns:
            A ``data:image/jpeg;base64,...`` string, or ``None`` when *image*
            is ``None`` or OpenCV fails to convert/encode it.
        """
        if image is None:
            return None
        try:
            # OpenCV works in BGR channel order, so swap before encoding.
            image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            encoded_ok, buffer = cv2.imencode(
                ".jpg", image_bgr, [cv2.IMWRITE_JPEG_QUALITY, self.JPEG_QUALITY]
            )
        except cv2.error:
            # Callers treat a falsy result as "failed to encode".
            return None
        if not encoded_ok:
            return None
        image_base64 = base64.b64encode(buffer).decode("utf-8")
        return f"data:image/jpeg;base64,{image_base64}"

    @staticmethod
    def _completions_url(base_url):
        """Return the chat-completions endpoint for *base_url*.

        Surrounding whitespace and trailing slashes are dropped so that
        ``http://host/`` does not become ``http://host//v1/...``.
        """
        return f"{str(base_url).strip().rstrip('/')}/v1/chat/completions"

    @classmethod
    def _build_payload(cls, instruction, image_base64_url):
        """Build the JSON body: one user message with text and an image URL."""
        return {
            "max_tokens": cls.MAX_TOKENS,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_base64_url},
                        },
                    ],
                }
            ],
        }

    def _request_completion(self, instruction, image_base64_url, base_url, *, verbose=False):
        """POST the request (blocking) and return the answer or an error string.

        Args:
            instruction: Text prompt sent alongside the image.
            image_base64_url: Image as a data URL.
            base_url: API base URL; ``/v1/chat/completions`` is appended.
            verbose: When true, print ``DEBUG:`` progress lines.

        Returns:
            The content of the first choice's message, or a string starting
            with ``Server error:`` / ``Error:`` describing the failure.
        """

        def log(message):
            if verbose:
                print(f"DEBUG: {message}")

        # Boundary for the UI: every failure is reported as text, never raised.
        try:
            url = self._completions_url(base_url)
            log(f"Sending request to {url}")
            payload = self._build_payload(instruction, image_base64_url)
            log("Making HTTP request...")
            response = requests.post(
                url,
                headers={"Content-Type": "application/json"},
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            log(f"Response status: {response.status_code}")
            if not response.ok:
                error_msg = f"Server error: {response.status_code} - {response.text}"
                log(error_msg)
                return error_msg
            data = response.json()
            result = data["choices"][0]["message"]["content"]
            log(f"Success - got response: {result}")
            return result
        except Exception as e:
            error_msg = f"Error: {str(e)}"
            log(f"Exception occurred: {error_msg}")
            return error_msg

    async def send_chat_completion_request(self, instruction, image_base64_url, base_url):
        """Send the chat-completion request without blocking the event loop.

        Args:
            instruction: Text prompt sent alongside the image.
            image_base64_url: Image as a data URL.
            base_url: API base URL.

        Returns:
            The model's answer, or an error string on failure.
        """
        import asyncio

        # requests is synchronous; run it in the default executor so awaiting
        # this coroutine does not stall other tasks on the loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, self._request_completion, instruction, image_base64_url, base_url
        )

    def process_frame(self, instruction, image, base_url):
        """Encode one frame and ask the API about it.

        Args:
            instruction: Text prompt for the model.
            image: RGB numpy image, or ``None``.
            base_url: API base URL.

        Returns:
            The model's answer, or a message describing what went wrong
            (no image, encoding failure, server error, exception).
        """
        print(f"DEBUG: process_frame called with base_url: {base_url}")
        if image is None:
            print("DEBUG: No image captured")
            return "No image captured"
        image_base64 = self.encode_image_to_base64(image)
        if not image_base64:
            print("DEBUG: Failed to encode image")
            return "Failed to encode image"
        # Called from a synchronous Gradio handler, so use the blocking path
        # directly rather than the async wrapper.
        return self._request_completion(instruction, image_base64, base_url, verbose=True)
# Initialize processor
# Module-level CameraProcessor instance; process_image and toggle_processing
# read and update it.
processor = CameraProcessor()
def process_image(instruction, image, base_url):
    """Validate the UI inputs and run one frame through the processor.

    Args:
        instruction: Prompt text from the instruction box.
        image: Captured frame, or ``None`` when nothing was captured.
        base_url: API base URL text.

    Returns:
        The processor's response, or a message explaining which input is
        missing.
    """
    print(f"DEBUG: process_image called - is_processing: {processor.is_processing}")
    print(f"DEBUG: instruction: '{instruction}'")
    print(f"DEBUG: base_url: '{base_url}'")
    print(f"DEBUG: image is None: {image is None}")
    print(f"DEBUG: image type: {type(image)}")

    # Always return something so it is visible that the handler ran.
    if image is None:
        print("DEBUG: No image from webcam")
        return "No image from webcam - check camera permissions or try a different browser"

    # processor.is_processing is deliberately not checked: manual processing
    # should work regardless of the start/stop state.

    # Treat None like empty text so validation answers with a message instead
    # of raising AttributeError on .strip().
    if not (instruction or "").strip():
        print("DEBUG: No instruction provided")
        return "Please enter an instruction"

    # Use the stripped URL for the request too; stray whitespace around the
    # scheme or host otherwise makes the HTTP call fail.
    cleaned_base_url = (base_url or "").strip()
    if not cleaned_base_url:
        print("DEBUG: No base URL provided")
        return "Please enter a base URL"

    print("DEBUG: Calling process_frame")
    result = processor.process_frame(instruction, image, cleaned_base_url)
    print(f"DEBUG: process_frame result: {result}")
    return result
def toggle_processing():
    """Flip ``processor.is_processing`` and report the new state.

    Returns:
        ``("Stop", "Processing started...")`` when processing is now on,
        otherwise ``("Start", "Processing stopped.")``.
    """
    now_active = not processor.is_processing
    processor.is_processing = now_active
    print(f"DEBUG: Processing toggled to: {processor.is_processing}")
    return ("Stop", "Processing started...") if now_active else ("Start", "Processing stopped.")
def update_stream_interval(interval):
    """Return a ``gr.update`` that sets ``stream_every`` to *interval*."""
    stream_patch = gr.update(stream_every=interval)
    return stream_patch
def test_api_connection(base_url):
    """Check whether the API server answers on its ``/health`` endpoint.

    Args:
        base_url: API base URL; surrounding whitespace and trailing slashes
            are ignored so ``http://host/`` maps to ``http://host/health``.

    Returns:
        ``"API accessible: <status code>"`` when any HTTP response arrives,
        otherwise ``"API connection failed: <reason>"``.
    """
    # Normalise the same way the chat-completion endpoint should be built, so
    # a trailing slash does not turn into ``//health``.
    health_url = f"{str(base_url).strip().rstrip('/')}/health"
    # UI boundary: report every failure as text rather than raising.
    try:
        response = requests.get(health_url, timeout=5)
        return f"API accessible: {response.status_code}"
    except Exception as e:
        return f"API connection failed: {str(e)}"
# Assemble the Gradio Blocks UI: camera image beside the input/response
# textboxes, then the action buttons, then the connection-test output.
with gr.Blocks(theme=gr.themes.Soft(), title="Camera Interaction App") as interface:
    gr.Markdown("# Camera Interaction App")
    gr.Markdown("**Note:** Make sure to grant camera permissions in your browser!")

    with gr.Row():
        # Webcam-sourced image component.
        video_input = gr.Image(
            label="Camera Feed - Click to capture",
            sources=["webcam"],
            width=480,
            height=360,
        )

        with gr.Column():
            # Where requests are sent.
            base_url_input = gr.Textbox(
                value="http://localhost:8080",
                label="Base API URL",
                placeholder="Enter API base URL",
            )
            # Prompt that accompanies the captured frame.
            instruction_input = gr.Textbox(
                value="What do you see?",
                label="Instruction",
                placeholder="Enter your instruction",
                lines=2,
            )
            # Read-only box for the result; starts with usage steps.
            response_output = gr.Textbox(
                value="1. Grant camera permissions\n2. Capture a photo\n3. Click Process Image",
                label="Response",
                lines=3,
                interactive=False,
            )

    with gr.Row():
        # Runs process_image on demand.
        process_button = gr.Button("Process Image", variant="primary")
        # Runs test_api_connection against the base URL.
        test_button = gr.Button("Test API Connection", variant="secondary")

    with gr.Row():
        test_output = gr.Textbox(label="Connection Test", interactive=False)

    # Event wiring: button clicks feed the textbox/image values to the handlers.
    process_button.click(
        fn=process_image,
        inputs=[instruction_input, video_input, base_url_input],
        outputs=response_output,
    )
    test_button.click(fn=test_api_connection, inputs=base_url_input, outputs=test_output)
if __name__ == "__main__":
    # Launch on localhost:7860 with sharing off and debug on.
    launch_options = {
        "server_name": "localhost",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    interface.launch(**launch_options)