Spaces:
Runtime error
Runtime error
import asyncio
import base64
import io
import json
import os
import threading
import time

import cv2
import gradio as gr
import numpy as np
import requests
from PIL import Image
class CameraProcessor:
    """Encode camera frames and send them to a ``/v1/chat/completions`` endpoint.

    Attributes:
        is_processing: Boolean flag flipped by ``toggle_processing``; no method
            of this class reads it.
        processing_thread: Initialised to ``None``; never assigned by this class.
        stop_event: ``threading.Event`` created at construction; never set or
            waited on by this class.
    """

    MAX_TOKENS = 100
    REQUEST_TIMEOUT_SECONDS = 10
    JPEG_QUALITY = 80

    def __init__(self):
        self.is_processing = False
        self.processing_thread = None
        self.stop_event = threading.Event()

    def encode_image_to_base64(self, image):
        """Return *image* as a JPEG ``data:`` URL string.

        Args:
            image: Numpy array in RGB channel order, or ``None``. Grayscale
                (2-D or single-channel) and RGBA arrays are accepted as well.

        Returns:
            ``"data:image/jpeg;base64,..."``, or ``None`` when *image* is
            ``None`` or OpenCV reports that encoding failed.
        """
        if image is None:
            return None

        channels = 1 if image.ndim == 2 else image.shape[2]
        if channels == 3:
            # OpenCV encoders expect BGR channel order.
            image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        elif channels == 4:
            # JPEG has no alpha channel, so drop it while reordering.
            image_bgr = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
        else:
            # Single-channel data needs no reordering.
            image_bgr = image

        encoded_ok, buffer = cv2.imencode(
            ".jpg", image_bgr, [cv2.IMWRITE_JPEG_QUALITY, self.JPEG_QUALITY]
        )
        if not encoded_ok:
            return None

        image_base64 = base64.b64encode(buffer).decode("utf-8")
        return f"data:image/jpeg;base64,{image_base64}"

    @staticmethod
    def _chat_completions_url(base_url):
        """Join *base_url* and the endpoint path without doubling the slash."""
        return f"{base_url.strip().rstrip('/')}/v1/chat/completions"

    def _build_payload(self, instruction, image_base64_url):
        """Build the JSON body: one user message with a text and an image part."""
        return {
            "max_tokens": self.MAX_TOKENS,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_base64_url},
                        },
                    ],
                }
            ],
        }

    def _post_chat_completion(self, url, instruction, image_base64_url):
        """POST the chat-completion request (blocking) and return the response."""
        return requests.post(
            url,
            headers={"Content-Type": "application/json"},
            json=self._build_payload(instruction, image_base64_url),
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )

    async def send_chat_completion_request(self, instruction, image_base64_url, base_url):
        """Send the chat-completion request without blocking the event loop.

        Args:
            instruction: Text prompt placed in the user message.
            image_base64_url: Image ``data:`` URL placed in the user message.
            base_url: Server root; ``/v1/chat/completions`` is appended.

        Returns:
            The first choice's message content on success, otherwise a
            ``"Server error: ..."`` or ``"Error: ..."`` string. Never raises.
        """
        try:
            url = self._chat_completions_url(base_url)
            # requests is synchronous; run it in the default executor so the
            # coroutine does not stall other tasks for up to the timeout.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, self._post_chat_completion, url, instruction, image_base64_url
            )
            if not response.ok:
                return f"Server error: {response.status_code} - {response.text}"
            data = response.json()
            return data["choices"][0]["message"]["content"]
        except Exception as e:
            return f"Error: {str(e)}"

    def process_frame(self, instruction, image, base_url):
        """Encode one frame, send it with *instruction*, and return the reply.

        Args:
            instruction: Text prompt placed in the user message.
            image: Numpy image array, or ``None``.
            base_url: Server root; ``/v1/chat/completions`` is appended.

        Returns:
            The model's reply text, or a human-readable status/error string
            (``"No image captured"``, ``"Failed to encode image"``,
            ``"Server error: ..."``, ``"Error: ..."``). Never raises.
        """
        print(f"DEBUG: process_frame called with base_url: {base_url}")
        if image is None:
            print("DEBUG: No image captured")
            return "No image captured"

        try:
            image_base64 = self.encode_image_to_base64(image)
        except Exception as e:
            # Keep the "always return a string" contract even when OpenCV
            # rejects the array.
            print(f"DEBUG: Image encoding raised: {e}")
            image_base64 = None
        if not image_base64:
            print("DEBUG: Failed to encode image")
            return "Failed to encode image"

        # Synchronous path: this method is what the Gradio callback calls.
        try:
            url = self._chat_completions_url(base_url)
            print(f"DEBUG: Sending request to {url}")
            print("DEBUG: Making HTTP request...")
            response = self._post_chat_completion(url, instruction, image_base64)
            print(f"DEBUG: Response status: {response.status_code}")
            if not response.ok:
                error_msg = f"Server error: {response.status_code} - {response.text}"
                print(f"DEBUG: {error_msg}")
                return error_msg
            data = response.json()
            result = data["choices"][0]["message"]["content"]
            print(f"DEBUG: Success - got response: {result}")
            return result
        except Exception as e:
            error_msg = f"Error: {str(e)}"
            print(f"DEBUG: Exception occurred: {error_msg}")
            return error_msg
# Shared module-level instance used by process_image and toggle_processing.
processor = CameraProcessor()
def process_image(instruction, image, base_url):
    """Validate the UI inputs and forward one frame to the shared processor.

    Args:
        instruction: Prompt text from the instruction textbox (may be empty).
        image: Captured image from the webcam component, or ``None``.
        base_url: API root from the URL textbox (may be empty).

    Returns:
        The string returned by ``processor.process_frame``, or a short message
        telling the user which input is missing.
    """
    print(f"DEBUG: process_image called - is_processing: {processor.is_processing}")
    print(f"DEBUG: instruction: '{instruction}'")
    print(f"DEBUG: base_url: '{base_url}'")
    print(f"DEBUG: image is None: {image is None}")
    print(f"DEBUG: image type: {type(image)}")

    if image is None:
        print("DEBUG: No image from webcam")
        return "No image from webcam - check camera permissions or try a different browser"

    # NOTE: processor.is_processing is deliberately not checked here, so the
    # manual "Process Image" button works without pressing Start first.

    # Treat None like an empty field instead of crashing on .strip().
    if not instruction or not instruction.strip():
        print("DEBUG: No instruction provided")
        return "Please enter an instruction"

    # Forward the stripped URL: the validated value and the value actually
    # used for the request must be the same string.
    base_url = base_url.strip() if base_url else ""
    if not base_url:
        print("DEBUG: No base URL provided")
        return "Please enter a base URL"

    print("DEBUG: Calling process_frame")
    result = processor.process_frame(instruction, image, base_url)
    print(f"DEBUG: process_frame result: {result}")
    return result
def toggle_processing():
    """Flip ``processor.is_processing`` and report the new state.

    Returns:
        A ``(button_label, status_message)`` pair: ``("Stop", ...)`` when
        processing has just been switched on, ``("Start", ...)`` otherwise.
    """
    processor.is_processing = not processor.is_processing
    print(f"DEBUG: Processing toggled to: {processor.is_processing}")
    return (
        ("Stop", "Processing started...")
        if processor.is_processing
        else ("Start", "Processing stopped.")
    )
def update_stream_interval(interval):
    """Return a Gradio update that sets ``stream_every`` to *interval*."""
    stream_settings = {"stream_every": interval}
    return gr.update(**stream_settings)
def test_api_connection(base_url):
    """Probe ``<base_url>/health`` and describe the outcome.

    Args:
        base_url: API root; surrounding whitespace and trailing slashes are
            ignored. ``None`` or an empty value is reported to the user.

    Returns:
        ``"API accessible: <status>"`` for any HTTP response,
        ``"API connection failed: <reason>"`` if the request raised, or
        ``"Please enter a base URL"`` when no URL was supplied.
    """
    root = (base_url or "").strip().rstrip("/")
    if not root:
        return "Please enter a base URL"
    try:
        response = requests.get(f"{root}/health", timeout=5)
    except Exception as e:
        # UI boundary: report every failure as text rather than raising.
        return f"API connection failed: {str(e)}"
    return f"API accessible: {response.status_code}"
# Build the Gradio UI. Components are created in the same order as before;
# NOTE(review): the Row/Column nesting is reconstructed and should be
# confirmed against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Camera Interaction App") as interface:
    gr.Markdown("# Camera Interaction App")
    gr.Markdown("**Note:** Make sure to grant camera permissions in your browser!")

    with gr.Row():
        # Webcam capture
        video_input = gr.Image(
            label="Camera Feed - Click to capture",
            sources=["webcam"],
            height=360,
            width=480,
        )

        with gr.Column():
            # Where requests are sent
            base_url_input = gr.Textbox(
                value="http://localhost:8080",
                label="Base API URL",
                placeholder="Enter API base URL",
            )

            # Prompt that accompanies the image
            instruction_input = gr.Textbox(
                value="What do you see?",
                label="Instruction",
                placeholder="Enter your instruction",
                lines=2,
            )

            # Read-only box that shows the reply
            response_output = gr.Textbox(
                value="1. Grant camera permissions\n2. Capture a photo\n3. Click Process Image",
                label="Response",
                lines=3,
                interactive=False,
            )

    with gr.Row():
        # Sends the captured image on demand
        process_button = gr.Button("Process Image", variant="primary")
        # Checks that the API answers
        test_button = gr.Button("Test API Connection", variant="secondary")

    with gr.Row():
        test_output = gr.Textbox(label="Connection Test", interactive=False)

    # Event wiring
    process_button.click(
        fn=process_image,
        inputs=[instruction_input, video_input, base_url_input],
        outputs=response_output,
    )
    test_button.click(
        fn=test_api_connection,
        inputs=base_url_input,
        outputs=test_output,
    )
if __name__ == "__main__":
    # Passing explicit server_name/server_port arguments takes precedence over
    # Gradio's own GRADIO_SERVER_NAME / GRADIO_SERVER_PORT handling, so read
    # those variables here and fall back to the previous hard-coded values.
    interface.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME") or "localhost",
        server_port=int(os.environ.get("GRADIO_SERVER_PORT") or "7860"),
        share=False,
        debug=True,
    )