Spaces:

teragron
/

smolvlm-realtime-webcam-gradio

Runtime error

App Files Files Community

teragron committed on Jun 2

Commit

a067973

verified ·

1 Parent(s): a30b7a5

Upload 2 files

Browse files

Files changed (2) hide show

app.py +249 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,249 @@

+import gradio as gr
+import cv2
+import numpy as np
+import base64
+import requests
+import json
+import time
+import threading
+from PIL import Image
+import io
+class CameraProcessor:
+    """Encode camera frames and query a `/v1/chat/completions` endpoint.
+
+    Attributes:
+        is_processing: Boolean flag, initially False.
+        processing_thread: Initially None; not used by the code in this class.
+        stop_event: A `threading.Event`, created unset; not used by the code
+            in this class.
+    """
+    # Upper bound on tokens requested from the model per completion.
+    MAX_TOKENS = 100
+    # Seconds to wait for the completion endpoint before giving up.
+    REQUEST_TIMEOUT = 10
+    # JPEG quality (0-100) used when compressing frames.
+    JPEG_QUALITY = 80
+    def __init__(self):
+        """Create an idle processor."""
+        self.is_processing = False
+        self.processing_thread = None
+        self.stop_event = threading.Event()
+    def encode_image_to_base64(self, image):
+        """Convert an RGB numpy image to a base64 JPEG data URL.
+
+        Args:
+            image: Numpy array in RGB channel order, or None.
+
+        Returns:
+            A `data:image/jpeg;base64,...` string, or None when `image` is
+            None or OpenCV cannot convert/encode it.
+        """
+        if image is None:
+            return None
+        try:
+            # OpenCV expects BGR channel order when writing JPEG data.
+            image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+            ok, buffer = cv2.imencode(
+                '.jpg', image_bgr, [cv2.IMWRITE_JPEG_QUALITY, self.JPEG_QUALITY]
+            )
+        except cv2.error:
+            # Unsupported shape/dtype: report "no result" so the caller can
+            # show its "Failed to encode image" message instead of crashing.
+            return None
+        if not ok:
+            return None
+        image_base64 = base64.b64encode(buffer).decode('utf-8')
+        return f"data:image/jpeg;base64,{image_base64}"
+    @staticmethod
+    def _completions_url(base_url):
+        """Return the chat completion endpoint for `base_url`.
+
+        Surrounding whitespace and trailing slashes are dropped so that
+        "http://host:8080/ " does not become "http://host:8080/ /v1/...".
+        """
+        return f"{str(base_url).strip().rstrip('/')}/v1/chat/completions"
+    def _request_completion(self, instruction, image_base64_url, base_url, verbose=False):
+        """POST one instruction/image pair and return the reply text.
+
+        Never raises: HTTP failures come back as "Server error: ..." and any
+        exception as "Error: ...", because the result is shown directly in
+        the UI. When `verbose` is true, progress is printed with a "DEBUG:"
+        prefix.
+        """
+        try:
+            payload = {
+                "max_tokens": self.MAX_TOKENS,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": instruction},
+                            {
+                                "type": "image_url",
+                                "image_url": {"url": image_base64_url}
+                            }
+                        ]
+                    }
+                ]
+            }
+            if verbose:
+                print("DEBUG: Making HTTP request...")
+            response = requests.post(
+                self._completions_url(base_url),
+                headers={"Content-Type": "application/json"},
+                json=payload,
+                timeout=self.REQUEST_TIMEOUT
+            )
+            if verbose:
+                print(f"DEBUG: Response status: {response.status_code}")
+            if not response.ok:
+                error_msg = f"Server error: {response.status_code} - {response.text}"
+                if verbose:
+                    print(f"DEBUG: {error_msg}")
+                return error_msg
+            result = response.json()["choices"][0]["message"]["content"]
+            if verbose:
+                print(f"DEBUG: Success - got response: {result}")
+            return result
+        except Exception as e:
+            # Deliberately broad: this is the UI boundary, so network errors
+            # and malformed responses are reported as text, not raised.
+            error_msg = f"Error: {str(e)}"
+            if verbose:
+                print(f"DEBUG: Exception occurred: {error_msg}")
+            return error_msg
+    async def send_chat_completion_request(self, instruction, image_base64_url, base_url):
+        """Send a chat completion request without blocking the event loop.
+
+        Args:
+            instruction: Text prompt sent alongside the image.
+            image_base64_url: Image as a base64 data URL.
+            base_url: Base URL of the API server.
+
+        Returns:
+            The model's reply text, or a "Server error: ..." / "Error: ..."
+            string on failure.
+        """
+        import asyncio
+        # `requests` is synchronous; run it in the default executor so other
+        # coroutines keep running while the HTTP call is in flight.
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(
+            None, self._request_completion, instruction, image_base64_url, base_url
+        )
+    def process_frame(self, instruction, image, base_url):
+        """Encode one frame, send it with `instruction`, and return the reply.
+
+        Args:
+            instruction: Text prompt sent alongside the image.
+            image: RGB numpy array, or None.
+            base_url: Base URL of the API server.
+
+        Returns:
+            The model's reply text, or a human-readable error message.
+        """
+        print(f"DEBUG: process_frame called with base_url: {base_url}")
+        if image is None:
+            print("DEBUG: No image captured")
+            return "No image captured"
+        image_base64 = self.encode_image_to_base64(image)
+        if not image_base64:
+            print("DEBUG: Failed to encode image")
+            return "Failed to encode image"
+        print(f"DEBUG: Sending request to {self._completions_url(base_url)}")
+        return self._request_completion(instruction, image_base64, base_url, verbose=True)
+# Single shared processor instance used by the callbacks below.
+processor = CameraProcessor()
+def process_image(instruction, image, base_url):
+    """Validate the UI inputs and run one frame through the processor.
+
+    Args:
+        instruction: Prompt text from the instruction box (may be empty/None).
+        image: Captured webcam frame, or None when nothing was captured.
+        base_url: API base URL from the URL box (may be empty/None).
+
+    Returns:
+        The text to display: either the model's reply or a message telling
+        the user which input is missing.
+    """
+    print(f"DEBUG: process_image called - is_processing: {processor.is_processing}")
+    print(f"DEBUG: instruction: '{instruction}'")
+    print(f"DEBUG: base_url: '{base_url}'")
+    print(f"DEBUG: image is None: {image is None}")
+    print(f"DEBUG: image type: {type(image)}")
+    if image is None:
+        print("DEBUG: No image from webcam")
+        return "No image from webcam - check camera permissions or try a different browser"
+    # Normalise once, then validate AND forward the cleaned values, so stray
+    # whitespace around the URL cannot end up inside the request URL.
+    instruction = (instruction or "").strip()
+    base_url = (base_url or "").strip()
+    if not instruction:
+        print("DEBUG: No instruction provided")
+        return "Please enter an instruction"
+    if not base_url:
+        print("DEBUG: No base URL provided")
+        return "Please enter a base URL"
+    print("DEBUG: Calling process_frame")
+    result = processor.process_frame(instruction, image, base_url)
+    print(f"DEBUG: process_frame result: {result}")
+    return result
+def toggle_processing():
+    """Flip the shared processor's `is_processing` flag.
+
+    Returns:
+        ("Stop", "Processing started...") when the flag is now True,
+        otherwise ("Start", "Processing stopped.").
+    """
+    now_active = not processor.is_processing
+    processor.is_processing = now_active
+    print(f"DEBUG: Processing toggled to: {processor.is_processing}")
+    if not now_active:
+        return "Start", "Processing stopped."
+    return "Stop", "Processing started..."
+def update_stream_interval(interval):
+    """Build a Gradio update that sets `stream_every` to `interval`."""
+    stream_update = gr.update(stream_every=interval)
+    return stream_update
+def test_api_connection(base_url):
+    """Check whether `{base_url}/health` answers.
+
+    Args:
+        base_url: API base URL; surrounding whitespace and trailing slashes
+            are ignored. May be empty or None.
+
+    Returns:
+        "API accessible: <status code>" when the server responds,
+        "API connection failed: <reason>" when the request raises, or
+        "Please enter a base URL" when no URL was given.
+    """
+    # Normalise so "http://host:8080/ " does not turn into a malformed URL.
+    base_url = (base_url or "").strip().rstrip("/")
+    if not base_url:
+        return "Please enter a base URL"
+    try:
+        response = requests.get(f"{base_url}/health", timeout=5)
+    except Exception as e:
+        # Deliberately broad: the result is shown in the UI, so any failure
+        # is reported as text rather than raised.
+        return f"API connection failed: {str(e)}"
+    return f"API accessible: {response.status_code}"
+# Build the Gradio UI: a webcam image input, text inputs for the API base URL
+# and the instruction, a read-only response box, and two buttons wired to
+# process_image and test_api_connection. Components appear in the order they
+# are created inside each layout context.
+with gr.Blocks(title="Camera Interaction App", theme=gr.themes.Soft()) as interface:
+    gr.Markdown("# Camera Interaction App")
+    gr.Markdown("**Note:** Make sure to grant camera permissions in your browser!")
+    with gr.Row():
+        # Webcam image input; its value is passed to process_image when the
+        # "Process Image" button is clicked (no streaming handler is attached).
+        video_input = gr.Image(
+            sources=["webcam"],
+            label="Camera Feed - Click to capture",
+            width=480,
+            height=360
+        )
+    with gr.Column():
+        # Base URL of the API server; used by both buttons.
+        base_url_input = gr.Textbox(
+            label="Base API URL",
+            value="http://localhost:8080",
+            placeholder="Enter API base URL"
+        )
+        # Prompt text sent together with the captured image.
+        instruction_input = gr.Textbox(
+            label="Instruction",
+            value="What do you see?",
+            placeholder="Enter your instruction",
+            lines=2
+        )
+        # Read-only output; starts with usage instructions and is replaced by
+        # whatever process_image returns.
+        response_output = gr.Textbox(
+            label="Response",
+            value="1. Grant camera permissions\n2. Capture a photo\n3. Click Process Image",
+            interactive=False,
+            lines=3
+        )
+    with gr.Row():
+        # Runs process_image on the currently captured image.
+        process_button = gr.Button("Process Image", variant="primary")
+        # Runs test_api_connection against the base URL.
+        test_button = gr.Button("Test API Connection", variant="secondary")
+    with gr.Row():
+        # Read-only output for the connection test result.
+        test_output = gr.Textbox(label="Connection Test", interactive=False)
+    # Input order must match process_image(instruction, image, base_url).
+    process_button.click(
+        fn=process_image,
+        inputs=[instruction_input, video_input, base_url_input],
+        outputs=response_output
+    )
+    # The connection test only needs the base URL.
+    test_button.click(
+        fn=test_api_connection,
+        inputs=base_url_input,
+        outputs=test_output
+    )
+if __name__ == "__main__":
+    import os
+    # Honour GRADIO_SERVER_NAME / GRADIO_SERVER_PORT when the environment sets
+    # them (containers and hosted deployments typically need the server bound
+    # to 0.0.0.0); otherwise keep the local defaults of localhost:7860.
+    # Passing a hard-coded "localhost" would override the environment and make
+    # the app unreachable from outside the container.
+    interface.launch(
+        server_name=os.environ.get("GRADIO_SERVER_NAME", "localhost"),
+        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
+        share=False,
+        debug=True
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+# Headless wheel: app.py only uses cv2.cvtColor/cv2.imencode, and the GUI
+# wheel needs system libraries (libGL) that server images often lack.
+opencv-python-headless
+numpy
+# Imported directly by app.py, so declare them instead of relying on
+# transitive dependencies.
+requests
+pillow