Create vision_analysis.py
Browse files- vision_analysis.py +670 -0
vision_analysis.py
ADDED
@@ -0,0 +1,670 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
TinkerIQ Advanced Vision Analysis Module
|
3 |
+
Circuit and schematic analysis using SambaNova and OpenAI vision models
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
import requests
|
8 |
+
import base64
|
9 |
+
import time
|
10 |
+
from PIL import Image
|
11 |
+
import io
|
12 |
+
import re
|
13 |
+
from datetime import datetime
|
14 |
+
|
15 |
+
class AdvancedVisionAnalysis:
    """Advanced computer vision for electronic circuits and schematics"""

    def __init__(self):
        """Read API keys from the environment, set endpoints, and build the
        static prompt/validation tables used by the analysis methods."""
        # API credentials (any of these may be None if the env var is unset;
        # the analyze_* methods check for that before sending requests)
        self.sambanova_api_key = os.getenv("SAMBANOVA_API_KEY")
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")

        # API endpoints
        # NOTE(review): the Anthropic endpoint/key are configured but not used
        # by any method visible in this module — confirm whether an Anthropic
        # analyze path was intended.
        self.sambanova_url = "https://api.sambanova.ai/v1/chat/completions"
        self.openai_vision_url = "https://api.openai.com/v1/chat/completions"
        self.anthropic_url = "https://api.anthropic.com/v1/messages"

        # Analysis templates for different circuit types.  Keys are the
        # analysis_type values accepted by analyze_image()/analyze_with_*().
        self.analysis_prompts = {
            "detailed": """You are an expert electronics engineer analyzing a circuit image. Provide a comprehensive technical analysis:

## COMPONENT IDENTIFICATION
- List ALL visible electronic components with specific part numbers where readable
- Include passive components (resistors, capacitors, inductors) with values if visible
- Identify active components (ICs, transistors, diodes) with part numbers
- Note any development boards (Arduino, ESP32, Raspberry Pi, etc.)
- Identify connectors, switches, and mechanical components

## CIRCUIT TOPOLOGY ANALYSIS
- Describe the overall circuit architecture and design pattern
- Explain the signal flow and data paths through the circuit
- Identify power supply connections and voltage levels
- Note ground connections and power distribution
- Describe any special circuit configurations or topologies

## FUNCTIONAL ANALYSIS
- Explain what this circuit is designed to accomplish
- Describe the main functional blocks and their purposes
- Identify input/output connections and interfaces
- Explain any control or feedback mechanisms
- Note any protection circuits or safety features

## TECHNICAL ASSESSMENT
- Evaluate if connections appear correct for the intended function
- Identify any obvious wiring errors or missing connections
- Check for proper component orientations (especially polarized components)
- Assess power supply adequacy and current handling
- Note any potential signal integrity or noise issues

## IMPROVEMENT RECOMMENDATIONS
- Suggest specific component upgrades or alternatives
- Recommend additional protection or filtering components
- Propose layout improvements for better performance
- Suggest debugging aids or test points
- Recommend documentation or labeling improvements

## SAFETY AND COMPLIANCE
- Identify any safety concerns or hazardous conditions
- Note compliance with standard practices and conventions
- Suggest safety improvements or protective measures
- Check for proper isolation and grounding

Be extremely specific and technical. Include part numbers, values, and precise technical terminology.""",

            "troubleshooting": """Analyze this circuit image for troubleshooting purposes. Focus on identifying potential problems:

## VISUAL INSPECTION
- Check all solder joints for cold solder, bridges, or dry joints
- Verify component orientations (LEDs, electrolytic capacitors, ICs, diodes)
- Look for damaged components (burned, cracked, or discolored)
- Check for loose connections or disconnected wires
- Identify any short circuits or unwanted connections

## WIRING VERIFICATION
- Trace power connections (VCC, VDD, +5V, +3.3V, etc.)
- Verify ground connections and ground loops
- Check data/signal line connections
- Validate pin assignments match intended design
- Look for crossed or swapped connections

## COMPONENT ANALYSIS
- Verify component values match circuit requirements
- Check component ratings (voltage, current, power)
- Identify any substituted or incorrect components
- Note missing components or empty footprints
- Check for counterfeit or suspect components

## COMMON FAILURE MODES
- Identify typical failure points for this circuit type
- Look for stress indicators (heat damage, corrosion)
- Check for mechanical damage or wear
- Note any signs of overcurrent or overvoltage damage
- Identify environmental damage (moisture, contamination)

## DEBUGGING RECOMMENDATIONS
- Suggest specific measurement points for multimeter testing
- Recommend signal tracing procedures
- Propose component substitution tests
- Suggest isolation techniques for fault finding
- Recommend tools or equipment for further diagnosis

Focus on actionable troubleshooting steps and specific technical guidance.""",

            "educational": """Analyze this circuit from an educational perspective for learning purposes:

## CIRCUIT FUNDAMENTALS
- Explain the basic operating principles in clear terms
- Identify key concepts demonstrated by this circuit
- Describe the role of each major component type
- Explain power flow and signal paths
- Connect theory to practical implementation

## LEARNING OBJECTIVES
- What skills does building this circuit teach?
- What theoretical concepts are demonstrated?
- How does this relate to broader electronics knowledge?
- What prerequisites should learners have?
- What follow-up projects would build on this knowledge?

## COMPONENT EDUCATION
- Explain why each component was chosen
- Describe component specifications and ratings
- Suggest alternatives and their trade-offs
- Explain how to read component markings and datasheets
- Discuss component sourcing and selection criteria

## CONSTRUCTION GUIDANCE
- Provide step-by-step assembly recommendations
- Highlight critical construction points
- Suggest testing procedures at each stage
- Recommend tools and techniques
- Identify common beginner mistakes to avoid

## EXPERIMENTATION IDEAS
- Suggest modifications to explore different behaviors
- Propose parameter variations for learning
- Recommend additional measurements or observations
- Suggest related experiments or variations
- Provide ideas for extending the project

Make the analysis accessible but technically accurate, perfect for STEM education."""
        }

        # Response validation patterns, consumed by validate_analysis_response():
        # "generic_responses" — any single match rejects the response outright;
        # "hallucination_indicators" — counted, and rejected only past a threshold.
        self.validation_patterns = {
            "generic_responses": [
                "i'm unable to view",
                "i cannot see",
                "i can't see the image",
                "i don't have the ability to view",
                "i cannot analyze images",
                "without seeing the actual",
                "based on the description",
                "typical circuit",
                "common configuration",
                "standard setup"
            ],
            "hallucination_indicators": [
                "appears to be",
                "seems to be",
                "looks like it might be",
                "could be",
                "possibly",
                "probably"
            ]
        }

        # Startup diagnostics: show which providers are usable.
        print(f"🔧 Vision Analysis initialized")
        print(f" SambaNova API: {'✅' if self.sambanova_api_key else '❌'}")
        print(f" OpenAI API: {'✅' if self.openai_api_key else '❌'}")
        print(f" Anthropic API: {'✅' if self.anthropic_api_key else '❌'}")
184 |
+
def encode_image_safely(self, image_path, max_size=1024, quality=85):
|
185 |
+
"""
|
186 |
+
Convert image to base64 with optimization for vision APIs
|
187 |
+
|
188 |
+
Args:
|
189 |
+
image_path: Path to image file or file-like object
|
190 |
+
max_size: Maximum dimension for resizing
|
191 |
+
quality: JPEG quality (1-100)
|
192 |
+
|
193 |
+
Returns:
|
194 |
+
tuple: (base64_string, error_message)
|
195 |
+
"""
|
196 |
+
try:
|
197 |
+
if image_path is None:
|
198 |
+
return None, "No image provided"
|
199 |
+
|
200 |
+
# Handle different input types
|
201 |
+
if hasattr(image_path, 'name') and image_path.name:
|
202 |
+
# Gradio file upload
|
203 |
+
image_file_path = image_path.name
|
204 |
+
print(f"📁 Processing Gradio file: {image_file_path}")
|
205 |
+
elif isinstance(image_path, str):
|
206 |
+
# File path string
|
207 |
+
image_file_path = image_path
|
208 |
+
print(f"📁 Processing file path: {image_file_path}")
|
209 |
+
else:
|
210 |
+
return None, "Unsupported image format"
|
211 |
+
|
212 |
+
# Open and process image
|
213 |
+
with Image.open(image_file_path) as img:
|
214 |
+
# Get original dimensions
|
215 |
+
original_size = img.size
|
216 |
+
print(f"📐 Original image size: {original_size[0]}x{original_size[1]}")
|
217 |
+
|
218 |
+
# Convert to RGB if necessary
|
219 |
+
if img.mode in ('RGBA', 'LA', 'P'):
|
220 |
+
print(f"🔄 Converting from {img.mode} to RGB")
|
221 |
+
img = img.convert('RGB')
|
222 |
+
|
223 |
+
# Resize if too large
|
224 |
+
if img.width > max_size or img.height > max_size:
|
225 |
+
print(f"📏 Resizing to max dimension: {max_size}")
|
226 |
+
img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
|
227 |
+
print(f"📐 New size: {img.width}x{img.height}")
|
228 |
+
|
229 |
+
# Convert to bytes with optimization
|
230 |
+
buffer = io.BytesIO()
|
231 |
+
|
232 |
+
# Use PNG for line drawings/schematics, JPEG for photos
|
233 |
+
if self._is_schematic_like(img):
|
234 |
+
img.save(buffer, format='PNG', optimize=True)
|
235 |
+
format_used = "PNG"
|
236 |
+
else:
|
237 |
+
img.save(buffer, format='JPEG', quality=quality, optimize=True)
|
238 |
+
format_used = "JPEG"
|
239 |
+
|
240 |
+
image_data = buffer.getvalue()
|
241 |
+
file_size = len(image_data)
|
242 |
+
|
243 |
+
print(f"✅ Image processed: {file_size} bytes ({format_used})")
|
244 |
+
|
245 |
+
# Check size limits (most APIs have ~20MB limit)
|
246 |
+
if file_size > 15 * 1024 * 1024: # 15MB safety margin
|
247 |
+
return None, f"Image too large: {file_size/1024/1024:.1f}MB (max ~15MB)"
|
248 |
+
|
249 |
+
# Encode to base64
|
250 |
+
base64_string = base64.b64encode(image_data).decode('utf-8')
|
251 |
+
|
252 |
+
return base64_string, None
|
253 |
+
|
254 |
+
except FileNotFoundError:
|
255 |
+
return None, f"Image file not found: {image_path}"
|
256 |
+
except Exception as e:
|
257 |
+
error_msg = f"Image processing error: {str(e)}"
|
258 |
+
print(f"❌ {error_msg}")
|
259 |
+
return None, error_msg
|
260 |
+
|
261 |
+
def _is_schematic_like(self, img):
|
262 |
+
"""Heuristic to determine if image is a schematic vs photo"""
|
263 |
+
# Convert to grayscale for analysis
|
264 |
+
gray = img.convert('L')
|
265 |
+
|
266 |
+
# Count unique colors (schematics typically have fewer)
|
267 |
+
colors = len(set(gray.getdata()))
|
268 |
+
|
269 |
+
# Schematics typically have high contrast and few colors
|
270 |
+
return colors < 50
|
271 |
+
|
272 |
+
def validate_analysis_response(self, response_text, analysis_type="detailed"):
|
273 |
+
"""
|
274 |
+
Validate that the AI actually analyzed the image vs providing generic advice
|
275 |
+
|
276 |
+
Args:
|
277 |
+
response_text: AI response to validate
|
278 |
+
analysis_type: Type of analysis requested
|
279 |
+
|
280 |
+
Returns:
|
281 |
+
tuple: (is_valid, error_message)
|
282 |
+
"""
|
283 |
+
response_lower = response_text.lower()
|
284 |
+
|
285 |
+
# Check for generic/inability responses
|
286 |
+
for pattern in self.validation_patterns["generic_responses"]:
|
287 |
+
if pattern in response_lower:
|
288 |
+
return False, f"Generic response detected: '{pattern}'"
|
289 |
+
|
290 |
+
# Check response length (too short suggests generic response)
|
291 |
+
if len(response_text.strip()) < 200:
|
292 |
+
return False, "Response too short/generic"
|
293 |
+
|
294 |
+
# Check for excessive uncertainty language
|
295 |
+
uncertainty_count = sum(1 for pattern in self.validation_patterns["hallucination_indicators"]
|
296 |
+
if pattern in response_lower)
|
297 |
+
|
298 |
+
if uncertainty_count > 5: # Too many uncertain statements
|
299 |
+
return False, f"Excessive uncertainty in response ({uncertainty_count} indicators)"
|
300 |
+
|
301 |
+
# Check for specific technical content based on analysis type
|
302 |
+
if analysis_type == "detailed":
|
303 |
+
required_sections = ["component", "circuit", "connection"]
|
304 |
+
found_sections = sum(1 for section in required_sections if section in response_lower)
|
305 |
+
|
306 |
+
if found_sections < 2:
|
307 |
+
return False, "Response lacks technical depth"
|
308 |
+
|
309 |
+
print("✅ Response validation passed")
|
310 |
+
return True, None
|
311 |
+
|
312 |
+
    def analyze_with_sambanova(self, image_path, analysis_type="detailed", custom_prompt=None):
        """
        Analyze circuit image using SambaNova vision model

        Args:
            image_path: Path to image file
            analysis_type: Type of analysis (detailed, troubleshooting, educational)
            custom_prompt: Custom analysis prompt; overrides analysis_type template

        Returns:
            tuple: (result_dict, error_message) — exactly one is None
        """
        print("🚀 Starting SambaNova vision analysis...")

        # Fail fast when no key is configured so the caller can fall back.
        if not self.sambanova_api_key:
            return None, "SambaNova API key not configured"

        # Encode image
        base64_image, error = self.encode_image_safely(image_path)
        if error:
            return None, f"Image processing failed: {error}"

        try:
            # Select prompt: explicit custom prompt wins; unknown analysis
            # types fall back to the "detailed" template.
            if custom_prompt:
                prompt = custom_prompt
            else:
                prompt = self.analysis_prompts.get(analysis_type, self.analysis_prompts["detailed"])

            # Prepare request
            headers = {
                "Authorization": f"Bearer {self.sambanova_api_key}",
                "Content-Type": "application/json"
            }

            # NOTE(review): the data URL always declares image/jpeg, but
            # encode_image_safely may emit PNG bytes for schematic-like images.
            # Most endpoints tolerate the mismatch — confirm for this one.
            payload = {
                "model": "Llama-3.2-90B-Vision-Instruct",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": 2000,
                "temperature": 0.1,  # Low temperature for technical accuracy
                "top_p": 0.9
            }

            print("📡 Sending request to SambaNova...")
            response = requests.post(
                self.sambanova_url,
                headers=headers,
                json=payload,
                timeout=120  # Longer timeout for vision processing
            )

            print(f"📊 SambaNova response: HTTP {response.status_code}")

            if response.status_code == 200:
                result = response.json()

                if "choices" in result and len(result["choices"]) > 0:
                    analysis = result["choices"][0]["message"]["content"]

                    # Validate response quality (reject generic/hedged answers)
                    is_valid, validation_error = self.validate_analysis_response(analysis, analysis_type)

                    if is_valid:
                        print("✅ SambaNova analysis validated successfully")
                        return {
                            "success": True,
                            "analysis": analysis,
                            "provider": "SambaNova",
                            "model": "Llama-3.2-90B-Vision-Instruct",
                            "analysis_type": analysis_type,
                            "timestamp": datetime.now().isoformat()
                        }, None
                    else:
                        print(f"❌ SambaNova validation failed: {validation_error}")
                        return None, f"SambaNova analysis validation failed: {validation_error}"
                else:
                    return None, "No analysis content in SambaNova response"
            else:
                error_text = response.text
                print(f"❌ SambaNova API error: {error_text}")
                return None, f"SambaNova API error {response.status_code}: {error_text}"

        # Narrow network failures first; the broad handler catches anything
        # else (e.g. malformed JSON) so this method never raises to callers.
        except requests.exceptions.Timeout:
            return None, "SambaNova request timeout - try again"
        except requests.exceptions.RequestException as e:
            return None, f"SambaNova request failed: {str(e)}"
        except Exception as e:
            return None, f"SambaNova analysis error: {str(e)}"
413 |
+
|
414 |
+
    def analyze_with_openai(self, image_path, analysis_type="detailed", custom_prompt=None):
        """
        Analyze circuit image using OpenAI vision model

        Args:
            image_path: Path to image file
            analysis_type: Type of analysis
            custom_prompt: Custom analysis prompt; overrides analysis_type template

        Returns:
            tuple: (result_dict, error_message) — exactly one is None
        """
        print("🔄 Starting OpenAI vision analysis...")

        # Fail fast when no key is configured so the caller can fall back.
        if not self.openai_api_key:
            return None, "OpenAI API key not configured"

        # Encode image
        base64_image, error = self.encode_image_safely(image_path)
        if error:
            return None, f"Image processing failed: {error}"

        try:
            # Select prompt: explicit custom prompt wins; unknown analysis
            # types fall back to the "detailed" template.
            if custom_prompt:
                prompt = custom_prompt
            else:
                prompt = self.analysis_prompts.get(analysis_type, self.analysis_prompts["detailed"])

            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.openai_api_key}"
            }

            # NOTE(review): as in the SambaNova path, the data URL claims
            # image/jpeg even when encode_image_safely produced PNG bytes.
            payload = {
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}",
                                    "detail": "high"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": 2000,
                "temperature": 0.1
            }

            print("📡 Sending request to OpenAI...")
            response = requests.post(self.openai_vision_url, headers=headers, json=payload, timeout=90)

            print(f"📊 OpenAI response: HTTP {response.status_code}")

            if response.status_code == 200:
                result = response.json()
                # NOTE(review): unlike the SambaNova path there is no "choices"
                # guard here — a malformed 200 body raises KeyError/IndexError,
                # which the broad except below converts to an error tuple.
                analysis = result["choices"][0]["message"]["content"]

                # Validate response (reject generic/hedged answers)
                is_valid, validation_error = self.validate_analysis_response(analysis, analysis_type)

                if is_valid:
                    print("✅ OpenAI analysis validated successfully")
                    return {
                        "success": True,
                        "analysis": analysis,
                        "provider": "OpenAI",
                        "model": "GPT-4o",
                        "analysis_type": analysis_type,
                        "timestamp": datetime.now().isoformat()
                    }, None
                else:
                    print(f"❌ OpenAI validation failed: {validation_error}")
                    return None, f"OpenAI analysis validation failed: {validation_error}"
            else:
                return None, f"OpenAI API error {response.status_code}: {response.text}"

        except Exception as e:
            # Catch-all so this method never raises to callers.
            return None, f"OpenAI analysis error: {str(e)}"
499 |
+
|
500 |
+
    def analyze_image(self, image_path, analysis_type="detailed", custom_prompt=None, preferred_provider=None):
        """
        Main image analysis function with multiple provider fallback

        Args:
            image_path: Path to image file
            analysis_type: Type of analysis to perform
            custom_prompt: Custom analysis prompt
            preferred_provider: Preferred AI provider ("openai" tries OpenAI
                first; anything else tries SambaNova first)

        Returns:
            dict: Analysis result with success/failure information (unlike the
            per-provider methods, this always returns a dict, never a tuple)
        """
        if not image_path:
            return {
                "success": False,
                "error": "No image provided",
                "analysis": "Please upload an image to analyze."
            }

        print(f"🎯 Starting {analysis_type} image analysis...")

        # Define provider order; the second entry is the fallback.
        if preferred_provider == "openai":
            providers = [
                ("OpenAI", self.analyze_with_openai),
                ("SambaNova", self.analyze_with_sambanova)
            ]
        else:
            providers = [
                ("SambaNova", self.analyze_with_sambanova),
                ("OpenAI", self.analyze_with_openai)
            ]

        # Try each provider in order; first validated success wins.
        for provider_name, provider_func in providers:
            print(f"🔄 Trying {provider_name}...")

            result, error = provider_func(image_path, analysis_type, custom_prompt)

            if result and result.get("success"):
                print(f"✅ {provider_name} analysis successful!")
                return result
            else:
                print(f"❌ {provider_name} failed: {error}")

        # All providers failed — return a user-facing self-help message in the
        # same dict shape a successful analysis would have.
        return {
            "success": False,
            "error": "All vision providers failed",
            "analysis": f"""🔍 **Vision Analysis Failed**

**Problem:** All available vision AI models failed to analyze your image.

**Possible Issues:**
- Image format or quality problems
- API service temporarily unavailable
- Image too large or unclear
- Network connectivity issues

**Solutions:**
1. **Try Different Format:** Convert PNG ↔ JPG
2. **Improve Image Quality:** Ensure good lighting and focus
3. **Reduce File Size:** Compress image if very large
4. **Manual Description:** Describe your circuit below

**Manual Analysis:**
Tell me about your circuit:
- What components do you see?
- What is the circuit supposed to do?
- What specific problems are you experiencing?
- What improvements are you looking for?

I can provide targeted advice based on your description! 🛠️

**Example:** "I have an Arduino connected to an L298N motor driver and two DC motors. The motors should move forward when I press a button, but nothing happens."
""",
            "provider": "None",
            "model": "Fallback",
            "analysis_type": analysis_type,
            "timestamp": datetime.now().isoformat()
        }
582 |
+
|
583 |
+
def batch_analyze_images(self, image_paths, analysis_type="detailed"):
|
584 |
+
"""
|
585 |
+
Analyze multiple images in batch
|
586 |
+
|
587 |
+
Args:
|
588 |
+
image_paths: List of image paths
|
589 |
+
analysis_type: Type of analysis
|
590 |
+
|
591 |
+
Returns:
|
592 |
+
list: List of analysis results
|
593 |
+
"""
|
594 |
+
results = []
|
595 |
+
|
596 |
+
for i, image_path in enumerate(image_paths):
|
597 |
+
print(f"🔄 Analyzing image {i+1}/{len(image_paths)}")
|
598 |
+
|
599 |
+
result = self.analyze_image(image_path, analysis_type)
|
600 |
+
results.append(result)
|
601 |
+
|
602 |
+
# Rate limiting between requests
|
603 |
+
if i < len(image_paths) - 1:
|
604 |
+
time.sleep(1)
|
605 |
+
|
606 |
+
return results
|
607 |
+
|
608 |
+
def get_analysis_summary(self, analysis_result):
|
609 |
+
"""
|
610 |
+
Extract key information from analysis result
|
611 |
+
|
612 |
+
Args:
|
613 |
+
analysis_result: Result from analyze_image()
|
614 |
+
|
615 |
+
Returns:
|
616 |
+
dict: Summarized information
|
617 |
+
"""
|
618 |
+
if not analysis_result.get("success"):
|
619 |
+
return {
|
620 |
+
"circuit_type": "unknown",
|
621 |
+
"components": [],
|
622 |
+
"issues": [],
|
623 |
+
"recommendations": []
|
624 |
+
}
|
625 |
+
|
626 |
+
analysis_text = analysis_result["analysis"].lower()
|
627 |
+
|
628 |
+
# Extract circuit type
|
629 |
+
circuit_types = {
|
630 |
+
"h_bridge": ["h-bridge", "motor driver", "motor control"],
|
631 |
+
"power_supply": ["power supply", "regulator", "voltage"],
|
632 |
+
"amplifier": ["amplifier", "amp", "audio"],
|
633 |
+
"sensor": ["sensor", "temperature", "humidity", "distance"],
|
634 |
+
"microcontroller": ["arduino", "esp32", "microcontroller"]
|
635 |
+
}
|
636 |
+
|
637 |
+
detected_type = "general"
|
638 |
+
for circuit_type, keywords in circuit_types.items():
|
639 |
+
if any(keyword in analysis_text for keyword in keywords):
|
640 |
+
detected_type = circuit_type
|
641 |
+
break
|
642 |
+
|
643 |
+
# Extract components (simple keyword search)
|
644 |
+
common_components = [
|
645 |
+
"arduino", "esp32", "resistor", "capacitor", "led", "transistor",
|
646 |
+
"diode", "ic", "motor", "sensor", "display", "battery"
|
647 |
+
]
|
648 |
+
|
649 |
+
found_components = [comp for comp in common_components if comp in analysis_text]
|
650 |
+
|
651 |
+
return {
|
652 |
+
"circuit_type": detected_type,
|
653 |
+
"components": found_components,
|
654 |
+
"provider": analysis_result.get("provider", "unknown"),
|
655 |
+
"model": analysis_result.get("model", "unknown"),
|
656 |
+
"confidence": "high" if analysis_result.get("provider") == "SambaNova" else "medium"
|
657 |
+
}
|
658 |
+
|
659 |
+
# Test function
|
660 |
+
# Test function — smoke-check the module when run directly (no image needed).
if __name__ == "__main__":
    # Constructor prints which provider API keys are configured.
    analyzer = AdvancedVisionAnalysis()

    # Test with a sample analysis
    print("🧪 Testing vision analysis module...")

    # This would normally test with an actual image file
    # NOTE(review): the commented example uses json.dumps, but this module
    # never imports json — add `import json` before enabling it.
    # result = analyzer.analyze_image("test_circuit.jpg", "detailed")
    # print(json.dumps(result, indent=2))

    print("✅ Vision analysis module loaded successfully")
|