""" Base class for all research tools """ from abc import ABC, abstractmethod from typing import Dict, Any, Optional import time import re from datetime import datetime class BaseTool(ABC): """Base class for all research tools""" def __init__(self, name: str, description: str): self.name = name self.description = description self.last_request_time = 0 self.rate_limit_delay = 1.0 # seconds between requests @abstractmethod def search(self, query: str, **kwargs) -> str: """Main search method - must be implemented by subclasses""" pass def rate_limit(self): """Simple rate limiting to be respectful to APIs""" current_time = time.time() time_since_last = current_time - self.last_request_time if time_since_last < self.rate_limit_delay: time.sleep(self.rate_limit_delay - time_since_last) self.last_request_time = time.time() def score_research_quality(self, research_result: str, source: str = "web") -> Dict[str, float]: """Score research based on multiple quality indicators""" quality_score = { "recency": self._check_recency(research_result), "authority": self._check_authority(research_result, source), "specificity": self._check_specificity(research_result), "relevance": self._check_relevance(research_result), "overall": 0.0 } # Weighted overall score weights = {"recency": 0.2, "authority": 0.3, "specificity": 0.3, "relevance": 0.2} quality_score["overall"] = sum(quality_score[metric] * weight for metric, weight in weights.items()) return quality_score def _check_recency(self, text: str) -> float: """Check for recent dates and current information""" if not text: return 0.3 # Look for years years = re.findall(r'\b(20\d{2})\b', text) if years: latest_year = max(int(year) for year in years) current_year = datetime.now().year recency = max(0, 1 - (current_year - latest_year) / 10) # Decay over 10 years return recency return 0.3 # Default for no date found def _check_authority(self, text: str, source: str) -> float: """Check source authority and credibility indicators""" authority_indicators = { 'arxiv': 0.9, 'sec': 0.95, 'github': 0.7, 'wikipedia': 0.8, 'web': 0.5 } base_score = authority_indicators.get(source.lower(), 0.5) # Look for credibility markers in text if text: credibility_markers = ['study', 'research', 'university', 'published', 'peer-reviewed', 'official'] marker_count = sum(1 for marker in credibility_markers if marker in text.lower()) credibility_boost = min(0.3, marker_count * 0.05) base_score += credibility_boost return min(1.0, base_score) def _check_specificity(self, text: str) -> float: """Check for specific data points and quantitative information""" if not text: return 0.1 # Count numbers, percentages, specific metrics numbers = len(re.findall(r'\b\d+(?:\.\d+)?%?\b', text)) specific_terms = len(re.findall(r'\b(?:exactly|precisely|specifically|measured|calculated)\b', text, re.IGNORECASE)) specificity = min(1.0, (numbers * 0.02) + (specific_terms * 0.1)) return max(0.1, specificity) # Minimum baseline def _check_relevance(self, text: str) -> float: """Check relevance to query (simplified implementation)""" # This would ideally use the original query for comparison # For now, return a baseline that could be enhanced return 0.7 # Placeholder - could be enhanced with query matching def should_use_for_query(self, query: str) -> bool: """Determine if this tool should be used for the given query""" # Default implementation - override in subclasses for smart routing return True def extract_key_info(self, text: str) -> Dict[str, Any]: """Extract key information from research results""" if not text: return {} return { 'length': len(text), 'has_numbers': bool(re.search(r'\d+', text)), 'has_dates': bool(re.search(r'\b20\d{2}\b', text)), 'has_urls': bool(re.search(r'http[s]?://', text)) } def format_error_response(self, query: str, error: str) -> str: """Format a consistent error response""" return f"**{self.name} Research for: {query}**\n\nResearch temporarily unavailable: {str(error)[:100]}..."