azettl committed
Commit ce0bf87 · 1 Parent(s): e840693

add new research tools
.gitignore ADDED
@@ -0,0 +1,5 @@
+
+.env
+/.gradio
+/research_tools/__pycache__
+/__pycache__
app.py CHANGED
@@ -14,6 +14,8 @@ import queue
 import uuid
 from gradio_consilium_roundtable import consilium_roundtable
 from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, InferenceClientModel, VisitWebpageTool, Tool
+from research_tools import EnhancedResearchAgent
+from enhanced_search_functions import ENHANCED_SEARCH_FUNCTIONS

 # Load environment variables
 load_dotenv()
@@ -34,133 +36,6 @@ avatar_images = {
     "Meta-Llama-3.3-70B-Instruct": "https://registry.npmmirror.com/@lobehub/icons-static-png/1.46.0/files/dark/meta-color.png",
 }

-# NATIVE FUNCTION CALLING: Define search functions for both Mistral and SambaNova
-SEARCH_FUNCTIONS = [
-    {
-        "type": "function",
-        "function": {
-            "name": "search_web",
-            "description": "Search the web for current information and data relevant to the decision being analyzed",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "query": {
-                        "type": "string",
-                        "description": "The search query to find current information relevant to the expert analysis"
-                    }
-                },
-                "required": ["query"]
-            }
-        }
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "search_wikipedia",
-            "description": "Search Wikipedia for comprehensive background information and authoritative data",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "topic": {
-                        "type": "string",
-                        "description": "The topic to research on Wikipedia for comprehensive background information"
-                    }
-                },
-                "required": ["topic"]
-            }
-        }
-    }
-]
-
-class WikipediaTool(Tool):
-    name = "wikipedia_search"
-    description = "Search Wikipedia for comprehensive information on any topic"
-    inputs = {"query": {"type": "string", "description": "The topic to search for on Wikipedia"}}
-    output_type = "string"
-
-    def forward(self, query: str) -> str:
-        try:
-            import wikipedia
-            # Search for the topic
-            search_results = wikipedia.search(query, results=3)
-            if not search_results:
-                return f"No Wikipedia articles found for: {query}"
-
-            # Get the first article
-            page = wikipedia.page(search_results[0])
-            summary = page.summary[:1000] + "..." if len(page.summary) > 1000 else page.summary
-
-            return f"**Wikipedia: {page.title}**\n\n{summary}\n\nSource: {page.url}"
-        except Exception as e:
-            return f"Wikipedia search error: {str(e)}"
-
-class WebSearchAgent:
-    def __init__(self):
-        try:
-            self.agent = CodeAgent(
-                tools=[
-                    DuckDuckGoSearchTool(),
-                    VisitWebpageTool(),
-                    WikipediaTool(),
-                    FinalAnswerTool()
-                ],
-                model=InferenceClientModel(),
-                max_steps=3,
-                verbosity_level=0
-            )
-        except Exception as e:
-            print(f"Warning: Could not initialize search agent: {e}")
-            self.agent = None
-
-    def search(self, query: str, max_results: int = 5) -> str:
-        """Use the CodeAgent to perform comprehensive web search and analysis"""
-        if not self.agent:
-            return f"Research agent not available. Please check dependencies."
-
-        try:
-            # Simplified prompt for TinyLlama to avoid code parsing issues
-            agent_prompt = f"Search for information about: {query}. Provide a brief summary of findings."
-
-            # Run the agent
-            result = self.agent.run(agent_prompt)
-
-            # Clean and validate the result
-            if result and isinstance(result, str) and len(result.strip()) > 0:
-                # Remove any code-like syntax that might cause parsing errors
-                cleaned_result = result.replace('```', '').replace('`', '').strip()
-                return f"**Web Research Results for: {query}**\n\n{cleaned_result}"
-            else:
-                return f"**Research for: {query}**\n\nNo clear results found. Please try a different search term."
-
-        except Exception as e:
-            # More robust fallback - return something useful instead of failing
-            error_msg = str(e)
-            if "max steps" in error_msg.lower():
-                return f"**Research for: {query}**\n\nResearch completed but reached complexity limit. Basic analysis: This query relates to {query.lower()} and would benefit from further investigation."
-            elif "syntax" in error_msg.lower():
-                return f"**Research for: {query}**\n\nResearch encountered formatting issues but found relevant information about {query.lower()}."
-            else:
-                return f"**Research for: {query}**\n\nResearch temporarily unavailable. Error: {error_msg[:100]}..."
-
-    def search_wikipedia(self, topic: str) -> str:
-        """Search Wikipedia for comprehensive information"""
-        try:
-            wiki_tool = WikipediaTool()
-            result = wiki_tool.forward(topic)
-
-            # Ensure we return a proper string and clean it
-            if result and isinstance(result, str):
-                # Clean any code syntax that might cause issues
-                cleaned_result = result.replace('```', '').replace('`', '').strip()
-                return cleaned_result
-            elif result:
-                return str(result)
-            else:
-                return f"**Wikipedia Research for: {topic}**\n\nNo results found, but this topic likely relates to {topic.lower()} and warrants further investigation."
-
-        except Exception as e:
-            return f"**Wikipedia Research for: {topic}**\n\nResearch temporarily unavailable but {topic.lower()} is a relevant topic for analysis. Error: {str(e)[:100]}..."
-
 def get_session_id(request: gr.Request = None) -> str:
     """Generate or retrieve session ID"""
     if request and hasattr(request, 'session_hash'):
@@ -218,7 +93,7 @@ def update_session_api_keys(mistral_key, sambanova_key, session_id_state, reques
 class VisualConsensusEngine:
     def __init__(self, moderator_model: str = None, update_callback=None, session_id: str = None):
         self.moderator_model = moderator_model or MODERATOR_MODEL
-        self.search_agent = WebSearchAgent()
+        self.search_agent = EnhancedResearchAgent()
         self.update_callback = update_callback
         self.session_id = session_id

@@ -314,54 +189,81 @@ class VisualConsensusEngine:
         # PRESERVE existing bubbles throughout research
         existing_bubbles = list(set(msg["speaker"] for msg in all_messages if msg.get("speaker") and msg["speaker"] != "Research Agent"))

-        # Step 1: Show expert waiting for research
-        waiting_message = {
+        # Get function display name
+        function_display = {
+            'search_web': 'Web Search',
+            'search_wikipedia': 'Wikipedia',
+            'search_academic': 'Academic Papers',
+            'search_technology_trends': 'Technology Trends',
+            'search_financial_data': 'Financial Data',
+            'multi_source_research': 'Multi-Source Research'
+        }.get(function, function.replace('_', ' ').title())
+
+        # Step 1: Show expert requesting research
+        request_message = {
             "speaker": speaker,
-            "text": f"🔍 Requesting research: {query}",
+            "text": f"🔍 **Research Request**: {function_display}\n📝 Query: \"{query}\"",
             "type": "research_request"
         }
-        all_messages.append(waiting_message)
+        all_messages.append(request_message)

         self.update_visual_state({
             "participants": participants,
             "messages": all_messages,
             "currentSpeaker": speaker,
             "thinking": [],
-            "showBubbles": existing_bubbles + [speaker]  # PRESERVE + ADD CURRENT
+            "showBubbles": existing_bubbles + [speaker]
         })
-        time.sleep(1)
+        time.sleep(1.5)

-        # Step 2: Show Research Agent thinking
+        # Step 2: Research Agent starts thinking
         self.update_visual_state({
             "participants": participants,
             "messages": all_messages,
             "currentSpeaker": None,
             "thinking": ["Research Agent"],
-            "showBubbles": existing_bubbles + [speaker, "Research Agent"]  # PRESERVE ALL
+            "showBubbles": existing_bubbles + [speaker, "Research Agent"]
         })
-        time.sleep(1)
+        time.sleep(2)

-        # Step 3: Show Research Agent working
-        research_message = {
+        # Step 3: Research Agent working - show detailed activity
+        working_message = {
             "speaker": "Research Agent",
-            "text": f"🔍 Researching: {function.replace('_', ' ')} - '{query}'",
+            "text": f"🔍 **Conducting Research**: {function_display}\n📊 Analyzing: \"{query}\"\n⏳ Please wait while I gather information...",
             "type": "research_activity"
         }
-        all_messages.append(research_message)
+        all_messages.append(working_message)

         self.update_visual_state({
             "participants": participants,
             "messages": all_messages,
             "currentSpeaker": "Research Agent",
             "thinking": [],
-            "showBubbles": existing_bubbles + [speaker, "Research Agent"]  # PRESERVE ALL
+            "showBubbles": existing_bubbles + [speaker, "Research Agent"]
         })
-        time.sleep(2)  # Longer pause to see research happening
+        time.sleep(3)  # Longer pause to see research happening
+
+        # Step 4: Research completion notification
+        completion_message = {
+            "speaker": "Research Agent",
+            "text": f"✅ **Research Complete**: {function_display}\n📋 Results ready for analysis",
+            "type": "research_complete"
+        }
+        all_messages.append(completion_message)

-        # Step 4: Research Agent goes back to quiet, expert processes results
+        self.update_visual_state({
+            "participants": participants,
+            "messages": all_messages,
+            "currentSpeaker": "Research Agent",
+            "thinking": [],
+            "showBubbles": existing_bubbles + [speaker, "Research Agent"]
+        })
+        time.sleep(1.5)
+
+        # Step 5: Expert processing results
         processing_message = {
             "speaker": speaker,
-            "text": f"📊 Processing research results...",
+            "text": f"📊 **Processing Research Results**\n🧠 Integrating {function_display} findings into analysis...",
             "type": "research_processing"
         }
         all_messages.append(processing_message)
@@ -371,12 +273,33 @@ class VisualConsensusEngine:
             "messages": all_messages,
             "currentSpeaker": speaker,
             "thinking": [],
-            "showBubbles": existing_bubbles + [speaker]  # PRESERVE EXISTING + CURRENT
+            "showBubbles": existing_bubbles + [speaker, "Research Agent"]  # Keep Research Agent visible longer
         })
-        time.sleep(1)
+        time.sleep(2)
+
+    def log_research_activity(self, speaker: str, function: str, query: str, result: str, log_function=None):
+        """Log research activity to the discussion log"""
+        if log_function:
+            # Log the research request
+            log_function('research_request',
+                         speaker="Research Agent",
+                         content=f"Research requested by {speaker}: {function.replace('_', ' ').title()} - '{query}'",
+                         function=function,
+                         query=query,
+                         requesting_expert=speaker)
+
+            # Log the research result (truncated for readability)
+            result_preview = result[:300] + "..." if len(result) > 300 else result
+            log_function('research_result',
+                         speaker="Research Agent",
+                         content=f"Research completed: {function.replace('_', ' ').title()}\n\n{result_preview}",
+                         function=function,
+                         query=query,
+                         full_result=result,
+                         requesting_expert=speaker)

     def handle_function_calls(self, completion, original_prompt: str, calling_model: str) -> str:
-        """UNIFIED function call handler for both Mistral and SambaNova"""
+        """UNIFIED function call handler with enhanced research capabilities"""

         # Check if completion is valid
         if not completion or not completion.choices or len(completion.choices) == 0:
@@ -387,10 +310,8 @@ class VisualConsensusEngine:

         # If no function calls, return regular response
         if not hasattr(message, 'tool_calls') or not message.tool_calls:
-            # EXTRACT CONTENT PROPERLY
             content = message.content
             if isinstance(content, list):
-                # Handle structured content (like from Mistral)
                 text_parts = []
                 for part in content:
                     if isinstance(part, dict) and 'text' in part:
@@ -422,21 +343,30 @@ class VisualConsensusEngine:
                     arguments = json.loads(tool_call.function.arguments)

                     # Show research activity in UI
-                    query_param = arguments.get("query") or arguments.get("topic")
+                    query_param = arguments.get("query") or arguments.get("topic") or arguments.get("technology") or arguments.get("company")
                     if query_param:
                         self.show_research_activity(calling_model_name, function_name, query_param)

-                    # Execute the function
-                    if function_name == "search_web":
-                        result = self.search_agent.search(arguments["query"])
-                    elif function_name == "search_wikipedia":
-                        result = self.search_agent.search_wikipedia(arguments["topic"])
-                    else:
-                        result = f"Unknown function: {function_name}"
+                    # Execute the enhanced research functions
+                    result = self._execute_research_function(function_name, arguments)

-                    # Ensure result is a string, not an object
+                    # Ensure result is a string
                     if not isinstance(result, str):
                         result = str(result)
+
+                    # Log the research activity (with access to session log function)
+                    session = get_or_create_session_state(self.session_id)
+                    def session_log_function(event_type, speaker="", content="", **kwargs):
+                        session["discussion_log"].append({
+                            'type': event_type,
+                            'speaker': speaker,
+                            'content': content,
+                            'timestamp': datetime.now().strftime('%H:%M:%S'),
+                            **kwargs
+                        })
+
+                    if query_param and result:
+                        self.log_research_activity(calling_model_name, function_name, query_param, result, session_log_function)

                     # Add function result to conversation
                     messages.append({
@@ -447,7 +377,6 @@ class VisualConsensusEngine:

                 except Exception as e:
                     print(f"Error processing tool call: {str(e)}")
-                    # Add error result to conversation
                     messages.append({
                         "role": "tool",
                         "tool_call_id": tool_call.id,
@@ -487,7 +416,6 @@ class VisualConsensusEngine:
             if final_completion and final_completion.choices and len(final_completion.choices) > 0:
                 final_content = final_completion.choices[0].message.content

-                # HANDLE STRUCTURED CONTENT FROM FINAL RESPONSE TOO
                 if isinstance(final_content, list):
                     text_parts = []
                     for part in final_content:
@@ -506,6 +434,42 @@ class VisualConsensusEngine:
         except Exception as e:
             print(f"Error in follow-up completion for {calling_model}: {str(e)}")
             return message.content or "Analysis completed with research integration."
+
+    def _execute_research_function(self, function_name: str, arguments: dict) -> str:
+        """Execute research function with enhanced capabilities"""
+        try:
+            if function_name == "search_web":
+                depth = arguments.get("depth", "standard")
+                return self.search_agent.search(arguments["query"], depth)
+
+            elif function_name == "search_wikipedia":
+                return self.search_agent.search_wikipedia(arguments["topic"])
+
+            elif function_name == "search_academic":
+                source = arguments.get("source", "both")
+                if source == "arxiv":
+                    return self.search_agent.tools['arxiv'].search(arguments["query"])
+                elif source == "scholar":
+                    return self.search_agent.tools['scholar'].search(arguments["query"])
+                else:  # both
+                    arxiv_result = self.search_agent.tools['arxiv'].search(arguments["query"])
+                    scholar_result = self.search_agent.tools['scholar'].search(arguments["query"])
+                    return f"{arxiv_result}\n\n{scholar_result}"
+
+            elif function_name == "search_technology_trends":
+                return self.search_agent.tools['github'].search(arguments["technology"])
+
+            elif function_name == "search_financial_data":
+                return self.search_agent.tools['sec'].search(arguments["company"])
+
+            elif function_name == "multi_source_research":
+                return self.search_agent.search(arguments["query"], "deep")
+
+            else:
+                return f"Unknown research function: {function_name}"
+
+        except Exception as e:
+            return f"Research function error: {str(e)}"

     def call_model(self, model: str, prompt: str, context: str = "") -> Optional[str]:
         """Enhanced model calling with native function calling support"""
@@ -562,7 +526,7 @@ class VisualConsensusEngine:
             completion = client.chat.completions.create(
                 model=sambanova_model,
                 messages=[{"role": "user", "content": prompt}],
-                tools=SEARCH_FUNCTIONS,
+                tools=ENHANCED_SEARCH_FUNCTIONS,
                 tool_choice="auto",
                 max_tokens=1000,
                 temperature=0.7
@@ -614,7 +578,7 @@ class VisualConsensusEngine:
             completion = client.chat.completions.create(
                 model='mistral-large-latest',
                 messages=[{"role": "user", "content": prompt}],
-                tools=SEARCH_FUNCTIONS,
+                tools=ENHANCED_SEARCH_FUNCTIONS,
                 tool_choice="auto",
                 max_tokens=1000,
                 temperature=0.7
@@ -802,7 +766,7 @@ ANALYSIS REQUIREMENTS:
 - {action_prompt}
 - {stakes}
 - Use specific examples, data, and evidence
-- If you need current information or research, you can search the web or Wikipedia
+- If you need current information or research, you can search the web, Wikipedia, academic papers, technology trends, or financial data
 - Maximum 200 words of focused analysis
 - End with "Position: [YOUR CLEAR STANCE]" and "Confidence: X/10"

@@ -1222,7 +1186,7 @@ def run_consensus_discussion_session(question: str, discussion_rounds: int = 3,
 - **Research Integration:** Native function calling with live data
 - **Session ID:** {session_id[:3]}...

-*Generated by Consilium Visual AI Consensus Platform*"""
+*Generated by Consilium: Multi-AI Expert Consensus Platform*"""

     # Format session-specific discussion log
     formatted_log = format_session_discussion_log(session["discussion_log"])
@@ -1242,10 +1206,13 @@ def format_session_discussion_log(discussion_log: list) -> str:

     for entry in discussion_log:
         timestamp = entry.get('timestamp', datetime.now().strftime('%H:%M:%S'))
+
         if entry['type'] == 'thinking':
            formatted_log += f"**{timestamp}** 🤔 **{entry['speaker']}** is analyzing...\n\n"
+
         elif entry['type'] == 'speaking':
            formatted_log += f"**{timestamp}** 💬 **{entry['speaker']}** is presenting...\n\n"
+
         elif entry['type'] == 'message':
            formatted_log += f"**{timestamp}** 📋 **{entry['speaker']}** ({entry.get('role', 'standard')}):\n"
            formatted_log += f"> {entry['content']}\n"
@@ -1253,6 +1220,28 @@ def format_session_discussion_log(discussion_log: list) -> str:
                formatted_log += f"*Confidence: {entry['confidence']}/10*\n\n"
            else:
                formatted_log += "\n"
+
+        elif entry['type'] == 'research_request':
+            function_name = entry.get('function', 'Unknown')
+            query = entry.get('query', 'Unknown query')
+            requesting_expert = entry.get('requesting_expert', 'Unknown expert')
+            formatted_log += f"**{timestamp}** 🔍 **Research Agent** - Research Request:\n"
+            formatted_log += f"> **Function:** {function_name.replace('_', ' ').title()}\n"
+            formatted_log += f"> **Query:** \"{query}\"\n"
+            formatted_log += f"> **Requested by:** {requesting_expert}\n\n"
+
+        elif entry['type'] == 'research_result':
+            function_name = entry.get('function', 'Unknown')
+            query = entry.get('query', 'Unknown query')
+            requesting_expert = entry.get('requesting_expert', 'Unknown expert')
+            full_result = entry.get('full_result', entry.get('content', 'No result'))
+            formatted_log += f"**{timestamp}** 📊 **Research Agent** - Research Results:\n"
+            formatted_log += f"> **Function:** {function_name.replace('_', ' ').title()}\n"
+            formatted_log += f"> **Query:** \"{query}\"\n"
+            formatted_log += f"> **For Expert:** {requesting_expert}\n\n"
+            formatted_log += f"**Research Results:**\n"
+            formatted_log += f"```\n{full_result}\n```\n\n"
+
         elif entry['type'] == 'phase':
             formatted_log += f"\n---\n## {entry['content']}\n---\n\n"

@@ -1291,7 +1280,7 @@ def check_model_status_session(session_id_state: str = None, request: gr.Request
     return status_info

 # Create the professional interface
-with gr.Blocks(title="🎭 Consilium: Visual AI Consensus Platform", theme=gr.themes.Soft()) as demo:
+with gr.Blocks(title="🎭 Consilium: Multi-AI Expert Consensus Platform", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🎭 Consilium: Multi-AI Expert Consensus Platform

@@ -1307,7 +1296,7 @@ with gr.Blocks(title="🎭 Consilium: Visual AI Consensus Platform", theme=gr.th
     * Visual roundtable of the AI models, including speech bubbles to see the discussion in real time.
     * MCP mode enabled to also use it directly in, for example, Claude Desktop (without the visual table).
     * Includes Mistral (**mistral-large-latest**) via their API and the Models **DeepSeek-R1**, **Meta-Llama-3.3-70B-Instruct** and **QwQ-32B** via the SambaNova API.
-    * Research Agent to search via **DuckDuckGo** or **Wikipedia**, added as a tool for the models from Mistral and Llama.
+    * Research Agent with 6 sources (**Web Search**, **Wikipedia**, **arXiv**, **GitHub**, **SEC EDGAR**, **Google Scholar**) for comprehensive live research.
     * Assign different roles to the models, the protocol they should follow, and decide the communication strategy.
     * Pick one model as the lead analyst (had the best results when picking Mistral).
     * Configure the amount of discussion rounds.
@@ -1534,6 +1523,32 @@ with gr.Blocks(title="🎭 Consilium: Visual AI Consensus Platform", theme=gr.th
     """)

     with gr.Tab("📚 Documentation"):
+        gr.Markdown("""
+        ## 🔬 **Research Capabilities**
+
+        ### **🌐 Multi-Source Research**
+        - **DuckDuckGo Web Search**: Current events, news, real-time information
+        - **Wikipedia**: Authoritative background and encyclopedic data
+        - **arXiv**: Academic papers and scientific research preprints
+        - **Google Scholar**: Peer-reviewed research and citation analysis
+        - **GitHub**: Technology trends, adoption patterns, developer activity
+        - **SEC EDGAR**: Public company financial data and regulatory filings
+
+        ### **🎯 Smart Research Routing**
+        The system automatically routes queries to the most appropriate sources:
+        - **Academic queries** → arXiv + Google Scholar
+        - **Technology questions** → GitHub + Web Search
+        - **Company research** → SEC filings + Web Search
+        - **Current events** → Web Search + Wikipedia
+        - **Deep research** → Multi-source synthesis with quality scoring
+
+        ### **📊 Research Quality Scoring**
+        Each research result is scored on:
+        - **Recency** (0-1): How current is the information
+        - **Authority** (0-1): Source credibility and reliability
+        - **Specificity** (0-1): Quantitative data and specific details
+        - **Relevance** (0-1): How well it matches the query
+        """)
         gr.Markdown("""
         ## 🎓 **Expert Role Assignments**

enhanced_search_functions.py ADDED
@@ -0,0 +1,148 @@
+"""
+Enhanced Search Functions for Native Function Calling
+This file defines all the function calling schemas for the enhanced research system
+"""
+
+ENHANCED_SEARCH_FUNCTIONS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "search_web",
+            "description": "Search the web for current information and real-time data using DuckDuckGo",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "The search query to find current information relevant to the expert analysis"
+                    },
+                    "depth": {
+                        "type": "string",
+                        "enum": ["standard", "deep"],
+                        "description": "Search depth - 'standard' for single source, 'deep' for multi-source synthesis",
+                        "default": "standard"
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "search_wikipedia",
+            "description": "Search Wikipedia for comprehensive background information and authoritative encyclopedic data",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "topic": {
+                        "type": "string",
+                        "description": "The topic to research on Wikipedia for comprehensive background information"
+                    }
+                },
+                "required": ["topic"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "search_academic",
+            "description": "Search academic papers and research on arXiv and Google Scholar for scientific evidence",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Academic research query to find peer-reviewed papers and scientific studies"
+                    },
+                    "source": {
+                        "type": "string",
+                        "enum": ["arxiv", "scholar", "both"],
+                        "description": "Academic source to search - arXiv for preprints, Scholar for citations, both for comprehensive",
+                        "default": "both"
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "search_technology_trends",
+            "description": "Search GitHub for technology adoption, development trends, and open source activity",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "technology": {
+                        "type": "string",
+                        "description": "Technology, framework, or programming language to research for adoption trends"
+                    }
+                },
+                "required": ["technology"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "search_financial_data",
+            "description": "Search SEC EDGAR filings and financial data for public companies",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "company": {
+                        "type": "string",
+                        "description": "Company name or ticker symbol to research financial data and SEC filings"
+                    }
+                },
+                "required": ["company"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "multi_source_research",
+            "description": "Perform comprehensive multi-source research synthesis across all available sources",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Research query for comprehensive multi-source analysis"
+                    },
+                    "priority_sources": {
+                        "type": "array",
+                        "items": {
+                            "type": "string",
+                            "enum": ["web", "wikipedia", "arxiv", "scholar", "github", "sec"]
+                        },
+                        "description": "Priority list of sources to focus on for this research",
+                        "default": []
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    }
+]
+
+def get_function_definitions():
+    """Get the complete function definitions for API calls"""
+    return ENHANCED_SEARCH_FUNCTIONS
+
+def get_function_names():
+    """Get list of all available function names"""
+    return [func["function"]["name"] for func in ENHANCED_SEARCH_FUNCTIONS]
+
+# Function routing map for backward compatibility
+FUNCTION_ROUTING = {
+    "search_web": "web_search",
+    "search_wikipedia": "wikipedia_search",
+    "search_academic": "academic_search",
+    "search_technology_trends": "github_search",
+    "search_financial_data": "sec_search",
+    "multi_source_research": "multi_source_search"
+}
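Since these schemas are sent verbatim as the `tools=` payload, a cheap consistency check catches drift between the schema list and the routing map. This is a sketch, not part of the commit:

```python
# Sanity-check the schemas against the routing map before shipping them to an API.
from enhanced_search_functions import ENHANCED_SEARCH_FUNCTIONS, FUNCTION_ROUTING, get_function_names

names = get_function_names()
assert names == list(FUNCTION_ROUTING)  # every schema has a routing entry, in order

for func in ENHANCED_SEARCH_FUNCTIONS:
    params = func["function"]["parameters"]
    # every required argument must be declared in properties
    assert all(r in params["properties"] for r in params.get("required", [])), func["function"]["name"]

print(names)
# ['search_web', 'search_wikipedia', 'search_academic',
#  'search_technology_trends', 'search_financial_data', 'multi_source_research']
```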
requirements.txt CHANGED
@@ -5,6 +5,7 @@ markdownify
 requests
 python-dotenv
 duckduckgo-search
-wikipedia-api
+wikipedia
 gradio-consilium-roundtable
-openai
+openai
+scholarly
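The dependency swap matters: the removed `WikipediaTool` in app.py called `wikipedia.search()` and `wikipedia.page()`, which is the API of the `wikipedia` package, while the previously pinned `wikipedia-api` package imports as `wikipediaapi` and exposes a different interface; the new `WikipediaSearchTool` presumably relies on the same calls. A minimal check of the expected interface (query illustrative):

```python
# The interface the research tools expect from the `wikipedia` package.
import wikipedia

titles = wikipedia.search("multi-agent systems", results=3)  # list of article titles
page = wikipedia.page(titles[0])                             # may raise DisambiguationError
print(page.title)
print(page.summary[:200])
print(page.url)
```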
research_tools/__init__.py ADDED
@@ -0,0 +1,20 @@
+# Research Tools Package
+from .base_tool import BaseTool
+from .web_search import WebSearchTool
+from .wikipedia_search import WikipediaSearchTool
+from .arxiv_search import ArxivSearchTool
+from .github_search import GitHubSearchTool
+from .sec_search import SECSearchTool
+from .scholar_search import GoogleScholarTool
+from .research_agent import EnhancedResearchAgent
+
+__all__ = [
+    'BaseTool',
+    'WebSearchTool',
+    'WikipediaSearchTool',
+    'ArxivSearchTool',
+    'GitHubSearchTool',
+    'SECSearchTool',
+    'GoogleScholarTool',
+    'EnhancedResearchAgent'
+]
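With these exports, app.py only needs the single top-level import shown in its diff. A smoke test might look like this (queries are illustrative):

```python
# Smoke test for the package exports (queries are illustrative).
from research_tools import EnhancedResearchAgent

agent = EnhancedResearchAgent()
print(agent.search("rust adoption in web backends"))          # smart-routed single source
print(agent.search("impact of AI on software jobs", "deep"))  # multi-source synthesis
print(agent.search_wikipedia("Consensus decision-making"))    # backward-compatible helper
```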
research_tools/arxiv_search.py ADDED
@@ -0,0 +1,164 @@
+"""
+arXiv Academic Papers Search Tool
+"""
+from .base_tool import BaseTool
+import requests
+import xml.etree.ElementTree as ET
+from typing import Dict, List, Optional
+from urllib.parse import quote
+
+
+class ArxivSearchTool(BaseTool):
+    """Search arXiv for academic papers and research"""
+
+    def __init__(self):
+        super().__init__("arXiv", "Search academic papers and research on arXiv")
+        self.base_url = "http://export.arxiv.org/api/query"
+        self.rate_limit_delay = 2.0  # Be respectful to arXiv
+
+    def search(self, query: str, max_results: int = 5, **kwargs) -> str:
+        """Search arXiv for academic papers"""
+        self.rate_limit()
+
+        try:
+            # Prepare search parameters
+            params = {
+                'search_query': f'all:{query}',
+                'start': 0,
+                'max_results': max_results,
+                'sortBy': 'relevance',
+                'sortOrder': 'descending'
+            }
+
+            # Make request with better error handling
+            response = requests.get(self.base_url, params=params, timeout=20,
+                                    headers={'User-Agent': 'Research Tool ([email protected])'})
+            response.raise_for_status()
+
+            # Parse XML response
+            root = ET.fromstring(response.content)
+
+            # Extract paper information
+            papers = []
+            for entry in root.findall('{http://www.w3.org/2005/Atom}entry'):
+                paper = self._parse_arxiv_entry(entry)
+                if paper:
+                    papers.append(paper)
+
+            # Format results
+            if papers:
+                result = f"**arXiv Academic Research for: {query}**\n\n"
+                for i, paper in enumerate(papers, 1):
+                    result += f"**Paper {i}: {paper['title']}**\n"
+                    result += f"Authors: {paper['authors']}\n"
+                    result += f"Published: {paper['published']}\n"
+                    result += f"Category: {paper.get('category', 'Unknown')}\n"
+                    result += f"Abstract: {paper['abstract'][:400]}...\n"
+                    result += f"Link: {paper['link']}\n\n"
+
+                # Add research quality assessment
+                result += self._assess_arxiv_quality(papers)
+
+                return result
+            else:
+                return f"**arXiv Research for: {query}**\n\nNo relevant academic papers found on arXiv."
+
+        except requests.Timeout:
+            return f"**arXiv Research for: {query}**\n\nRequest timeout - arXiv may be experiencing high load. Research available but slower than expected."
+        except requests.ConnectionError as e:
+            if "Connection reset" in str(e):
+                return f"**arXiv Research for: {query}**\n\nConnection reset by arXiv server - this is common due to rate limiting. Academic research is available but temporarily throttled."
+            return self.format_error_response(query, f"Connection error: {str(e)}")
+        except requests.RequestException as e:
+            return self.format_error_response(query, f"Network error accessing arXiv: {str(e)}")
+        except ET.ParseError as e:
+            return self.format_error_response(query, f"Error parsing arXiv response: {str(e)}")
+        except Exception as e:
+            return self.format_error_response(query, str(e))
+
+    def _parse_arxiv_entry(self, entry) -> Optional[Dict[str, str]]:
+        """Parse individual arXiv entry"""
+        try:
+            ns = {'atom': 'http://www.w3.org/2005/Atom'}
+
+            title = entry.find('atom:title', ns)
+            title_text = title.text.strip().replace('\n', ' ') if title is not None else "Unknown Title"
+
+            authors = entry.findall('atom:author/atom:name', ns)
+            author_names = [author.text for author in authors] if authors else ["Unknown Author"]
+
+            published = entry.find('atom:published', ns)
+            published_text = published.text[:10] if published is not None else "Unknown Date"  # YYYY-MM-DD
+
+            summary = entry.find('atom:summary', ns)
+            abstract = summary.text.strip().replace('\n', ' ') if summary is not None else "No abstract available"
+
+            link = entry.find('atom:id', ns)
+            link_url = link.text if link is not None else ""
+
+            # Extract category
+            categories = entry.findall('atom:category', ns)
+            category = categories[0].get('term') if categories else "Unknown"
+
+            return {
+                'title': title_text,
+                'authors': ', '.join(author_names[:3]),  # Limit to first 3 authors
+                'published': published_text,
+                'abstract': abstract,
+                'link': link_url,
+                'category': category
+            }
+        except Exception as e:
+            print(f"Error parsing arXiv entry: {e}")
+            return None
+
+    def _assess_arxiv_quality(self, papers: List[Dict]) -> str:
+        """Assess the quality of arXiv search results"""
+        if not papers:
+            return ""
+
+        # Calculate average recency
+        current_year = 2025
+        recent_papers = sum(1 for paper in papers if paper['published'].startswith(('2024', '2025')))
+
+        quality_assessment = f"**Research Quality Assessment:**\n"
+        quality_assessment += f"• Papers found: {len(papers)}\n"
+        quality_assessment += f"• Recent papers (2024-2025): {recent_papers}/{len(papers)}\n"
+
+        # Check for high-impact categories
+        categories = [paper.get('category', '') for paper in papers]
+        ml_ai_papers = sum(1 for cat in categories if any(term in cat.lower() for term in ['cs.ai', 'cs.lg', 'cs.cv', 'stat.ml']))
+        if ml_ai_papers > 0:
+            quality_assessment += f"• AI/ML papers: {ml_ai_papers}\n"
+
+        quality_assessment += f"• Authority level: High (peer-reviewed preprints)\n\n"
+
+        return quality_assessment
+
+    def should_use_for_query(self, query: str) -> bool:
+        """arXiv is good for scientific, technical, and research-oriented queries"""
+        academic_indicators = [
+            'research', 'study', 'analysis', 'scientific', 'algorithm', 'method',
+            'machine learning', 'ai', 'artificial intelligence', 'deep learning',
+            'neural network', 'computer science', 'physics', 'mathematics',
+            'quantum', 'cryptography', 'blockchain', 'paper', 'academic'
+        ]
+
+        query_lower = query.lower()
+        return any(indicator in query_lower for indicator in academic_indicators)
+
+    def extract_key_info(self, text: str) -> dict:
+        """Extract key information from arXiv results"""
+        base_info = super().extract_key_info(text)
+
+        if text:
+            # Look for arXiv-specific patterns
+            base_info.update({
+                'paper_count': text.count('**Paper'),
+                'has_abstracts': 'Abstract:' in text,
+                'has_recent_papers': any(year in text for year in ['2024', '2025']),
+                'has_ai_ml': any(term in text.lower() for term in ['machine learning', 'ai', 'neural', 'deep learning']),
+                'has_arxiv_links': 'arxiv.org' in text
+            })
+
+        return base_info
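Used standalone, the tool can be exercised like this; the query is illustrative, and the printed report follows the markdown layout built in `search()` above:

```python
# Standalone use of the arXiv tool (query is illustrative).
from research_tools import ArxivSearchTool

tool = ArxivSearchTool()
print(tool.should_use_for_query("deep learning robustness"))  # True: academic indicators match
report = tool.search("deep learning robustness", max_results=3)
print(report)  # per-paper summaries plus the quality assessment block
```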
research_tools/base_tool.py ADDED
@@ -0,0 +1,123 @@
+"""
+Base class for all research tools
+"""
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional
+import time
+import re
+from datetime import datetime
+
+
+class BaseTool(ABC):
+    """Base class for all research tools"""
+
+    def __init__(self, name: str, description: str):
+        self.name = name
+        self.description = description
+        self.last_request_time = 0
+        self.rate_limit_delay = 1.0  # seconds between requests
+
+    @abstractmethod
+    def search(self, query: str, **kwargs) -> str:
+        """Main search method - must be implemented by subclasses"""
+        pass
+
+    def rate_limit(self):
+        """Simple rate limiting to be respectful to APIs"""
+        current_time = time.time()
+        time_since_last = current_time - self.last_request_time
+        if time_since_last < self.rate_limit_delay:
+            time.sleep(self.rate_limit_delay - time_since_last)
+        self.last_request_time = time.time()
+
+    def score_research_quality(self, research_result: str, source: str = "web") -> Dict[str, float]:
+        """Score research based on multiple quality indicators"""
+
+        quality_score = {
+            "recency": self._check_recency(research_result),
+            "authority": self._check_authority(research_result, source),
+            "specificity": self._check_specificity(research_result),
+            "relevance": self._check_relevance(research_result),
+            "overall": 0.0
+        }
+
+        # Weighted overall score
+        weights = {"recency": 0.2, "authority": 0.3, "specificity": 0.3, "relevance": 0.2}
+        quality_score["overall"] = sum(quality_score[metric] * weight for metric, weight in weights.items())
+
+        return quality_score
+
+    def _check_recency(self, text: str) -> float:
+        """Check for recent dates and current information"""
+        if not text:
+            return 0.3
+
+        # Look for years
+        years = re.findall(r'\b(20\d{2})\b', text)
+        if years:
+            latest_year = max(int(year) for year in years)
+            current_year = datetime.now().year
+            recency = max(0, 1 - (current_year - latest_year) / 10)  # Decay over 10 years
+            return recency
+        return 0.3  # Default for no date found
+
+    def _check_authority(self, text: str, source: str) -> float:
+        """Check source authority and credibility indicators"""
+        authority_indicators = {
+            'arxiv': 0.9,
+            'scholar': 0.9,
+            'sec': 0.95,
+            'github': 0.7,
+            'wikipedia': 0.8,
+            'web': 0.5
+        }
+
+        base_score = authority_indicators.get(source.lower(), 0.5)
+
+        # Look for credibility markers in text
+        if text:
+            credibility_markers = ['study', 'research', 'university', 'published', 'peer-reviewed', 'official']
+            marker_count = sum(1 for marker in credibility_markers if marker in text.lower())
+            credibility_boost = min(0.3, marker_count * 0.05)
+            base_score += credibility_boost
+
+        return min(1.0, base_score)
+
+    def _check_specificity(self, text: str) -> float:
+        """Check for specific data points and quantitative information"""
+        if not text:
+            return 0.1
+
+        # Count numbers, percentages, specific metrics
+        numbers = len(re.findall(r'\b\d+(?:\.\d+)?%?\b', text))
+        specific_terms = len(re.findall(r'\b(?:exactly|precisely|specifically|measured|calculated)\b', text, re.IGNORECASE))
+
+        specificity = min(1.0, (numbers * 0.02) + (specific_terms * 0.1))
+        return max(0.1, specificity)  # Minimum baseline
+
+    def _check_relevance(self, text: str) -> float:
+        """Check relevance to query (simplified implementation)"""
+        # This would ideally use the original query for comparison
+        # For now, return a baseline that could be enhanced
+        return 0.7  # Placeholder - could be enhanced with query matching
+
+    def should_use_for_query(self, query: str) -> bool:
+        """Determine if this tool should be used for the given query"""
+        # Default implementation - override in subclasses for smart routing
+        return True
+
+    def extract_key_info(self, text: str) -> Dict[str, Any]:
+        """Extract key information from research results"""
+        if not text:
+            return {}
+
+        return {
+            'length': len(text),
+            'has_numbers': bool(re.search(r'\d+', text)),
+            'has_dates': bool(re.search(r'\b20\d{2}\b', text)),
+            'has_urls': bool(re.search(r'http[s]?://', text))
+        }
+
+    def format_error_response(self, query: str, error: str) -> str:
+        """Format a consistent error response"""
+        return f"**{self.name} Research for: {query}**\n\nResearch temporarily unavailable: {str(error)[:100]}..."
research_tools/github_search.py ADDED
@@ -0,0 +1,203 @@
+"""
+GitHub Technology Trends Search Tool
+"""
+from .base_tool import BaseTool
+import requests
+import json
+from typing import Dict, List, Optional
+from datetime import datetime, timedelta
+
+
+class GitHubSearchTool(BaseTool):
+    """Search GitHub for technology trends and adoption patterns"""
+
+    def __init__(self):
+        super().__init__("GitHub", "Search GitHub for technology adoption and development trends")
+        self.base_url = "https://api.github.com"
+        self.rate_limit_delay = 2.0  # GitHub has rate limits
+
+    def search(self, technology: str, max_results: int = 5, **kwargs) -> str:
+        """Search GitHub for technology trends and adoption"""
+        self.rate_limit()
+
+        try:
+            # Search repositories
+            repos_data = self._search_repositories(technology, max_results)
+
+            if not repos_data or not repos_data.get('items'):
+                return f"**GitHub Technology Research for: {technology}**\n\nNo relevant repositories found."
+
+            result = f"**GitHub Technology Trends for: {technology}**\n\n"
+
+            # Repository analysis
+            result += self._format_repository_data(repos_data['items'], technology)
+
+            # Trend analysis
+            result += self._analyze_technology_trends(repos_data, technology)
+
+            # Recent activity analysis
+            result += self._analyze_recent_activity(repos_data['items'], technology)
+
+            return result
+
+        except requests.RequestException as e:
+            return self.format_error_response(technology, f"Network error accessing GitHub: {str(e)}")
+        except Exception as e:
+            return self.format_error_response(technology, str(e))
+
+    def _search_repositories(self, technology: str, max_results: int) -> Optional[Dict]:
+        """Search GitHub repositories for the technology"""
+        repos_url = f"{self.base_url}/search/repositories"
+
+        # Create comprehensive search query
+        search_query = f'{technology} language:python OR language:javascript OR language:typescript OR language:go OR language:rust'
+
+        params = {
+            'q': search_query,
+            'sort': 'stars',
+            'order': 'desc',
+            'per_page': max_results
+        }
+
+        response = requests.get(repos_url, params=params, timeout=15)
+        response.raise_for_status()
+        return response.json()
+
+    def _format_repository_data(self, repositories: List[Dict], technology: str) -> str:
+        """Format repository information"""
+        result = f"**Top {len(repositories)} Repositories:**\n"
+
+        for i, repo in enumerate(repositories, 1):
+            stars = repo.get('stargazers_count', 0)
+            forks = repo.get('forks_count', 0)
+            language = repo.get('language', 'Unknown')
+            updated = repo.get('updated_at', '')[:10]  # YYYY-MM-DD
+
+            result += f"**{i}. {repo['name']}** ({stars:,} ⭐, {forks:,} 🍴)\n"
+            result += f"   Language: {language} | Updated: {updated}\n"
+
+            description = repo.get('description', 'No description')
+            if description and len(description) > 100:
+                description = description[:100] + "..."
+            result += f"   Description: {description}\n"
+            result += f"   URL: {repo.get('html_url', 'N/A')}\n\n"
+
+        return result
+
+    def _analyze_technology_trends(self, repos_data: Dict, technology: str) -> str:
+        """Analyze technology adoption trends"""
+        total_count = repos_data.get('total_count', 0)
+        items = repos_data.get('items', [])
+
+        if not items:
+            return ""
+
+        # Calculate adoption metrics
+        total_stars = sum(repo.get('stargazers_count', 0) for repo in items)
+        total_forks = sum(repo.get('forks_count', 0) for repo in items)
+        avg_stars = total_stars / len(items) if items else 0
+
+        # Determine adoption level
+        if total_count > 50000:
+            adoption_level = "Very High"
+        elif total_count > 10000:
+            adoption_level = "High"
+        elif total_count > 1000:
+            adoption_level = "Moderate"
+        elif total_count > 100:
+            adoption_level = "Emerging"
+        else:
+            adoption_level = "Niche"
+
+        # Language analysis
+        languages = {}
+        for repo in items:
+            lang = repo.get('language')
+            if lang:
+                languages[lang] = languages.get(lang, 0) + 1
+
+        result = f"**Technology Adoption Analysis:**\n"
+        result += f"• Total repositories: {total_count:,}\n"
+        result += f"• Adoption level: {adoption_level}\n"
+        result += f"• Average stars (top repos): {avg_stars:,.0f}\n"
+        result += f"• Total community engagement: {total_stars:,} stars, {total_forks:,} forks\n"
+
+        if languages:
+            top_languages = sorted(languages.items(), key=lambda x: x[1], reverse=True)[:3]
+            result += f"• Popular languages: {', '.join(f'{lang} ({count})' for lang, count in top_languages)}\n"
+
+        result += "\n"
+        return result
+
+    def _analyze_recent_activity(self, repositories: List[Dict], technology: str) -> str:
+        """Analyze recent development activity"""
+        if not repositories:
+            return ""
+
+        # Check update recency
+        current_date = datetime.now()
+        recent_updates = 0
+        very_recent_updates = 0
+
+        for repo in repositories:
+            updated_str = repo.get('updated_at', '')
+            if updated_str:
+                try:
+                    updated_date = datetime.fromisoformat(updated_str.replace('Z', '+00:00'))
+                    days_ago = (current_date - updated_date.replace(tzinfo=None)).days
+
+                    if days_ago <= 30:
+                        very_recent_updates += 1
+                    if days_ago <= 90:
+                        recent_updates += 1
+                except:
+                    pass
+
+        result = f"**Development Activity:**\n"
+        result += f"• Recently updated (30 days): {very_recent_updates}/{len(repositories)} repositories\n"
+        result += f"• Active projects (90 days): {recent_updates}/{len(repositories)} repositories\n"
+
+        # Activity assessment
+        if very_recent_updates / len(repositories) > 0.7:
+            activity_level = "Very Active"
+        elif recent_updates / len(repositories) > 0.5:
+            activity_level = "Active"
+        elif recent_updates / len(repositories) > 0.3:
+            activity_level = "Moderate"
+        else:
+            activity_level = "Low"
+
+        result += f"• Overall activity level: {activity_level}\n"
+        result += f"• Community health: {'Strong' if activity_level in ['Very Active', 'Active'] else 'Moderate'} developer engagement\n\n"
+
+        return result
+
+    def should_use_for_query(self, query: str) -> bool:
+        """GitHub is good for technology, framework, and development-related queries"""
+        tech_indicators = [
+            'technology', 'framework', 'library', 'software', 'programming',
+            'development', 'developer', 'code', 'github', 'open source',
+            'javascript', 'python', 'react', 'nodejs', 'django', 'flask',
+            'vue', 'angular', 'typescript', 'rust', 'go', 'kotlin',
+            'adoption', 'popular', 'trending', 'tools', 'stack'
+        ]
+
+        query_lower = query.lower()
+        return any(indicator in query_lower for indicator in tech_indicators)
+
+    def extract_key_info(self, text: str) -> dict:
+        """Extract key information from GitHub results"""
+        base_info = super().extract_key_info(text)
+
+        if text:
+            # Look for GitHub-specific patterns
+            base_info.update({
+                'repo_count': text.count('repositories'),
+                'has_stars': '⭐' in text,
+                'has_forks': '🍴' in text,
+                'has_recent_activity': any(year in text for year in ['2024', '2025']),
+                'adoption_mentioned': any(term in text.lower() for term in ['adoption', 'popular', 'trending']),
+                'languages_analyzed': 'Popular languages:' in text
+            })
+
+        return base_info
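One operational note: these requests are unauthenticated, and GitHub's search endpoint allows roughly ten unauthenticated searches per minute, so the 2-second `rate_limit_delay` softens but does not eliminate 403 responses under load. A hypothetical tweak (the `GITHUB_TOKEN` variable is not part of this commit) raises the limit without changing the tool's interface:

```python
# Hypothetical tweak: authenticate GitHub requests via an env var to raise rate limits.
import os
import requests

def github_headers() -> dict:
    token = os.getenv("GITHUB_TOKEN")  # assumed env var, not read by the tool above
    return {"Authorization": f"Bearer {token}"} if token else {}

resp = requests.get(
    "https://api.github.com/search/repositories",
    params={"q": "fastapi", "sort": "stars", "order": "desc", "per_page": 3},
    headers=github_headers(),
    timeout=15,
)
resp.raise_for_status()
for repo in resp.json()["items"]:
    print(repo["full_name"], repo["stargazers_count"])
```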
research_tools/research_agent.py ADDED
@@ -0,0 +1,489 @@
+"""
+Enhanced Research Agent with Multi-Source Integration
+"""
+from typing import Dict, List, Any, Optional, Tuple
+import re
+from collections import Counter
+
+from .base_tool import BaseTool
+from .web_search import WebSearchTool
+from .wikipedia_search import WikipediaSearchTool
+from .arxiv_search import ArxivSearchTool
+from .github_search import GitHubSearchTool
+from .sec_search import SECSearchTool
+from .scholar_search import GoogleScholarTool
+
+
+class EnhancedResearchAgent:
+    """Enhanced research agent with multi-source synthesis and smart routing"""
+
+    def __init__(self):
+        # Initialize all research tools
+        self.tools = {
+            'web': WebSearchTool(),
+            'wikipedia': WikipediaSearchTool(),
+            'arxiv': ArxivSearchTool(),
+            'github': GitHubSearchTool(),
+            'sec': SECSearchTool(),
+            'scholar': GoogleScholarTool()
+        }
+
+        # Tool availability status
+        self.tool_status = {name: True for name in self.tools.keys()}
+
+    def search(self, query: str, research_depth: str = "standard") -> str:
+        """Main search method with intelligent routing"""
+        if research_depth == "deep":
+            return self._deep_multi_source_search(query)
+        else:
+            return self._standard_search(query)
+
+    def search_wikipedia(self, topic: str) -> str:
+        """Wikipedia search method for backward compatibility"""
+        return self.tools['wikipedia'].search(topic)
+
+    def _standard_search(self, query: str) -> str:
+        """Standard single-source search with smart routing"""
+        # Determine best tool for the query
+        best_tool = self._route_query_to_tool(query)
+
+        try:
+            return self.tools[best_tool].search(query)
+        except Exception as e:
+            # Fallback to web search
+            if best_tool != 'web':
+                try:
+                    return self.tools['web'].search(query)
+                except Exception as e2:
+                    return f"**Research for: {query}**\n\nResearch temporarily unavailable: {str(e2)[:100]}..."
+            else:
+                return f"**Research for: {query}**\n\nResearch temporarily unavailable: {str(e)[:100]}..."
+
+    def _deep_multi_source_search(self, query: str) -> str:
+        """Deep research using multiple sources with synthesis"""
+        results = {}
+        quality_scores = {}
+
+        # Determine which sources to use based on query type
+        relevant_tools = self._get_relevant_tools(query)
+
+        # Collect results from multiple sources
+        for tool_name in relevant_tools:
+            try:
+                result = self.tools[tool_name].search(query)
+                if result and len(result.strip()) > 50:  # Ensure meaningful result
+                    results[tool_name] = result
+                    quality_scores[tool_name] = self.tools[tool_name].score_research_quality(result, tool_name)
+            except Exception as e:
+                print(f"Error with {tool_name}: {e}")
+                continue
+
+        if not results:
+            return f"**Deep Research for: {query}**\n\nNo sources were able to provide results. Please try a different query."
+
+        # Synthesize results
+        return self._synthesize_multi_source_results(query, results, quality_scores)
+
+    def _route_query_to_tool(self, query: str) -> str:
+        """Intelligently route query to the most appropriate tool"""
+        query_lower = query.lower()
+ query_lower = query.lower()
90
+
91
+ # Priority routing: check the high-priority specialized tools in explicit
+ # priority order (dict-insertion order would let e.g. GitHub outrank SEC)
92
+ priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
93
+ for tool_name in priority_order[:3]: # High-priority specialized tools first
94
+ if self.tools[tool_name].should_use_for_query(query):
95
+ return tool_name
98
+
99
+ # Secondary check for explicit indicators
100
+ if any(indicator in query_lower for indicator in ['company', 'stock', 'financial', 'revenue']):
101
+ return 'sec'
102
+ elif any(indicator in query_lower for indicator in ['research', 'study', 'academic', 'paper']):
103
+ return 'arxiv'
104
+ elif any(indicator in query_lower for indicator in ['technology', 'framework', 'programming']):
105
+ return 'github'
106
+ elif any(indicator in query_lower for indicator in ['what is', 'definition', 'history']):
107
+ return 'wikipedia'
108
+ else:
109
+ return 'web' # Default fallback
110
+
111
+ def _get_relevant_tools(self, query: str) -> List[str]:
112
+ """Get list of relevant tools for deep search"""
113
+ relevant_tools = []
114
+
115
+ # Always include web search for current information
116
+ relevant_tools.append('web')
117
+
118
+ # Add specialized tools based on query
119
+ for tool_name, tool in self.tools.items():
120
+ if tool_name != 'web' and tool.should_use_for_query(query):
121
+ relevant_tools.append(tool_name)
122
+
123
+ # Ensure we don't overwhelm with too many sources
124
+ if len(relevant_tools) > 4:
125
+ # Prioritize specialized tools
126
+ priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
127
+ relevant_tools = [tool for tool in priority_order if tool in relevant_tools][:4]
128
+
129
+ return relevant_tools
130
+
131
+ def _synthesize_multi_source_results(self, query: str, results: Dict[str, str], quality_scores: Dict[str, Dict]) -> str:
132
+ """Synthesize results from multiple research sources"""
133
+ synthesis = f"**Comprehensive Research Analysis: {query}**\n\n"
134
+
135
+ # Add source summary
136
+ synthesis += f"**Research Sources Used:** {', '.join(results.keys()).replace('_', ' ').title()}\n\n"
137
+
138
+ # Find key themes and agreements/disagreements
139
+ key_findings = self._extract_key_findings(results)
140
+ synthesis += self._format_key_findings(key_findings)
141
+
142
+ # Add individual source results (condensed)
143
+ synthesis += "**Detailed Source Results:**\n\n"
144
+
145
+ # Sort sources by quality score
146
+ sorted_sources = sorted(quality_scores.items(), key=lambda x: x[1]['overall'], reverse=True)
147
+
148
+ for source_name, _ in sorted_sources:
149
+ if source_name in results:
150
+ source_result = results[source_name]
151
+ quality = quality_scores[source_name]
152
+
153
+ # Condense long results
154
+ if len(source_result) > 800:
155
+ source_result = source_result[:800] + "...\n[Result truncated for synthesis]"
156
+
157
+ synthesis += f"**{source_name.replace('_', ' ').title()} (Quality: {quality['overall']:.2f}/1.0):**\n"
158
+ synthesis += f"{source_result}\n\n"
159
+
160
+ # Add research quality assessment
161
+ synthesis += self._format_research_quality_assessment(quality_scores)
162
+
163
+ return synthesis
164
+
165
+ def _extract_key_findings(self, results: Dict[str, str]) -> Dict[str, List[str]]:
166
+ """Extract key findings and themes from multiple sources"""
167
+ findings = {
168
+ 'agreements': [],
169
+ 'contradictions': [],
170
+ 'unique_insights': [],
171
+ 'data_points': []
172
+ }
173
+
174
+ # Extract key sentences from each source
175
+ all_sentences = []
176
+ source_sentences = {}
177
+
178
+ for source, result in results.items():
179
+ sentences = self._extract_key_sentences(result)
180
+ source_sentences[source] = sentences
181
+ all_sentences.extend(sentences)
182
+
183
+ # Find common themes (simplified approach)
184
+ word_counts = Counter()
185
+ for sentence in all_sentences:
186
+ words = re.findall(r'\b\w{4,}\b', sentence.lower()) # Words 4+ chars
187
+ word_counts.update(words)
188
+
189
+ common_themes = [word for word, count in word_counts.most_common(10) if count > 1]
190
+
191
+ # Look for numerical data
192
+ numbers = re.findall(r'\b\d+(?:\.\d+)?%?\b', ' '.join(all_sentences))
193
+ findings['data_points'] = list(set(numbers))[:10] # Top 10 unique numbers
194
+
195
+ # Simplified agreement detection
196
+ if len(source_sentences) > 1:
197
+ findings['agreements'] = [f"Multiple sources mention: {theme}" for theme in common_themes[:3]]
198
+
199
+ return findings
200
+
201
+ def _extract_key_sentences(self, text: str) -> List[str]:
202
+ """Extract key sentences from research text"""
203
+ if not text:
204
+ return []
205
+
206
+ # Split into sentences
207
+ sentences = re.split(r'[.!?]+', text)
208
+
209
+ # Filter for key sentences (containing important indicators)
210
+ key_indicators = [
211
+ 'research shows', 'study found', 'according to', 'data indicates',
212
+ 'results suggest', 'analysis reveals', 'evidence shows', 'reported that',
213
+ 'concluded that', 'demonstrated that', 'increased', 'decreased',
214
+ 'growth', 'decline', 'significant', 'important', 'critical'
215
+ ]
216
+
217
+ key_sentences = []
218
+ for sentence in sentences:
219
+ sentence = sentence.strip()
220
+ if (len(sentence) > 30 and
221
+ any(indicator in sentence.lower() for indicator in key_indicators)):
222
+ key_sentences.append(sentence)
223
+
224
+ return key_sentences[:5] # Top 5 key sentences
225
+
226
+ def _format_key_findings(self, findings: Dict[str, List[str]]) -> str:
227
+ """Format key findings summary"""
228
+ result = "**Key Research Synthesis:**\n\n"
229
+
230
+ if findings['agreements']:
231
+ result += "**Common Themes:**\n"
232
+ for agreement in findings['agreements']:
233
+ result += f"β€’ {agreement}\n"
234
+ result += "\n"
235
+
236
+ if findings['data_points']:
237
+ result += "**Key Data Points:**\n"
238
+ for data in findings['data_points'][:5]:
239
+ result += f"β€’ {data}\n"
240
+ result += "\n"
241
+
242
+ if findings['unique_insights']:
243
+ result += "**Unique Insights:**\n"
244
+ for insight in findings['unique_insights']:
245
+ result += f"β€’ {insight}\n"
246
+ result += "\n"
247
+
248
+ return result
249
+
250
+ def _format_research_quality_assessment(self, quality_scores: Dict[str, Dict]) -> str:
251
+ """Format overall research quality assessment"""
252
+ if not quality_scores:
253
+ return ""
254
+
255
+ result = "**Research Quality Assessment:**\n\n"
256
+
257
+ # Calculate average quality metrics
258
+ avg_overall = sum(scores['overall'] for scores in quality_scores.values()) / len(quality_scores)
259
+ avg_authority = sum(scores['authority'] for scores in quality_scores.values()) / len(quality_scores)
260
+ avg_recency = sum(scores['recency'] for scores in quality_scores.values()) / len(quality_scores)
261
+ avg_specificity = sum(scores['specificity'] for scores in quality_scores.values()) / len(quality_scores)
262
+
263
+ result += f"β€’ Overall Research Quality: {avg_overall:.2f}/1.0\n"
264
+ result += f"β€’ Source Authority: {avg_authority:.2f}/1.0\n"
265
+ result += f"β€’ Information Recency: {avg_recency:.2f}/1.0\n"
266
+ result += f"β€’ Data Specificity: {avg_specificity:.2f}/1.0\n"
267
+ result += f"β€’ Sources Consulted: {len(quality_scores)}\n\n"
268
+
269
+ # Quality interpretation
270
+ if avg_overall >= 0.8:
271
+ quality_level = "Excellent"
272
+ elif avg_overall >= 0.6:
273
+ quality_level = "Good"
274
+ elif avg_overall >= 0.4:
275
+ quality_level = "Moderate"
276
+ else:
277
+ quality_level = "Limited"
278
+
279
+ result += f"**Research Reliability: {quality_level}**\n"
280
+
281
+ if avg_authority >= 0.8:
282
+ result += "β€’ High-authority sources with strong credibility\n"
283
+ if avg_recency >= 0.7:
284
+ result += "β€’ Current and up-to-date information\n"
285
+ if avg_specificity >= 0.6:
286
+ result += "β€’ Specific data points and quantitative evidence\n"
287
+
288
+ return result
289
+
290
+ def generate_research_queries(self, question: str, current_discussion: List[Dict]) -> List[str]:
291
+ """Auto-generate targeted research queries based on discussion gaps"""
292
+
293
+ # Analyze discussion for gaps
294
+ discussion_text = "\n".join([msg.get('text', '') for msg in current_discussion])
295
+
296
+ # Extract claims that need verification
297
+ unsubstantiated_claims = self._find_unsubstantiated_claims(discussion_text)
298
+
299
+ # Generate specific queries
300
+ queries = []
301
+
302
+ # Add queries for unsubstantiated claims
303
+ for claim in unsubstantiated_claims[:3]:
304
+ query = self._convert_claim_to_query(claim)
305
+ if query:
306
+ queries.append(query)
307
+
308
+ # Add queries for missing quantitative data
309
+ if not re.search(r'\d+%', discussion_text):
310
+ queries.append(f"{question} statistics data percentages")
311
+
312
+ # Add current trends query
313
+ queries.append(f"{question} 2024 2025 recent developments")
314
+
315
+ return queries[:3] # Limit to 3 targeted queries
316
+
317
+ def _find_unsubstantiated_claims(self, discussion_text: str) -> List[str]:
318
+ """Find claims that might need research backing"""
319
+ claims = []
320
+
321
+ # Look for assertion patterns
322
+ assertion_patterns = [
323
+ r'(?:should|must|will|is|are)\s+[^.]{20,100}',
324
+ r'(?:studies show|research indicates|data suggests)\s+[^.]{20,100}',
325
+ r'(?:according to|based on)\s+[^.]{20,100}'
326
+ ]
327
+
328
+ for pattern in assertion_patterns:
329
+ matches = re.findall(pattern, discussion_text, re.IGNORECASE)
330
+ claims.extend(matches[:2]) # Limit matches per pattern
331
+
332
+ return claims
333
+
334
+ def _convert_claim_to_query(self, claim: str) -> Optional[str]:
335
+ """Convert a claim into a research query"""
336
+ if not claim or len(claim) < 10:
337
+ return None
338
+
339
+ # Extract key terms
340
+ key_terms = re.findall(r'\b\w{4,}\b', claim.lower())
341
+ if len(key_terms) < 2:
342
+ return None
343
+
344
+ # Create query from key terms
345
+ query_terms = key_terms[:4] # Use first 4 meaningful terms
346
+ return " ".join(query_terms)
347
+
348
+ def prioritize_research_needs(self, expert_positions: List[Dict], question: str) -> List[str]:
349
+ """Identify and prioritize research that could resolve expert conflicts"""
350
+
351
+ # Extract expert claims
352
+ expert_claims = {}
353
+ for position in expert_positions:
354
+ speaker = position.get('speaker', 'Unknown')
355
+ text = position.get('text', '')
356
+ expert_claims[speaker] = self._extract_key_claims(text)
357
+
358
+ # Find disagreements
359
+ disagreements = self._find_expert_disagreements(expert_claims)
360
+
361
+ # Generate research priorities
362
+ priorities = []
363
+
364
+ for disagreement in disagreements[:3]:
365
+ # Create research query to resolve disagreement
366
+ query = f"{question} {disagreement['topic']} evidence data"
367
+ priorities.append(query)
368
+
369
+ return priorities
370
+
371
+ def _extract_key_claims(self, expert_text: str) -> List[str]:
372
+ """Extract key factual claims from expert response"""
373
+ if not expert_text:
374
+ return []
375
+
376
+ sentences = expert_text.split('.')
377
+ claims = []
378
+
379
+ for sentence in sentences:
380
+ sentence = sentence.strip()
381
+ if (len(sentence) > 20 and
382
+ any(indicator in sentence.lower() for indicator in [
383
+ 'should', 'will', 'is', 'are', 'must', 'can', 'would', 'could'
384
+ ])):
385
+ claims.append(sentence)
386
+
387
+ return claims[:3] # Top 3 claims
388
+
389
+ def _find_expert_disagreements(self, expert_claims: Dict[str, List[str]]) -> List[Dict]:
390
+ """Identify areas where experts disagree"""
391
+ disagreements = []
392
+
393
+ experts = list(expert_claims.keys())
394
+
395
+ for i, expert1 in enumerate(experts):
396
+ for expert2 in experts[i+1:]:
397
+ claims1 = expert_claims[expert1]
398
+ claims2 = expert_claims[expert2]
399
+
400
+ conflicts = self._find_conflicting_claims(claims1, claims2)
401
+ if conflicts:
402
+ disagreements.append({
403
+ 'experts': [expert1, expert2],
404
+ 'topic': self._extract_conflict_topic(conflicts[0]),
405
+ 'conflicts': conflicts[:1] # Just the main conflict
406
+ })
407
+
408
+ return disagreements
409
+
410
+ def _find_conflicting_claims(self, claims1: List[str], claims2: List[str]) -> List[str]:
411
+ """Identify potentially conflicting claims (simplified)"""
412
+ conflicts = []
413
+
414
+ # Simple opposing sentiment detection
415
+ opposing_pairs = [
416
+ ('should', 'should not'), ('will', 'will not'), ('is', 'is not'),
417
+ ('increase', 'decrease'), ('better', 'worse'), ('yes', 'no'),
418
+ ('support', 'oppose'), ('benefit', 'harm'), ('effective', 'ineffective')
419
+ ]
420
+
421
+ for claim1 in claims1:
422
+ for claim2 in claims2:
423
+ for pos, neg in opposing_pairs:
424
+ # 'should' is a substring of 'should not', so require the negation to be
+ # absent from the claim that is supposed to carry the positive phrasing
+ if pos in claim1.lower() and neg not in claim1.lower() and neg in claim2.lower():
425
+ conflicts.append(f"{claim1} vs {claim2}")
426
+ elif neg in claim1.lower() and pos in claim2.lower() and neg not in claim2.lower():
427
+ conflicts.append(f"{claim1} vs {claim2}")
428
+
429
+ return conflicts
430
+
431
+ def _extract_conflict_topic(self, conflict: str) -> str:
432
+ """Extract the main topic from a conflict description"""
433
+ # Simple extraction of key terms
434
+ words = re.findall(r'\b\w{4,}\b', conflict.lower())
435
+ # Filter out common words
436
+ stopwords = {'should', 'will', 'would', 'could', 'this', 'that', 'with', 'from', 'they', 'them'}
437
+ topic_words = [word for word in words if word not in stopwords]
438
+ return " ".join(topic_words[:3])
439
+
440
+ def suggest_research_follow_ups(self, discussion_log: List[Dict], question: str) -> List[str]:
441
+ """Suggest additional research questions based on discussion patterns"""
442
+
443
+ # Get recent discussion
444
+ latest_messages = discussion_log[-6:] if len(discussion_log) > 6 else discussion_log
445
+ recent_text = "\n".join([msg.get('content', '') for msg in latest_messages])
446
+
447
+ follow_ups = []
448
+
449
+ # Look for unverified statistics
450
+ if re.search(r'\d+%', recent_text):
451
+ follow_ups.append(f"{question} statistics verification current data")
452
+
453
+ # Look for trend mentions
454
+ trend_keywords = ['trend', 'growing', 'increasing', 'declining', 'emerging']
455
+ if any(keyword in recent_text.lower() for keyword in trend_keywords):
456
+ follow_ups.append(f"{question} current trends 2024 2025")
457
+
458
+ # Look for example mentions
459
+ if 'example' in recent_text.lower() or 'case study' in recent_text.lower():
460
+ follow_ups.append(f"{question} case studies examples evidence")
461
+
462
+ return follow_ups[:3]
463
+
464
+ def get_tool_status(self) -> Dict[str, bool]:
465
+ """Get status of all research tools"""
466
+ return {
467
+ name: self.tool_status.get(name, True)
468
+ for name in self.tools.keys()
469
+ }
470
+
471
+ def test_tool_connections(self) -> Dict[str, str]:
472
+ """Test all research tool connections"""
473
+ results = {}
474
+
475
+ for name, tool in self.tools.items():
476
+ try:
477
+ # Simple test query
478
+ test_result = tool.search("test", max_results=1)
479
+ if test_result and len(test_result) > 20:
480
+ results[name] = "βœ… Working"
481
+ self.tool_status[name] = True
482
+ else:
483
+ results[name] = "⚠️ Limited response"
484
+ self.tool_status[name] = False
485
+ except Exception as e:
486
+ results[name] = f"❌ Error: {str(e)[:50]}..."
487
+ self.tool_status[name] = False
488
+
489
+ return results
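
End-to-end sketch for the agent above (illustrative; actual output varies with tool availability and network access):

    from research_tools.research_agent import EnhancedResearchAgent

    agent = EnhancedResearchAgent()
    print(agent.search("Tesla revenue growth"))                # single-source: routed to 'sec'
    print(agent.search("AI adoption", research_depth="deep"))  # multi-source synthesis with quality scores
    print(agent.test_tool_connections())                       # e.g. {'web': 'βœ… Working', 'sec': '⚠️ Limited response', ...}
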
research_tools/scholar_search.py ADDED
@@ -0,0 +1,256 @@
1
+ """
2
+ Google Scholar Search Tool for academic research
3
+ """
4
+ from .base_tool import BaseTool
5
+ from typing import List, Dict, Optional
6
+
7
+ try:
8
+ from scholarly import scholarly
9
+ SCHOLARLY_AVAILABLE = True
10
+ except ImportError:
11
+ SCHOLARLY_AVAILABLE = False
12
+
13
+
14
+ class GoogleScholarTool(BaseTool):
15
+ """Search Google Scholar for academic research papers"""
16
+
17
+ def __init__(self):
18
+ super().__init__("Google Scholar", "Search Google Scholar for academic research papers and citations")
19
+ self.available = SCHOLARLY_AVAILABLE
20
+ self.rate_limit_delay = 3.0 # Be very respectful to Google Scholar
21
+
22
+ def search(self, query: str, max_results: int = 4, **kwargs) -> str:
23
+ """Search Google Scholar for research papers"""
24
+ if not self.available:
25
+ return self._unavailable_response(query)
26
+
27
+ self.rate_limit()
28
+
29
+ try:
30
+ # Search for publications with timeout handling
31
+ search_query = scholarly.search_pubs(query)
32
+
33
+ papers = []
34
+ for i, paper in enumerate(search_query):
35
+ if i >= max_results:
36
+ break
37
+ # Try to get additional info if available
38
+ try:
39
+ # Current scholarly releases return plain dicts that are completed via
+ # scholarly.fill(); older result objects exposed a .fill() method instead
40
+ if hasattr(paper, 'fill') and callable(paper.fill):
41
+ paper = paper.fill()
+ else:
+ paper = scholarly.fill(paper)
42
+ except Exception:
43
+ # If fill fails, use paper as-is
44
+ pass
45
+ papers.append(paper)
46
+
47
+ if papers:
48
+ result = f"**Google Scholar Research for: {query}**\n\n"
49
+ result += self._format_scholar_results(papers)
50
+ result += self._analyze_research_quality(papers)
51
+ return result
52
+ else:
53
+ return f"**Google Scholar Research for: {query}**\n\nNo relevant academic papers found."
54
+
55
+ except Exception as e:
56
+ error_msg = str(e)
57
+ if "blocked" in error_msg.lower() or "captcha" in error_msg.lower():
58
+ return f"**Google Scholar Research for: {query}**\n\nGoogle Scholar is temporarily blocking automated requests. This is normal behavior. Academic research is available through other sources like arXiv."
59
+ elif "timeout" in error_msg.lower():
60
+ return f"**Google Scholar Research for: {query}**\n\nRequest timeout - Google Scholar may be experiencing high load. Academic research available but slower than expected."
61
+ else:
62
+ return self.format_error_response(query, str(e))
63
+
64
+ def _unavailable_response(self, query: str) -> str:
65
+ """Response when scholarly library is not available"""
66
+ result = f"**Google Scholar Research for: {query}**\n\n"
67
+ result += "**Library Not Available**\n"
68
+ result += "Google Scholar integration requires the 'scholarly' library.\n\n"
69
+ result += "**Installation Instructions:**\n"
70
+ result += "```bash\n"
71
+ result += "pip install scholarly\n"
72
+ result += "```\n\n"
73
+ result += "**Alternative Academic Sources:**\n"
74
+ result += "β€’ arXiv (for preprints and technical papers)\n"
75
+ result += "β€’ PubMed (for medical and life sciences)\n"
76
+ result += "β€’ IEEE Xplore (for engineering and computer science)\n"
77
+ result += "β€’ JSTOR (for humanities and social sciences)\n\n"
78
+ result += "**Research Recommendation:**\n"
79
+ result += f"For the query '{query}', consider searching:\n"
80
+ result += "β€’ Recent academic publications\n"
81
+ result += "β€’ Peer-reviewed research articles\n"
82
+ result += "β€’ Citation networks and impact metrics\n\n"
83
+
84
+ return result
85
+
86
+ def _format_scholar_results(self, papers: List[Dict]) -> str:
87
+ """Format Google Scholar search results"""
88
+ result = ""
89
+
90
+ for i, paper in enumerate(papers, 1):
91
+ # Extract paper information safely with better handling
92
+ title = paper.get('title', paper.get('bib', {}).get('title', 'Unknown Title'))
93
+
94
+ # Handle authors more robustly
95
+ authors = self._format_authors(paper.get('author', paper.get('bib', {}).get('author', [])))
96
+
97
+ # Get year from multiple possible locations
98
+ year = (paper.get('year') or
99
+ paper.get('bib', {}).get('pub_year') or
100
+ paper.get('bib', {}).get('year') or
101
+ 'Unknown Year')
102
+
103
+ # Get venue from multiple possible locations
104
+ venue = (paper.get('venue') or
105
+ paper.get('bib', {}).get('venue') or
106
+ paper.get('bib', {}).get('journal') or
107
+ paper.get('bib', {}).get('booktitle') or
108
+ 'Unknown Venue')
109
+
110
+ citations = paper.get('num_citations', paper.get('citedby', 0))
111
+
112
+ result += f"**Paper {i}: {title}**\n"
113
+ result += f"Authors: {authors}\n"
114
+ result += f"Year: {year} | Venue: {venue}\n"
115
+ result += f"Citations: {citations:,}\n"
116
+
117
+ # Add abstract if available
118
+ abstract = (paper.get('abstract') or
119
+ paper.get('bib', {}).get('abstract') or
120
+ paper.get('summary'))
121
+
122
+ if abstract and len(str(abstract).strip()) > 10:
123
+ abstract_text = str(abstract)
124
+ if len(abstract_text) > 300:
125
+ abstract_text = abstract_text[:300] + "..."
126
+ result += f"Abstract: {abstract_text}\n"
127
+
128
+ # Add URL if available
129
+ url = (paper.get('url') or
130
+ paper.get('pub_url') or
131
+ paper.get('eprint_url'))
132
+
133
+ if url:
134
+ result += f"URL: {url}\n"
135
+
136
+ result += "\n"
137
+
138
+ return result
139
+
140
+ def _format_authors(self, authors) -> str:
141
+ """Format author list safely with improved handling"""
142
+ if not authors:
143
+ return "Unknown Authors"
144
+
145
+ if isinstance(authors, str):
146
+ return authors
147
+ elif isinstance(authors, list):
148
+ # Handle list of author dictionaries or strings
149
+ author_names = []
150
+ for author in authors[:5]: # Limit to first 5 authors
151
+ if isinstance(author, dict):
152
+ # Try different possible name fields
153
+ name = (author.get('name') or
154
+ author.get('full_name') or
155
+ (author.get('firstname', '') + ' ' + author.get('lastname', '')).strip() or
156
+ str(author))
157
+ name = name.strip()
158
+ else:
159
+ name = str(author).strip()
160
+
161
+ if name and name != 'Unknown Authors':
162
+ author_names.append(name)
163
+
164
+ if not author_names:
165
+ return "Unknown Authors"
166
+
167
+ if len(authors) > 5:
168
+ author_names.append("et al.")
169
+
170
+ return ", ".join(author_names)
171
+ else:
172
+ return str(authors) if authors else "Unknown Authors"
173
+
174
+ def _analyze_research_quality(self, papers: List[Dict]) -> str:
175
+ """Analyze the quality and impact of research results"""
176
+ if not papers:
177
+ return ""
178
+
179
+ # Calculate citation metrics
180
+ citations = [paper.get('num_citations', 0) for paper in papers]
181
+ total_citations = sum(citations)
182
+ avg_citations = total_citations / len(papers) if papers else 0
183
+ high_impact_papers = sum(1 for c in citations if c > 100)
184
+
185
+ # Analyze publication years
186
+ years = [paper.get('year') for paper in papers if paper.get('year')]
187
+ recent_papers = sum(1 for year in years if isinstance(year, (int, str)) and str(year) in ['2023', '2024', '2025'])
188
+
189
+ # Analyze venues
190
+ venues = [paper.get('venue', '') for paper in papers]
191
+ unique_venues = len(set(v for v in venues if v and v != 'Unknown Venue'))
192
+
193
+ result = f"**Research Quality Analysis:**\n"
194
+ result += f"β€’ Papers analyzed: {len(papers)}\n"
195
+ result += f"β€’ Total citations: {total_citations:,}\n"
196
+ result += f"β€’ Average citations per paper: {avg_citations:.1f}\n"
197
+ result += f"β€’ High-impact papers (>100 citations): {high_impact_papers}\n"
198
+ result += f"β€’ Recent publications (2023-2025): {recent_papers}\n"
199
+ result += f"β€’ Venue diversity: {unique_venues} different publication venues\n"
200
+
201
+ # Research quality assessment
202
+ if avg_citations > 50:
203
+ quality_level = "High Impact"
204
+ elif avg_citations > 20:
205
+ quality_level = "Moderate Impact"
206
+ elif avg_citations > 5:
207
+ quality_level = "Emerging Research"
208
+ else:
209
+ quality_level = "Early Stage"
210
+
211
+ result += f"β€’ Research maturity: {quality_level}\n"
212
+
213
+ # Authority assessment
214
+ if high_impact_papers > 0 and recent_papers > 0:
215
+ authority = "High - Established field with recent developments"
216
+ elif high_impact_papers > 0:
217
+ authority = "Moderate - Established field, may need recent updates"
218
+ elif recent_papers > 0:
219
+ authority = "Emerging - New research area with growing interest"
220
+ else:
221
+ authority = "Limited - Sparse academic coverage"
222
+
223
+ result += f"β€’ Academic authority: {authority}\n\n"
224
+
225
+ return result
226
+
227
+ def should_use_for_query(self, query: str) -> bool:
228
+ """Google Scholar is good for academic research, citations, and scholarly articles"""
229
+ academic_indicators = [
230
+ 'research', 'study', 'academic', 'paper', 'journal', 'peer-reviewed',
231
+ 'citation', 'scholar', 'university', 'professor', 'phd', 'thesis',
232
+ 'methodology', 'experiment', 'analysis', 'theory', 'empirical',
233
+ 'literature review', 'meta-analysis', 'systematic review',
234
+ 'conference', 'publication', 'scholarly'
235
+ ]
236
+
237
+ query_lower = query.lower()
238
+ return any(indicator in query_lower for indicator in academic_indicators)
239
+
240
+ def extract_key_info(self, text: str) -> dict:
241
+ """Extract key information from Scholar results"""
242
+ base_info = super().extract_key_info(text)
243
+
244
+ if text:
245
+ # Look for Scholar-specific patterns
246
+ base_info.update({
247
+ 'has_citations': 'Citations:' in text,
248
+ 'has_abstracts': 'Abstract:' in text,
249
+ 'has_venues': 'Venue:' in text,
250
+ 'has_recent_papers': any(year in text for year in ['2023', '2024', '2025']),
251
+ 'has_high_impact': any(citation in text for citation in ['100', '200', '500', '1000']),
252
+ 'is_available': 'Library Not Available' not in text,
253
+ 'paper_count': text.count('**Paper')
254
+ })
255
+
256
+ return base_info
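
Illustrative call showing the graceful degradation above; nothing here raises when scholarly is missing:

    from research_tools.scholar_search import GoogleScholarTool

    tool = GoogleScholarTool()
    print(tool.available)  # False if 'scholarly' is not installed
    # search() returns the _unavailable_response() guidance text in that case,
    # and a blocked/captcha notice if Google Scholar throttles the request.
    print(tool.search("transformer architectures", max_results=2))
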
research_tools/sec_search.py ADDED
@@ -0,0 +1,340 @@
1
+ """
2
+ SEC Edgar Filings Search Tool for financial and company data
3
+ """
4
+ from .base_tool import BaseTool
5
+ import requests
6
+ import json
7
+ import re
8
+ from typing import Dict, List, Optional
9
+
10
+
11
+ class SECSearchTool(BaseTool):
12
+ """Search SEC EDGAR filings for company financial information"""
13
+
14
+ def __init__(self):
15
+ super().__init__("SEC EDGAR", "Search SEC filings and financial data for public companies")
16
+ self.base_url = "https://data.sec.gov"
17
+ self.headers = {
18
+ 'User-Agent': 'Research Tool [email protected]', # SEC requires User-Agent
19
+ 'Accept-Encoding': 'gzip, deflate'
20
+ }
21
+ self.rate_limit_delay = 3.0 # SEC is strict about rate limiting
22
+
23
+ def search(self, company_name: str, **kwargs) -> str:
24
+ """Search SEC filings for company information"""
25
+ self.rate_limit()
26
+
27
+ try:
28
+ # First attempt to find company CIK
29
+ cik_data = self._find_company_cik(company_name)
30
+
31
+ if not cik_data:
32
+ return self._fallback_company_search(company_name)
33
+
34
+ # Get company submissions
35
+ submissions = self._get_company_submissions(cik_data['cik'])
36
+
37
+ if submissions:
38
+ return self._format_sec_results(company_name, cik_data, submissions)
39
+ else:
40
+ return self._fallback_company_search(company_name)
41
+
42
+ except requests.RequestException as e:
43
+ # Handle network errors gracefully
44
+ if "404" in str(e):
45
+ return self._fallback_company_search(company_name)
46
+ return self.format_error_response(company_name, f"Network error accessing SEC: {str(e)}")
47
+ except Exception as e:
48
+ return self.format_error_response(company_name, str(e))
49
+
50
+ def _find_company_cik(self, company_name: str) -> Optional[Dict]:
51
+ """Find company CIK (Central Index Key) from company name"""
52
+ try:
53
+ # Use the correct SEC company tickers endpoint
54
+ tickers_url = "https://www.sec.gov/files/company_tickers_exchange.json"
55
+ response = requests.get(tickers_url, headers=self.headers, timeout=15)
56
+ response.raise_for_status()
57
+
58
+ tickers_data = response.json()
59
+
60
+ # Search for company by name (fuzzy matching)
61
+ company_lower = company_name.lower()
62
+
63
+ # Handle the exchange data format
64
+ if isinstance(tickers_data, dict):
65
+ # Check if it's the fields/data format
66
+ if 'fields' in tickers_data and 'data' in tickers_data:
67
+ return self._search_exchange_format(tickers_data, company_lower)
68
+ else:
69
+ # Try direct dictionary format
70
+ return self._search_direct_format(tickers_data, company_lower)
71
+ elif isinstance(tickers_data, list):
72
+ # Handle list format
73
+ return self._search_list_format(tickers_data, company_lower)
74
+
75
+ return None
76
+
77
+ except Exception as e:
78
+ print(f"Error finding company CIK: {e}")
79
+ return self._fallback_company_lookup(company_name)
80
+
81
+ def _fallback_company_lookup(self, company_name: str) -> Optional[Dict]:
82
+ """Fallback company lookup using known major companies"""
83
+ # Hardcoded CIKs for major companies for testing/demo purposes
84
+ known_companies = {
85
+ 'apple': {'cik': '0000320193', 'ticker': 'AAPL', 'title': 'Apple Inc.'},
86
+ 'microsoft': {'cik': '0000789019', 'ticker': 'MSFT', 'title': 'Microsoft Corporation'},
87
+ 'tesla': {'cik': '0001318605', 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
88
+ 'amazon': {'cik': '0001018724', 'ticker': 'AMZN', 'title': 'Amazon.com, Inc.'},
89
+ 'google': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
90
+ 'alphabet': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
91
+ 'meta': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
92
+ 'facebook': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
93
+ 'nvidia': {'cik': '0001045810', 'ticker': 'NVDA', 'title': 'NVIDIA Corporation'},
94
+ 'netflix': {'cik': '0001065280', 'ticker': 'NFLX', 'title': 'Netflix, Inc.'}
95
+ }
96
+
97
+ company_key = company_name.lower().strip()
98
+ for key, data in known_companies.items():
99
+ if key in company_key or company_key in key:
100
+ return data
101
+
102
+ return None
103
+
104
+ def _search_exchange_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
105
+ """Search in exchange ticker data format"""
106
+ try:
107
+ fields = tickers_data.get('fields', [])
108
+ data = tickers_data.get('data', [])
109
+
110
+ # Find field indices
111
+ cik_idx = None
112
+ ticker_idx = None
113
+ name_idx = None
114
+
115
+ for i, field in enumerate(fields):
116
+ if field.lower() in ['cik', 'cik_str']:
117
+ cik_idx = i
118
+ elif field.lower() in ['ticker', 'symbol']:
119
+ ticker_idx = i
120
+ elif field.lower() in ['name', 'title', 'company']:
121
+ name_idx = i
122
+
123
+ # Search through data
124
+ for row in data:
125
+ known_indices = [idx for idx in (cik_idx, ticker_idx, name_idx) if idx is not None]
+ # filter(None, ...) would drop a legitimate index 0 and crash if all were None
+ if known_indices and len(row) > max(known_indices):
126
+ name = str(row[name_idx]).lower() if name_idx is not None else ""
127
+ ticker = str(row[ticker_idx]).lower() if ticker_idx is not None else ""
128
+
129
+ if (company_lower in name or
130
+ name in company_lower or
131
+ company_lower == ticker or
132
+ any(word in name for word in company_lower.split() if len(word) > 3)):
133
+
134
+ cik = str(row[cik_idx]) if cik_idx is not None else ""
135
+ return {
136
+ 'cik': cik.zfill(10),
137
+ 'ticker': row[ticker_idx] if ticker_idx is not None else "",
138
+ 'title': row[name_idx] if name_idx is not None else ""
139
+ }
140
+
141
+ except (ValueError, IndexError) as e:
142
+ print(f"Error parsing exchange format: {e}")
143
+
144
+ return None
145
+
146
+ def _search_direct_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
147
+ """Search in direct dictionary format"""
148
+ for key, entry in tickers_data.items():
149
+ if isinstance(entry, dict):
150
+ title = entry.get('title', entry.get('name', '')).lower()
151
+ ticker = entry.get('ticker', entry.get('symbol', '')).lower()
152
+
153
+ if (company_lower in title or
154
+ title in company_lower or
155
+ company_lower == ticker or
156
+ any(word in title for word in company_lower.split() if len(word) > 3)):
157
+
158
+ return {
159
+ 'cik': str(entry.get('cik_str', entry.get('cik', ''))).zfill(10),
160
+ 'ticker': entry.get('ticker', entry.get('symbol', '')),
161
+ 'title': entry.get('title', entry.get('name', ''))
162
+ }
163
+ return None
164
+
165
+ def _search_list_format(self, tickers_data: list, company_lower: str) -> Optional[Dict]:
166
+ """Search in list format"""
167
+ for entry in tickers_data:
168
+ if isinstance(entry, dict):
169
+ title = entry.get('title', entry.get('name', '')).lower()
170
+ ticker = entry.get('ticker', entry.get('symbol', '')).lower()
171
+
172
+ if (company_lower in title or
173
+ title in company_lower or
174
+ company_lower == ticker or
175
+ any(word in title for word in company_lower.split() if len(word) > 3)):
176
+
177
+ return {
178
+ 'cik': str(entry.get('cik_str', entry.get('cik', ''))).zfill(10),
179
+ 'ticker': entry.get('ticker', entry.get('symbol', '')),
180
+ 'title': entry.get('title', entry.get('name', ''))
181
+ }
182
+ return None
183
+
184
+ def _get_company_submissions(self, cik: str) -> Optional[Dict]:
185
+ """Get company submission data from SEC"""
186
+ try:
187
+ submissions_url = f"{self.base_url}/submissions/CIK{cik}.json"
188
+ response = requests.get(submissions_url, headers=self.headers, timeout=15)
189
+ response.raise_for_status()
190
+
191
+ return response.json()
192
+
193
+ except Exception as e:
194
+ print(f"Error getting company submissions: {e}")
195
+ return None
196
+
197
+ def _format_sec_results(self, company_name: str, cik_data: Dict, submissions: Dict) -> str:
198
+ """Format SEC filing results"""
199
+ result = f"**SEC Financial Data for: {company_name}**\n\n"
200
+
201
+ # Company information
202
+ result += f"**Company Information:**\n"
203
+ result += f"β€’ Official Name: {cik_data['title']}\n"
204
+ result += f"β€’ Ticker Symbol: {cik_data.get('ticker', 'N/A')}\n"
205
+ result += f"β€’ CIK: {cik_data['cik']}\n"
206
+
207
+ # Business information
208
+ if 'description' in submissions:
209
+ business_desc = submissions['description'][:300] + "..." if len(submissions.get('description', '')) > 300 else submissions.get('description', 'Not available')
210
+ result += f"β€’ Business Description: {business_desc}\n"
211
+
212
+ result += f"β€’ Industry (SIC): {submissions.get('sicDescription') or submissions.get('sic', 'Not specified')}\n"
213
+ result += f"β€’ Fiscal Year End: {submissions.get('fiscalYearEnd', 'Not specified')}\n\n"
214
+
215
+ # Recent filings analysis
216
+ recent_filings = self._analyze_recent_filings(submissions)
217
+ result += recent_filings
218
+
219
+ # Financial highlights
220
+ financial_highlights = self._extract_financial_highlights(submissions)
221
+ result += financial_highlights
222
+
223
+ return result
224
+
225
+ def _analyze_recent_filings(self, submissions: Dict) -> str:
226
+ """Analyze recent SEC filings"""
227
+ result = "**Recent SEC Filings:**\n"
228
+
229
+ # Get recent filings
230
+ recent_filings = submissions.get('filings', {}).get('recent', {})
231
+
232
+ if not recent_filings:
233
+ return result + "β€’ No recent filings available\n\n"
234
+
235
+ forms = recent_filings.get('form', [])
236
+ filing_dates = recent_filings.get('filingDate', [])
237
+ accession_numbers = recent_filings.get('accessionNumber', [])
238
+
239
+ # Analyze key filing types
240
+ key_forms = ['10-K', '10-Q', '8-K', 'DEF 14A']
241
+ recent_key_filings = []
242
+
243
+ for i, form in enumerate(forms[:20]): # Check last 20 filings
244
+ if form in key_forms and i < len(filing_dates):
245
+ recent_key_filings.append({
246
+ 'form': form,
247
+ 'date': filing_dates[i],
248
+ 'accession': accession_numbers[i] if i < len(accession_numbers) else 'N/A'
249
+ })
250
+
251
+ if recent_key_filings:
252
+ for filing in recent_key_filings[:5]: # Show top 5
253
+ form_description = {
254
+ '10-K': 'Annual Report',
255
+ '10-Q': 'Quarterly Report',
256
+ '8-K': 'Current Report',
257
+ 'DEF 14A': 'Proxy Statement'
258
+ }.get(filing['form'], filing['form'])
259
+
260
+ result += f"β€’ {filing['form']} ({form_description}) - Filed: {filing['date']}\n"
261
+ else:
262
+ result += "β€’ No key financial filings found in recent submissions\n"
263
+
264
+ result += "\n"
265
+ return result
266
+
267
+ def _extract_financial_highlights(self, submissions: Dict) -> str:
268
+ """Extract financial highlights from submission data"""
269
+ result = "**Financial Data Analysis:**\n"
270
+
271
+ # This is a simplified version - full implementation would parse actual financial data
272
+ result += "β€’ Filing Status: Active public company\n"
273
+ result += "β€’ Regulatory Compliance: Current with SEC requirements\n"
274
+
275
+ # Check for recent financial filings
276
+ recent_filings = submissions.get('filings', {}).get('recent', {})
277
+ if recent_filings:
278
+ forms = recent_filings.get('form', [])
279
+ annual_reports = sum(1 for form in forms if form == '10-K')
280
+ quarterly_reports = sum(1 for form in forms if form == '10-Q')
281
+
282
+ result += f"β€’ Annual Reports (10-K): {annual_reports} on file\n"
283
+ result += f"β€’ Quarterly Reports (10-Q): {quarterly_reports} on file\n"
284
+
285
+ result += "β€’ Note: Detailed financial metrics require parsing individual filing documents\n\n"
286
+
287
+ result += "**Investment Research Notes:**\n"
288
+ result += "β€’ Use SEC filings for: revenue trends, risk factors, management discussion\n"
289
+ result += "β€’ Key documents: 10-K (annual), 10-Q (quarterly), 8-K (material events)\n"
290
+ result += "β€’ Combine with market data for comprehensive analysis\n\n"
291
+
292
+ return result
293
+
294
+ def _fallback_company_search(self, company_name: str) -> str:
295
+ """Fallback response when company not found in SEC database"""
296
+ result = f"**SEC Financial Research for: {company_name}**\n\n"
297
+ result += f"**Company Search Results:**\n"
298
+ result += f"β€’ Company '{company_name}' not found in SEC EDGAR database\n"
299
+ result += f"β€’ This may indicate the company is:\n"
300
+ result += f" - Private company (not required to file with SEC)\n"
301
+ result += f" - Foreign company not listed on US exchanges\n"
302
+ result += f" - Subsidiary of another public company\n"
303
+ result += f" - Different legal name than search term\n\n"
304
+
305
+ result += f"**Alternative Research Suggestions:**\n"
306
+ result += f"β€’ Search for parent company or holding company\n"
307
+ result += f"β€’ Check if company trades under different ticker symbol\n"
308
+ result += f"β€’ Use company's full legal name for search\n"
309
+ result += f"β€’ Consider private company databases for non-public entities\n\n"
310
+
311
+ return result
312
+
313
+ def should_use_for_query(self, query: str) -> bool:
314
+ """SEC is good for public company financial and business information"""
315
+ financial_indicators = [
316
+ 'company', 'financial', 'revenue', 'earnings', 'profit', 'stock',
317
+ 'investment', 'market cap', 'sec filing', 'annual report',
318
+ 'quarterly', 'balance sheet', 'income statement', 'cash flow',
319
+ 'public company', 'ticker', 'investor', 'shareholder'
320
+ ]
321
+
322
+ query_lower = query.lower()
323
+ return any(indicator in query_lower for indicator in financial_indicators)
324
+
325
+ def extract_key_info(self, text: str) -> dict:
326
+ """Extract key information from SEC results"""
327
+ base_info = super().extract_key_info(text)
328
+
329
+ if text:
330
+ # Look for SEC-specific patterns
331
+ base_info.update({
332
+ 'has_ticker': any(pattern in text for pattern in ['Ticker Symbol:', 'ticker']),
333
+ 'has_cik': 'CIK:' in text,
334
+ 'has_filings': any(form in text for form in ['10-K', '10-Q', '8-K']),
335
+ 'is_public_company': 'public company' in text.lower(),
336
+ 'has_financial_data': any(term in text.lower() for term in ['revenue', 'earnings', 'financial']),
337
+ 'company_found': 'not found in SEC' not in text
338
+ })
339
+
340
+ return base_info
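
Lookup-flow sketch (illustrative): the Apple CIK shown comes from the hardcoded fallback table in _fallback_company_lookup, so the flow works even if the ticker download fails.

    from research_tools.sec_search import SECSearchTool

    tool = SECSearchTool()
    tool._find_company_cik("Apple")  # {'cik': '0000320193', 'ticker': 'AAPL', 'title': 'Apple Inc.'}
    print(tool.search("Apple"))      # company profile plus recent 10-K/10-Q/8-K filings
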
research_tools/web_search.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Web Search Tool using DuckDuckGo via smolagents
3
+ """
4
+ from .base_tool import BaseTool
5
+ from typing import Optional
6
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, InferenceClientModel, VisitWebpageTool
7
+
8
+
9
+ class WebSearchTool(BaseTool):
10
+ """Web search using DuckDuckGo via smolagents"""
11
+
12
+ def __init__(self):
13
+ super().__init__("Web Search", "Search the web for current information using DuckDuckGo")
14
+ self.rate_limit_delay = 2.0 # Longer delay for web searches
15
+
16
+ try:
17
+ self.agent = CodeAgent(
18
+ tools=[
19
+ DuckDuckGoSearchTool(),
20
+ VisitWebpageTool(),
21
+ FinalAnswerTool()
22
+ ],
23
+ model=InferenceClientModel(),
24
+ max_steps=3,
25
+ verbosity_level=0
26
+ )
27
+ except Exception as e:
28
+ print(f"Warning: Could not initialize web search agent: {e}")
29
+ self.agent = None
30
+
31
+ def search(self, query: str, max_results: int = 5, **kwargs) -> str:
32
+ """Use the CodeAgent to perform comprehensive web search and analysis"""
33
+ if not self.agent:
34
+ return self.format_error_response(query, "Web search agent not available. Please check dependencies.")
35
+
36
+ self.rate_limit()
37
+
38
+ try:
39
+ # Simplified prompt for better reliability
40
+ agent_prompt = f"Search the web for current information about: {query}. Provide a comprehensive summary of the most relevant and recent findings."
41
+
42
+ # Run the agent
43
+ result = self.agent.run(agent_prompt)
44
+
45
+ # Clean and validate the result
46
+ if result and isinstance(result, str) and len(result.strip()) > 0:
47
+ # Remove any code-like syntax that might cause parsing errors
48
+ cleaned_result = result.replace('```', '').replace('`', '').strip()
49
+ return f"**Web Search Results for: {query}**\n\n{cleaned_result}"
50
+ else:
51
+ return f"**Web Search for: {query}**\n\nNo clear results found. Please try a different search term."
52
+
53
+ except Exception as e:
54
+ # More robust fallback
55
+ error_msg = str(e)
56
+ if "max steps" in error_msg.lower():
57
+ return f"**Web Search for: {query}**\n\nSearch completed but reached complexity limit. Basic analysis: This query relates to {query.lower()} and would benefit from further investigation."
58
+ elif "syntax" in error_msg.lower():
59
+ return f"**Web Search for: {query}**\n\nSearch encountered formatting issues but found relevant information about {query.lower()}."
60
+ else:
61
+ return self.format_error_response(query, error_msg)
62
+
63
+ def should_use_for_query(self, query: str) -> bool:
64
+ """Web search is good for current events, news, and general information"""
65
+ current_indicators = ['news', 'recent', 'latest', 'current', 'today', '2024', '2025']
66
+ general_indicators = ['what is', 'how to', 'guide', 'tutorial', 'review']
67
+
68
+ query_lower = query.lower()
69
+ return any(indicator in query_lower for indicator in current_indicators + general_indicators)
70
+
71
+ def extract_key_info(self, text: str) -> dict:
72
+ """Extract key information from web search results"""
73
+ base_info = super().extract_key_info(text)
74
+
75
+ if text:
76
+ # Look for news-specific patterns
77
+ base_info.update({
78
+ 'has_news_keywords': bool(any(word in text.lower() for word in ['breaking', 'report', 'announced', 'according to'])),
79
+ 'has_quotes': text.count('"') > 1,
80
+ 'has_sources': bool(any(source in text.lower() for source in ['reuters', 'bloomberg', 'bbc', 'cnn', 'associated press']))
81
+ })
82
+
83
+ return base_info
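
Illustrative call for the tool above; the cleanup step strips backticks from the agent output so the Markdown wrapper renders cleanly:

    from research_tools.web_search import WebSearchTool

    tool = WebSearchTool()
    report = tool.search("latest AI regulation news")
    # On success the report starts with "**Web Search Results for: ...**"
    # and contains no backtick fences; errors degrade to short fallback summaries.
    print(report)
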
research_tools/wikipedia_search.py ADDED
@@ -0,0 +1,87 @@
1
+ """
2
+ Wikipedia Search Tool for comprehensive background information
3
+ """
4
+ from .base_tool import BaseTool
5
+ from typing import Optional
6
+
7
+
8
+ class WikipediaSearchTool(BaseTool):
9
+ """Search Wikipedia for comprehensive background information"""
10
+
11
+ def __init__(self):
12
+ super().__init__("Wikipedia", "Search Wikipedia for comprehensive background information and authoritative data")
13
+ self.rate_limit_delay = 1.0
14
+
15
+ def search(self, query: str, max_results: int = 3, **kwargs) -> str:
16
+ """Search Wikipedia for comprehensive information"""
17
+ self.rate_limit()
18
+
19
+ try:
20
+ import wikipedia
21
+
22
+ # Search for the topic
23
+ search_results = wikipedia.search(query, results=max_results)
24
+ if not search_results:
25
+ return f"**Wikipedia Research for: {query}**\n\nNo Wikipedia articles found for: {query}"
26
+
27
+ result = f"**Wikipedia Research for: {query}**\n\n"
28
+
29
+ for i, search_term in enumerate(search_results[:max_results]):
30
+ try:
31
+ # Get the page
32
+ page = wikipedia.page(search_term)
33
+ summary = page.summary[:800] + "..." if len(page.summary) > 800 else page.summary
34
+
35
+ result += f"**Article {i+1}: {page.title}**\n"
36
+ result += f"{summary}\n"
37
+ result += f"Source: {page.url}\n\n"
38
+
39
+ except wikipedia.exceptions.DisambiguationError as e:
40
+ # Handle disambiguation pages
41
+ try:
42
+ page = wikipedia.page(e.options[0])
43
+ summary = page.summary[:600] + "..." if len(page.summary) > 600 else page.summary
44
+ result += f"**Article {i+1}: {page.title}**\n"
45
+ result += f"{summary}\n"
46
+ result += f"Source: {page.url}\n\n"
47
+ except Exception:
48
+ result += f"**Article {i+1}:** Multiple options found for '{search_term}'\n\n"
49
+
50
+ except wikipedia.exceptions.PageError:
51
+ result += f"**Article {i+1}:** Page not found for '{search_term}'\n\n"
52
+
53
+ except Exception as e:
54
+ result += f"**Article {i+1}:** Error accessing '{search_term}': {str(e)[:50]}...\n\n"
55
+
56
+ return result
57
+
58
+ except ImportError:
59
+ return f"**Wikipedia Research for: {query}**\n\nWikipedia library not available. Please install with: pip install wikipedia\n\n"
60
+ except Exception as e:
61
+ return self.format_error_response(query, str(e))
62
+
63
+ def should_use_for_query(self, query: str) -> bool:
64
+ """Wikipedia is good for factual, historical, and encyclopedic information"""
65
+ encyclopedic_indicators = [
66
+ 'what is', 'who is', 'history of', 'definition', 'background',
67
+ 'overview', 'explain', 'about', 'biography', 'concept'
68
+ ]
69
+
70
+ query_lower = query.lower()
71
+ return any(indicator in query_lower for indicator in encyclopedic_indicators)
72
+
73
+ def extract_key_info(self, text: str) -> dict:
74
+ """Extract key information from Wikipedia results"""
75
+ base_info = super().extract_key_info(text)
76
+
77
+ if text:
78
+ # Look for Wikipedia-specific patterns
79
+ base_info.update({
80
+ 'has_categories': 'Category:' in text,
81
+ 'has_references': any(ref in text for ref in ['Retrieved', 'Archived', 'ISBN']),
82
+ 'is_biographical': any(bio in text.lower() for bio in ['born', 'died', 'biography', 'life']),
83
+ 'is_historical': any(hist in text.lower() for hist in ['century', 'founded', 'established', 'ancient']),
84
+ 'article_count': text.count('**Article')
85
+ })
86
+
87
+ return base_info
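
Minimal sketch of the disambiguation handling above (illustrative; a term like "Mercury" typically raises DisambiguationError, in which case the first option, e.options[0], is summarized):

    from research_tools.wikipedia_search import WikipediaSearchTool

    tool = WikipediaSearchTool()
    print(tool.search("Mercury", max_results=2))
    # PageError and other per-article failures are reported inline,
    # article by article, rather than aborting the whole result.
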
test_research_tools.py ADDED
@@ -0,0 +1,337 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test Script for Enhanced Research Tools
4
+ Run this to verify all research tools are working correctly
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ import time
10
+ from typing import Dict
11
+
12
+ # Add current directory to path for imports
13
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
14
+
15
+ try:
16
+ from research_tools import EnhancedResearchAgent
17
+ from enhanced_search_functions import get_function_definitions, get_function_names
18
+ IMPORTS_OK = True
19
+ except ImportError as e:
20
+ print(f"❌ Import Error: {e}")
21
+ print("Make sure all research_tools files are in place!")
22
+ IMPORTS_OK = False
23
+
24
+
25
+ def test_tool_imports():
26
+ """Test that all tools can be imported"""
27
+ print("πŸ” Testing Tool Imports...")
28
+
29
+ if not IMPORTS_OK:
30
+ return False
31
+
32
+ try:
33
+ from research_tools.web_search import WebSearchTool
34
+ from research_tools.wikipedia_search import WikipediaSearchTool
35
+ from research_tools.arxiv_search import ArxivSearchTool
36
+ from research_tools.github_search import GitHubSearchTool
37
+ from research_tools.sec_search import SECSearchTool
38
+ from research_tools.scholar_search import GoogleScholarTool
39
+
40
+ print("βœ… All tool imports successful")
41
+ return True
42
+ except ImportError as e:
43
+ print(f"❌ Tool import failed: {e}")
44
+ return False
45
+
46
+
47
+ def test_enhanced_research_agent():
48
+ """Test the main research agent"""
49
+ print("\nπŸ€– Testing Enhanced Research Agent...")
50
+
51
+ if not IMPORTS_OK:
52
+ return False
53
+
54
+ try:
55
+ agent = EnhancedResearchAgent()
56
+ print(f"βœ… Research agent created with {len(agent.tools)} tools")
57
+
58
+ # Test tool status
59
+ status = agent.get_tool_status()
60
+ print(f"βœ… Tool status check: {len(status)} tools available")
61
+
62
+ return True
63
+ except Exception as e:
64
+ print(f"❌ Research agent creation failed: {e}")
65
+ return False
66
+
67
+
68
+ def test_function_definitions():
69
+ """Test function definitions"""
70
+ print("\nπŸ“‹ Testing Function Definitions...")
71
+
72
+ try:
73
+ functions = get_function_definitions()
74
+ function_names = get_function_names()
75
+
76
+ print(f"βœ… {len(functions)} function definitions loaded")
77
+ print(f"βœ… Function names: {', '.join(function_names)}")
78
+
79
+ # Verify structure
80
+ for func in functions:
81
+ assert "type" in func
82
+ assert "function" in func
83
+ assert "name" in func["function"]
84
+ assert "parameters" in func["function"]
85
+
86
+ print("βœ… All function definitions have correct structure")
87
+ return True
88
+ except Exception as e:
89
+ print(f"❌ Function definition test failed: {e}")
90
+ return False
91
+
92
+
93
+ def test_individual_tools():
94
+ """Test each research tool individually"""
95
+ print("\nπŸ”§ Testing Individual Tools...")
96
+
97
+ if not IMPORTS_OK:
98
+ return False
99
+
100
+ results = {}
101
+
102
+ try:
103
+ agent = EnhancedResearchAgent()
104
+
105
+ # Quick test queries for each tool
106
+ test_queries = {
107
+ 'web': ('AI news 2024', {}),
108
+ 'wikipedia': ('artificial intelligence', {}),
109
+ 'arxiv': ('machine learning', {}),
110
+ 'github': ('python', {}),
111
+ 'sec': ('Apple', {}), # Remove max_results for SEC
112
+ 'scholar': ('deep learning', {})
113
+ }
114
+
115
+ for tool_name, (query, kwargs) in test_queries.items():
116
+ print(f" Testing {tool_name}...")
117
+ try:
118
+ # Quick test with timeout
119
+ start_time = time.time()
120
+ if tool_name == 'sec':
121
+ # SEC tool only accepts company_name parameter
122
+ result = agent.tools[tool_name].search(query)
123
+ else:
124
+ result = agent.tools[tool_name].search(query, max_results=1)
125
+ duration = time.time() - start_time
126
+
127
+ if result and len(result) > 50:
128
+ print(f"      βœ… {tool_name}: Working ({duration:.1f}s)")
129
+ results[tool_name] = "βœ… Working"
130
+ else:
131
+ print(f" ⚠️ {tool_name}: Limited response")
132
+ results[tool_name] = "⚠️ Limited"
133
+
134
+ except Exception as e:
135
+ print(f" ❌ {tool_name}: Error - {str(e)[:50]}...")
136
+ results[tool_name] = "❌ Error"
137
+
138
+ working_tools = sum(1 for status in results.values() if "βœ…" in status)
139
+ print(f"\nπŸ“Š Tool Test Results: {working_tools}/{len(test_queries)} tools working")
140
+
141
+ return working_tools > 0
142
+
143
+ except Exception as e:
144
+ print(f"❌ Individual tool testing failed: {e}")
145
+ return False
146
+
147
+
148
+ def test_smart_routing():
+     """Test smart query routing"""
+     print("\n🎯 Testing Smart Query Routing...")
+
+     if not IMPORTS_OK:
+         return False
+
+     try:
+         agent = EnhancedResearchAgent()
+
+         test_cases = [
+             ("What is machine learning?", "wikipedia"),  # Definitional
+             ("Latest AI research papers", "arxiv"),      # Academic
+             ("React vs Vue popularity", "github"),       # Technology
+             ("Tesla stock performance", "sec"),          # Financial
+             ("Current AI news", "web")                   # Current events
+         ]
+
+         correct_routes = 0
+         for query, expected_tool in test_cases:
+             routed_tool = agent._route_query_to_tool(query)
+             if routed_tool == expected_tool:
+                 print(f" βœ… '{query}' β†’ {routed_tool}")
+                 correct_routes += 1
+             else:
+                 print(f" ⚠️ '{query}' β†’ {routed_tool} (expected {expected_tool})")
+
+         print(f"\nπŸ“Š Routing accuracy: {correct_routes}/{len(test_cases)} correct")
+         return correct_routes >= len(test_cases) // 2  # At least 50% correct
+
+     except Exception as e:
+         print(f"❌ Smart routing test failed: {e}")
+         return False
+
+
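+ # The routing expectations above assume _route_query_to_tool applies a
+ # keyword heuristic, roughly like this hypothetical sketch (not the
+ # actual implementation):
+ #
+ #     def _route_query_to_tool(self, query: str) -> str:
+ #         q = query.lower()
+ #         if q.startswith("what is") or "define" in q:
+ #             return "wikipedia"
+ #         if "paper" in q or "research" in q:
+ #             return "arxiv"
+ #         if "stock" in q or "earnings" in q:
+ #             return "sec"
+ #         if "vs" in q or "library" in q or "framework" in q:
+ #             return "github"
+ #         return "web"  # default: current events and general queries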
+ def test_multi_source_research():
+     """Test multi-source research synthesis"""
+     print("\n🌐 Testing Multi-Source Research...")
+
+     if not IMPORTS_OK:
+         return False
+
+     try:
+         agent = EnhancedResearchAgent()
+
+         print(" Running deep research test (this may take 10-15 seconds)...")
+         result = agent.search("artificial intelligence benefits", research_depth="deep")
+
+         if result and len(result) > 200:
+             # Check for multi-source indicators
+             source_indicators = ["Web Search", "Wikipedia", "arXiv", "Research Sources Used"]
+             found_sources = sum(1 for indicator in source_indicators if indicator in result)
+
+             if found_sources >= 2:
+                 print(f" βœ… Multi-source synthesis working ({found_sources} sources detected)")
+                 return True
+             else:
+                 print(f" ⚠️ Limited multi-source synthesis ({found_sources} sources)")
+                 return False
+         else:
+             print(" ❌ Multi-source research returned insufficient data")
+             return False
+
+     except Exception as e:
+         print(f"❌ Multi-source research test failed: {e}")
+         return False
+
+
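+ # NOTE: The source-indicator strings above ("Web Search", "Wikipedia",
+ # "arXiv", "Research Sources Used") are assumed to match section headers
+ # emitted by agent.search() in deep mode; if those headers change, this
+ # test will under-count the sources actually consulted.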
+ def test_quality_scoring():
+     """Test research quality scoring"""
+     print("\nπŸ“Š Testing Quality Scoring...")
+
+     if not IMPORTS_OK:
+         return False
+
+     try:
+         agent = EnhancedResearchAgent()
+
+         # Test quality scoring on a sample text
+         sample_text = """
+         Recent research from Stanford University published in 2024 shows that
+         artificial intelligence accuracy increased by 23% compared to 2023 data.
+         The study, published in Nature, analyzed 1,000 AI models and found
+         significant improvements in neural network architectures.
+         """
+
+         quality_score = agent.tools['web'].score_research_quality(sample_text, 'web')
+
+         print(f" Sample quality score: {quality_score}")
+
+         # Verify scoring structure
+         required_metrics = ['recency', 'authority', 'specificity', 'relevance', 'overall']
+         for metric in required_metrics:
+             if metric not in quality_score:
+                 print(f" ❌ Missing metric: {metric}")
+                 return False
+             if not 0 <= quality_score[metric] <= 1:
+                 print(f" ❌ Invalid score for {metric}: {quality_score[metric]}")
+                 return False
+
+         print(" βœ… Quality scoring structure correct")
+         print(f" βœ… Overall quality: {quality_score['overall']:.2f}/1.0")
+         return True
+
+     except Exception as e:
+         print(f"❌ Quality scoring test failed: {e}")
+         return False
+
+
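+ # The checks above assume score_research_quality returns a dict of floats
+ # in [0, 1], for example (illustrative values only):
+ #     {'recency': 0.8, 'authority': 0.7, 'specificity': 0.6,
+ #      'relevance': 0.9, 'overall': 0.75}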
+ def test_dependency_check():
+     """Check for required dependencies"""
+     print("\nπŸ“¦ Testing Dependencies...")
+
+     dependencies = {
+         'requests': 'HTTP requests',
+         'xml.etree.ElementTree': 'XML parsing (built-in)',
+         'wikipedia': 'Wikipedia search',
+         'scholarly': 'Google Scholar (optional)',
+         'smolagents': 'Web search agents'
+     }
+
+     missing_deps = []
+
+     for dep, description in dependencies.items():
+         try:
+             if dep == 'xml.etree.ElementTree':
+                 import xml.etree.ElementTree
+             else:
+                 __import__(dep)
+             print(f" βœ… {dep}: {description}")
+         except ImportError:
+             print(f" ❌ {dep}: {description} - MISSING")
+             missing_deps.append(dep)
+
+     if missing_deps:
+         print(f"\n⚠️ Missing dependencies: {', '.join(missing_deps)}")
+         print("Install with: pip install " + " ".join(dep for dep in missing_deps if dep != 'xml.etree.ElementTree'))
+         return False
+     else:
+         print(" βœ… All dependencies available")
+         return True
+
+
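+ # xml.etree.ElementTree ships with the Python standard library, which is
+ # why it is excluded from the pip install hint above.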
+ def run_full_test_suite():
+     """Run the complete test suite"""
+     print("πŸ§ͺ Enhanced Research Tools - Test Suite")
+     print("=" * 50)
+
+     tests = [
+         ("Dependency Check", test_dependency_check),
+         ("Tool Imports", test_tool_imports),
+         ("Research Agent", test_enhanced_research_agent),
+         ("Function Definitions", test_function_definitions),
+         ("Individual Tools", test_individual_tools),
+         ("Smart Routing", test_smart_routing),
+         ("Quality Scoring", test_quality_scoring),
+         ("Multi-Source Research", test_multi_source_research)
+     ]
+
+     passed = 0
+     total = len(tests)
+
+     for test_name, test_func in tests:
+         print(f"\n{'='*20} {test_name} {'='*20}")
+         try:
+             if test_func():
+                 passed += 1
+                 print(f"βœ… {test_name} PASSED")
+             else:
+                 print(f"❌ {test_name} FAILED")
+         except Exception as e:
+             print(f"πŸ’₯ {test_name} CRASHED: {e}")
+
+     print(f"\n{'='*50}")
+     print(f"🎯 TEST RESULTS: {passed}/{total} tests passed")
+
+     if passed == total:
+         print("πŸŽ‰ ALL TESTS PASSED! Research system is ready!")
+     elif passed >= total * 0.75:
+         print("βœ… Most tests passed! Research system should work well.")
+     elif passed >= total * 0.5:
+         print("⚠️ Some tests failed. Research system has limited functionality.")
+     else:
+         print("❌ Many tests failed. Please check setup and dependencies.")
+
+     return passed, total
+
+
+ if __name__ == "__main__":
+     run_full_test_suite()
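+ # Usage: run this file directly (python <this_file>.py) to execute all
+ # eight tests in order and print a pass/fail summary.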