remove google scholar
- app.py +5 -54
- consilium_mcp +1 -0
- enhanced_search_functions.py +2 -8
- requirements.txt +1 -2
- research_tools/__init__.py +0 -2
- research_tools/base_tool.py +0 -1
- research_tools/research_agent.py +3 -5
- research_tools/scholar_search.py +0 -248
- test_research_tools.py +1 -4
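
With the `source` branching removed, `search_academic` always resolves to the arXiv tool (see the app.py hunk below). A minimal sketch of the resulting call path, assuming the `research_tools` package is importable as in `test_research_tools.py`; the query string is illustrative:

```python
# Sketch of the post-change academic search path: arXiv is the only academic source.
# Assumes this repo's research_tools package is on the import path.
from research_tools.arxiv_search import ArxivSearchTool

tool = ArxivSearchTool()
result = tool.search("multi-agent consensus protocols")  # returns a formatted result string
print(f"arXiv search complete - found {len(result)} characters")
```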
app.py
CHANGED

```diff
@@ -392,33 +392,10 @@ class VisualConsensusEngine:
             self.update_research_progress(f"Wikipedia search complete - found {len(result)} characters")
 
         elif function_name == "search_academic":
-            source = arguments.get("source", "both")
-
-            if source == "arxiv":
-                self.update_research_progress("Connecting to arXiv preprint server...")
-                self.update_research_progress("Searching academic papers on arXiv...")
-                result = self.search_agent.tools['arxiv'].search(arguments["query"])
-                self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
-
-            elif source == "scholar":
-                self.update_research_progress("Connecting to Google Scholar...")
-                self.update_research_progress("Searching peer-reviewed research...")
-                result = self.search_agent.tools['scholar'].search(arguments["query"])
-                self.update_research_progress(f"Google Scholar search complete - found {len(result)} characters")
-
-            else:  # both sources
-                self.update_research_progress("Connecting to arXiv preprint server...")
-                self.update_research_progress("Searching academic papers on arXiv...")
-                arxiv_result = self.search_agent.tools['arxiv'].search(arguments["query"])
-                self.update_research_progress(f"arXiv complete ({len(arxiv_result)} chars) - now searching Google Scholar...")
-
-                self.update_research_progress("Connecting to Google Scholar...")
-                self.update_research_progress("Searching peer-reviewed research...")
-                scholar_result = self.search_agent.tools['scholar'].search(arguments["query"])
-                self.update_research_progress("Combining arXiv and Google Scholar results...")
-
-                result = f"{arxiv_result}\n\n{scholar_result}"
-                self.update_research_progress(f"Academic search complete - combined {len(result)} characters")
+            self.update_research_progress("Connecting to arXiv preprint server...")
+            self.update_research_progress("Searching academic papers on arXiv...")
+            result = self.search_agent.tools['arxiv'].search(arguments["query"])
+            self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
 
         elif function_name == "search_technology_trends":
             self.update_research_progress("Connecting to GitHub API...")
@@ -1477,7 +1454,7 @@ with gr.Blocks(title="🎭 Consilium: Multi-AI Expert Consensus Platform", theme
 * Visual roundtable of the AI models, including speech bubbles to see the discussion in real time.
 * MCP mode enabled to also use it directly in, for example, Claude Desktop (without the visual table).
 * Includes Mistral (**mistral-large-latest**) via their API and the models **DeepSeek-R1**, **Meta-Llama-3.3-70B-Instruct** and **QwQ-32B** via the SambaNova API.
-* Research Agent with 6 sources (**Web Search**, **Wikipedia**, **arXiv**, **Google Scholar**, **GitHub**, **SEC EDGAR**) for comprehensive live research.
+* Research Agent with 5 sources (**Web Search**, **Wikipedia**, **arXiv**, **GitHub**, **SEC EDGAR**) for comprehensive live research.
 * Assign different roles to the models, the protocol they should follow, and decide the communication strategy.
 * Pick one model as the lead analyst (had the best results when picking Mistral).
 * Configure the amount of discussion rounds.
@@ -1704,32 +1681,6 @@ with gr.Blocks(title="🎭 Consilium: Multi-AI Expert Consensus Platform", theme
         """)
 
     with gr.Tab("📚 Documentation"):
-        gr.Markdown("""
-        ## 🔬 **Research Capabilities**
-
-        ### **🔍 Multi-Source Research**
-        - **DuckDuckGo Web Search**: Current events, news, real-time information
-        - **Wikipedia**: Authoritative background and encyclopedic data
-        - **arXiv**: Academic papers and scientific research preprints
-        - **Google Scholar**: Peer-reviewed research and citation analysis
-        - **GitHub**: Technology trends, adoption patterns, developer activity
-        - **SEC EDGAR**: Public company financial data and regulatory filings
-
-        ### **🎯 Smart Research Routing**
-        The system automatically routes queries to the most appropriate sources:
-        - **Academic queries** → arXiv + Google Scholar
-        - **Technology questions** → GitHub + Web Search
-        - **Company research** → SEC filings + Web Search
-        - **Current events** → Web Search + Wikipedia
-        - **Deep research** → Multi-source synthesis with quality scoring
-
-        ### **📊 Research Quality Scoring**
-        Each research result is scored on:
-        - **Recency** (0-1): How current is the information
-        - **Authority** (0-1): Source credibility and reliability
-        - **Specificity** (0-1): Quantitative data and specific details
-        - **Relevance** (0-1): How well it matches the query
-        """)
     gr.Markdown("""
     ## 🎭 **Expert Role Assignments**
```
consilium_mcp
ADDED

```diff
@@ -0,0 +1 @@
+Subproject commit 883815f94aa0a2cba5d9bf5ea89db12fd75a1676
```
enhanced_search_functions.py
CHANGED

```diff
@@ -48,19 +48,13 @@ ENHANCED_SEARCH_FUNCTIONS = [
         "type": "function",
         "function": {
             "name": "search_academic",
-            "description": "Search academic papers and research on arXiv and Google Scholar for scientific evidence",
+            "description": "Search academic papers and research on arXiv for scientific evidence",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "query": {
                         "type": "string",
                         "description": "Academic research query to find peer-reviewed papers and scientific studies"
-                    },
-                    "source": {
-                        "type": "string",
-                        "enum": ["arxiv", "scholar", "both"],
-                        "description": "Academic source to search - arXiv for preprints, Scholar for citations, both for comprehensive",
-                        "default": "both"
                     }
                 },
                 "required": ["query"]
@@ -117,7 +111,7 @@ ENHANCED_SEARCH_FUNCTIONS = [
                     "type": "array",
                     "items": {
                         "type": "string",
-                        "enum": ["web", "wikipedia", "arxiv", "scholar", "github", "sec"]
+                        "enum": ["web", "wikipedia", "arxiv", "github", "sec"]
                     },
                     "description": "Priority list of sources to focus on for this research",
                     "default": []
```
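
Since the `source` parameter is gone, callers of the trimmed schema only pass `query`. A hedged sketch of wiring `ENHANCED_SEARCH_FUNCTIONS` into an OpenAI-compatible tool-calling request; the client setup and model name below are assumptions for illustration, not part of this repo:

```python
# Illustrative only: pass ENHANCED_SEARCH_FUNCTIONS as OpenAI-style tool definitions.
from openai import OpenAI
from enhanced_search_functions import ENHANCED_SEARCH_FUNCTIONS

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
response = client.chat.completions.create(
    model="gpt-4o-mini",  # model name is an assumption
    messages=[{"role": "user", "content": "Find recent papers on speculative decoding"}],
    tools=ENHANCED_SEARCH_FUNCTIONS,  # search_academic now only takes "query"
)
print(response.choices[0].message.tool_calls)
```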
requirements.txt
CHANGED

```diff
@@ -7,5 +7,4 @@ python-dotenv
 duckduckgo-search
 wikipedia
 gradio-consilium-roundtable
-openai
-scholarly
+openai
```
research_tools/__init__.py
CHANGED

```diff
@@ -5,7 +5,6 @@ from .wikipedia_search import WikipediaSearchTool
 from .arxiv_search import ArxivSearchTool
 from .github_search import GitHubSearchTool
 from .sec_search import SECSearchTool
-from .scholar_search import GoogleScholarTool
 from .research_agent import EnhancedResearchAgent
 
 __all__ = [
@@ -15,6 +14,5 @@ __all__ = [
     'ArxivSearchTool',
     'GitHubSearchTool',
     'SECSearchTool',
-    'GoogleScholarTool',
     'EnhancedResearchAgent'
 ]
```
research_tools/base_tool.py
CHANGED

```diff
@@ -65,7 +65,6 @@ class BaseTool(ABC):
         """Check source authority and credibility indicators"""
         authority_indicators = {
             'arxiv': 0.9,
-            'scholar': 0.9,
             'sec': 0.95,
             'github': 0.7,
             'wikipedia': 0.8,
```
research_tools/research_agent.py
CHANGED

```diff
@@ -11,7 +11,6 @@ from .wikipedia_search import WikipediaSearchTool
 from .arxiv_search import ArxivSearchTool
 from .github_search import GitHubSearchTool
 from .sec_search import SECSearchTool
-from .scholar_search import GoogleScholarTool
 
 
 class EnhancedResearchAgent:
@@ -24,8 +23,7 @@ class EnhancedResearchAgent:
             'wikipedia': WikipediaSearchTool(),
             'arxiv': ArxivSearchTool(),
             'github': GitHubSearchTool(),
-            'sec': SECSearchTool(),
-            'scholar': GoogleScholarTool()
+            'sec': SECSearchTool()
         }
 
         # Tool availability status
@@ -92,7 +90,7 @@ class EnhancedResearchAgent:
         for tool_name, tool in self.tools.items():
             if tool.should_use_for_query(query):
                 # Return first matching tool based on priority order
-                priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
+                priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
                 if tool_name in priority_order[:3]:  # High-priority specialized tools
                     return tool_name
 
@@ -123,7 +121,7 @@ class EnhancedResearchAgent:
         # Ensure we don't overwhelm with too many sources
         if len(relevant_tools) > 4:
             # Prioritize specialized tools
-            priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
+            priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
             relevant_tools = [tool for tool in priority_order if tool in relevant_tools][:4]
 
         return relevant_tools
```
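
The two `priority_order` changes above share one trimming idea: when more than four sources match a query, keep the most specialized ones. A standalone sketch of that step; the helper name is mine, while the list and cap mirror the diff:

```python
# Standalone sketch of the source-trimming logic from research_agent.py.
PRIORITY_ORDER = ['arxiv', 'sec', 'github', 'wikipedia', 'web']

def trim_sources(relevant_tools, cap=4):
    """Keep at most `cap` sources, preferring specialized tools first."""
    if len(relevant_tools) > cap:
        return [t for t in PRIORITY_ORDER if t in relevant_tools][:cap]
    return relevant_tools

print(trim_sources(['web', 'wikipedia', 'arxiv', 'github', 'sec']))
# -> ['arxiv', 'sec', 'github', 'wikipedia']
```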
research_tools/scholar_search.py
DELETED

```diff
@@ -1,248 +0,0 @@
-"""
-Google Scholar Search Tool for academic research
-"""
-from .base_tool import BaseTool
-from typing import List, Dict, Optional
-
-try:
-    from scholarly import scholarly
-    SCHOLARLY_AVAILABLE = True
-except ImportError:
-    SCHOLARLY_AVAILABLE = False
-
-
-class GoogleScholarTool(BaseTool):
-    """Search Google Scholar for academic research papers"""
-
-    def __init__(self):
-        super().__init__("Google Scholar", "Search Google Scholar for academic research papers and citations")
-        self.available = SCHOLARLY_AVAILABLE
-        self.rate_limit_delay = 3.0  # Be very respectful to Google Scholar
-
-    def search(self, query: str, max_results: int = 4, **kwargs) -> str:
-        """Search Google Scholar for research papers"""
-        if not self.available:
-            return self._unavailable_response(query)
-
-        self.rate_limit()
-
-        try:
-            # Search for publications with timeout handling
-            search_query = scholarly.search_pubs(query)
-
-            papers = []
-            for i, paper in enumerate(search_query):
-                if i >= max_results:
-                    break
-                papers.append(paper)
-
-            if papers:
-                result = f"**Google Scholar Research for: {query}**\n\n"
-                result += self._format_scholar_results(papers)
-                result += self._analyze_research_quality(papers)
-                return result
-            else:
-                return f"**Google Scholar Research for: {query}**\n\nNo relevant academic papers found."
-
-        except Exception as e:
-            error_msg = str(e)
-            if "blocked" in error_msg.lower() or "captcha" in error_msg.lower():
-                return f"**Google Scholar Research for: {query}**\n\nGoogle Scholar is temporarily blocking automated requests. This is normal behavior. Academic research is available through other sources like arXiv."
-            elif "timeout" in error_msg.lower():
-                return f"**Google Scholar Research for: {query}**\n\nRequest timeout - Google Scholar may be experiencing high load. Academic research available but slower than expected."
-            else:
-                return self.format_error_response(query, str(e))
-
-    def _unavailable_response(self, query: str) -> str:
-        """Response when scholarly library is not available"""
-        result = f"**Google Scholar Research for: {query}**\n\n"
-        result += "**Library Not Available**\n"
-        result += "Google Scholar integration requires the 'scholarly' library.\n\n"
-        result += "**Installation Instructions:**\n"
-        result += "```bash\n"
-        result += "pip install scholarly\n"
-        result += "```\n\n"
-        result += "**Alternative Academic Sources:**\n"
-        result += "• arXiv (for preprints and technical papers)\n"
-        result += "• PubMed (for medical and life sciences)\n"
-        result += "• IEEE Xplore (for engineering and computer science)\n"
-        result += "• JSTOR (for humanities and social sciences)\n\n"
-        result += "**Research Recommendation:**\n"
-        result += f"For the query '{query}', consider searching:\n"
-        result += "• Recent academic publications\n"
-        result += "• Peer-reviewed research articles\n"
-        result += "• Citation networks and impact metrics\n\n"
-
-        return result
-
-    def _format_scholar_results(self, papers: List[Dict]) -> str:
-        """Format Google Scholar search results"""
-        result = ""
-
-        for i, paper in enumerate(papers, 1):
-            # Extract paper information safely with better handling
-            title = paper.get('title', paper.get('bib', {}).get('title', 'Unknown Title'))
-
-            # Handle authors more robustly
-            authors = self._format_authors(paper.get('author', paper.get('bib', {}).get('author', [])))
-
-            # Get year from multiple possible locations
-            year = (paper.get('year') or
-                    paper.get('bib', {}).get('pub_year') or
-                    paper.get('bib', {}).get('year') or
-                    'Unknown Year')
-
-            # Get venue from multiple possible locations
-            venue = (paper.get('venue') or
-                    paper.get('bib', {}).get('venue') or
-                    paper.get('bib', {}).get('journal') or
-                    paper.get('bib', {}).get('booktitle') or
-                    'Unknown Venue')
-
-            citations = paper.get('num_citations', paper.get('citedby', 0))
-
-            result += f"**Paper {i}: {title}**\n"
-            result += f"Authors: {authors}\n"
-            result += f"Year: {year} | Venue: {venue}\n"
-            result += f"Citations: {citations:,}\n"
-
-            # Add abstract if available
-            abstract = (paper.get('abstract') or
-                       paper.get('bib', {}).get('abstract') or
-                       paper.get('summary'))
-
-            if abstract and len(str(abstract).strip()) > 10:
-                abstract_text = str(abstract)
-                if len(abstract_text) > 300:
-                    abstract_text = abstract_text[:300] + "..."
-                result += f"Abstract: {abstract_text}\n"
-
-            # Add URL if available
-            url = (paper.get('url') or
-                  paper.get('pub_url') or
-                  paper.get('eprint_url'))
-
-            if url:
-                result += f"URL: {url}\n"
-
-            result += "\n"
-
-        return result
-
-    def _format_authors(self, authors) -> str:
-        """Format author list safely with improved handling"""
-        if not authors:
-            return "Unknown Authors"
-
-        if isinstance(authors, str):
-            return authors
-        elif isinstance(authors, list):
-            # Handle list of author dictionaries or strings
-            author_names = []
-            for author in authors[:5]:  # Limit to first 5 authors
-                if isinstance(author, dict):
-                    # Try different possible name fields
-                    name = (author.get('name') or
-                           author.get('full_name') or
-                           author.get('firstname', '') + ' ' + author.get('lastname', '') or
-                           str(author))
-                    name = name.strip()
-                else:
-                    name = str(author).strip()
-
-                if name and name != 'Unknown Authors':
-                    author_names.append(name)
-
-            if not author_names:
-                return "Unknown Authors"
-
-            if len(authors) > 5:
-                author_names.append("et al.")
-
-            return ", ".join(author_names)
-        else:
-            return str(authors) if authors else "Unknown Authors"
-
-    def _analyze_research_quality(self, papers: List[Dict]) -> str:
-        """Analyze the quality and impact of research results"""
-        if not papers:
-            return ""
-
-        # Calculate citation metrics
-        citations = [paper.get('num_citations', 0) for paper in papers]
-        total_citations = sum(citations)
-        avg_citations = total_citations / len(papers) if papers else 0
-        high_impact_papers = sum(1 for c in citations if c > 100)
-
-        # Analyze publication years
-        years = [paper.get('year') for paper in papers if paper.get('year')]
-        recent_papers = sum(1 for year in years if isinstance(year, (int, str)) and str(year) in ['2023', '2024', '2025'])
-
-        # Analyze venues
-        venues = [paper.get('venue', '') for paper in papers]
-        unique_venues = len(set(v for v in venues if v and v != 'Unknown Venue'))
-
-        result = f"**Research Quality Analysis:**\n"
-        result += f"• Papers analyzed: {len(papers)}\n"
-        result += f"• Total citations: {total_citations:,}\n"
-        result += f"• Average citations per paper: {avg_citations:.1f}\n"
-        result += f"• High-impact papers (>100 citations): {high_impact_papers}\n"
-        result += f"• Recent publications (2023-2025): {recent_papers}\n"
-        result += f"• Venue diversity: {unique_venues} different publication venues\n"
-
-        # Research quality assessment
-        if avg_citations > 50:
-            quality_level = "High Impact"
-        elif avg_citations > 20:
-            quality_level = "Moderate Impact"
-        elif avg_citations > 5:
-            quality_level = "Emerging Research"
-        else:
-            quality_level = "Early Stage"
-
-        result += f"• Research maturity: {quality_level}\n"
-
-        # Authority assessment
-        if high_impact_papers > 0 and recent_papers > 0:
-            authority = "High - Established field with recent developments"
-        elif high_impact_papers > 0:
-            authority = "Moderate - Established field, may need recent updates"
-        elif recent_papers > 0:
-            authority = "Emerging - New research area with growing interest"
-        else:
-            authority = "Limited - Sparse academic coverage"
-
-        result += f"• Academic authority: {authority}\n\n"
-
-        return result
-
-    def should_use_for_query(self, query: str) -> bool:
-        """Google Scholar is good for academic research, citations, and scholarly articles"""
-        academic_indicators = [
-            'research', 'study', 'academic', 'paper', 'journal', 'peer-reviewed',
-            'citation', 'scholar', 'university', 'professor', 'phd', 'thesis',
-            'methodology', 'experiment', 'analysis', 'theory', 'empirical',
-            'literature review', 'meta-analysis', 'systematic review',
-            'conference', 'publication', 'scholarly'
-        ]
-
-        query_lower = query.lower()
-        return any(indicator in query_lower for indicator in academic_indicators)
-
-    def extract_key_info(self, text: str) -> dict:
-        """Extract key information from Scholar results"""
-        base_info = super().extract_key_info(text)
-
-        if text:
-            # Look for Scholar-specific patterns
-            base_info.update({
-                'has_citations': 'Citations:' in text,
-                'has_abstracts': 'Abstract:' in text,
-                'has_venues': 'Venue:' in text,
-                'has_recent_papers': any(year in text for year in ['2023', '2024', '2025']),
-                'has_high_impact': any(citation in text for citation in ['100', '200', '500', '1000']),
-                'is_available': 'Library Not Available' not in text,
-                'paper_count': text.count('**Paper')
-            })
-
-        return base_info
```
test_research_tools.py
CHANGED

```diff
@@ -35,7 +35,6 @@ def test_tool_imports():
         from research_tools.arxiv_search import ArxivSearchTool
         from research_tools.github_search import GitHubSearchTool
         from research_tools.sec_search import SECSearchTool
-        from research_tools.scholar_search import GoogleScholarTool
 
         print("✅ All tool imports successful")
         return True
@@ -108,7 +107,7 @@ def test_individual_tools():
         'wikipedia': ('artificial intelligence', {}),
         'arxiv': ('machine learning', {}),
         'github': ('python', {}),
-        'sec': ('Apple', {}),
-        'scholar': ('deep learning', {})
+        'sec': ('Apple', {})
     }
 
     for tool_name, (query, kwargs) in test_queries.items():
@@ -262,7 +260,6 @@ def test_dependency_check():
         'requests': 'HTTP requests',
        'xml.etree.ElementTree': 'XML parsing (built-in)',
        'wikipedia': 'Wikipedia search',
-        'scholarly': 'Google Scholar (optional)',
        'smolagents': 'Web search agents'
     }
```