remove google scholar
- app.py +5 -54
- consilium_mcp +1 -0
- enhanced_search_functions.py +2 -8
- requirements.txt +1 -2
- research_tools/__init__.py +0 -2
- research_tools/base_tool.py +0 -1
- research_tools/research_agent.py +3 -5
- research_tools/scholar_search.py +0 -248
- test_research_tools.py +1 -4
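
With the `source` branching removed, `search_academic` always resolves to the arXiv tool (see the app.py hunk below). A minimal sketch of the resulting call path, assuming the `research_tools` package is importable as in `test_research_tools.py`; the query string is illustrative:

```python
# Sketch of the post-change academic search path: arXiv is the only academic source.
# Assumes this repo's research_tools package is on the import path.
from research_tools.arxiv_search import ArxivSearchTool

tool = ArxivSearchTool()
result = tool.search("multi-agent consensus protocols")  # returns a formatted result string
print(f"arXiv search complete - found {len(result)} characters")
```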
app.py
CHANGED

```diff
@@ -392,33 +392,10 @@ class VisualConsensusEngine:
             self.update_research_progress(f"Wikipedia search complete - found {len(result)} characters")
 
         elif function_name == "search_academic":
-            source = arguments.get("source", "both")
-
-            if source == "arxiv":
-                self.update_research_progress("Connecting to arXiv preprint server...")
-                self.update_research_progress("Searching academic papers on arXiv...")
-                result = self.search_agent.tools['arxiv'].search(arguments["query"])
-                self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
-
-            elif source == "scholar":
-                self.update_research_progress("Connecting to Google Scholar...")
-                self.update_research_progress("Searching peer-reviewed research...")
-                result = self.search_agent.tools['scholar'].search(arguments["query"])
-                self.update_research_progress(f"Google Scholar search complete - found {len(result)} characters")
-
-            else:  # both sources
-                self.update_research_progress("Connecting to arXiv preprint server...")
-                self.update_research_progress("Searching academic papers on arXiv...")
-                arxiv_result = self.search_agent.tools['arxiv'].search(arguments["query"])
-                self.update_research_progress(f"arXiv complete ({len(arxiv_result)} chars) - now searching Google Scholar...")
-
-                self.update_research_progress("Connecting to Google Scholar...")
-                self.update_research_progress("Searching peer-reviewed research...")
-                scholar_result = self.search_agent.tools['scholar'].search(arguments["query"])
-                self.update_research_progress("Combining arXiv and Google Scholar results...")
-
-                result = f"{arxiv_result}\n\n{scholar_result}"
-                self.update_research_progress(f"Academic search complete - combined {len(result)} characters")
+            self.update_research_progress("Connecting to arXiv preprint server...")
+            self.update_research_progress("Searching academic papers on arXiv...")
+            result = self.search_agent.tools['arxiv'].search(arguments["query"])
+            self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
 
         elif function_name == "search_technology_trends":
             self.update_research_progress("Connecting to GitHub API...")
@@ -1477,7 +1454,7 @@ with gr.Blocks(title="🎭 Consilium: Multi-AI Expert Consensus Platform", theme
 * Visual roundtable of the AI models, including speech bubbles to see the discussion in real time.
 * MCP mode enabled to also use it directly in, for example, Claude Desktop (without the visual table).
 * Includes Mistral (**mistral-large-latest**) via their API and the models **DeepSeek-R1**, **Meta-Llama-3.3-70B-Instruct** and **QwQ-32B** via the SambaNova API.
-* Research Agent with 6 sources (**Web Search**, **Wikipedia**, **arXiv**, **Google Scholar**, **GitHub**, **SEC EDGAR**) for comprehensive live research.
+* Research Agent with 5 sources (**Web Search**, **Wikipedia**, **arXiv**, **GitHub**, **SEC EDGAR**) for comprehensive live research.
 * Assign different roles to the models, the protocol they should follow, and decide the communication strategy.
 * Pick one model as the lead analyst (had the best results when picking Mistral).
 * Configure the amount of discussion rounds.
@@ -1704,32 +1681,6 @@ with gr.Blocks(title="🎭 Consilium: Multi-AI Expert Consensus Platform", theme
         """)
 
     with gr.Tab("📚 Documentation"):
-        gr.Markdown("""
-        ## 🔬 **Research Capabilities**
-
-        ### **🔍 Multi-Source Research**
-        - **DuckDuckGo Web Search**: Current events, news, real-time information
-        - **Wikipedia**: Authoritative background and encyclopedic data
-        - **arXiv**: Academic papers and scientific research preprints
-        - **Google Scholar**: Peer-reviewed research and citation analysis
-        - **GitHub**: Technology trends, adoption patterns, developer activity
-        - **SEC EDGAR**: Public company financial data and regulatory filings
-
-        ### **🎯 Smart Research Routing**
-        The system automatically routes queries to the most appropriate sources:
-        - **Academic queries** → arXiv + Google Scholar
-        - **Technology questions** → GitHub + Web Search
-        - **Company research** → SEC filings + Web Search
-        - **Current events** → Web Search + Wikipedia
-        - **Deep research** → Multi-source synthesis with quality scoring
-
-        ### **📊 Research Quality Scoring**
-        Each research result is scored on:
-        - **Recency** (0-1): How current is the information
-        - **Authority** (0-1): Source credibility and reliability
-        - **Specificity** (0-1): Quantitative data and specific details
-        - **Relevance** (0-1): How well it matches the query
-        """)
     gr.Markdown("""
     ## 🎭 **Expert Role Assignments**
```
consilium_mcp
ADDED

```diff
@@ -0,0 +1 @@
+Subproject commit 883815f94aa0a2cba5d9bf5ea89db12fd75a1676
```
enhanced_search_functions.py
CHANGED

```diff
@@ -48,19 +48,13 @@ ENHANCED_SEARCH_FUNCTIONS = [
         "type": "function",
         "function": {
             "name": "search_academic",
-            "description": "Search academic papers and research on arXiv and Google Scholar for scientific evidence",
+            "description": "Search academic papers and research on arXiv for scientific evidence",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "query": {
                         "type": "string",
                         "description": "Academic research query to find peer-reviewed papers and scientific studies"
-                    },
-                    "source": {
-                        "type": "string",
-                        "enum": ["arxiv", "scholar", "both"],
-                        "description": "Academic source to search - arXiv for preprints, Scholar for citations, both for comprehensive",
-                        "default": "both"
                     }
                 },
                 "required": ["query"]
@@ -117,7 +111,7 @@ ENHANCED_SEARCH_FUNCTIONS = [
                     "type": "array",
                     "items": {
                         "type": "string",
-                        "enum": ["web", "wikipedia", "arxiv", "scholar", "github", "sec"]
+                        "enum": ["web", "wikipedia", "arxiv", "github", "sec"]
                     },
                     "description": "Priority list of sources to focus on for this research",
                     "default": []
```
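
Since the `source` parameter is gone, callers of the trimmed schema only pass `query`. A hedged sketch of wiring `ENHANCED_SEARCH_FUNCTIONS` into an OpenAI-compatible tool-calling request; the client setup and model name below are assumptions for illustration, not part of this repo:

```python
# Illustrative only: pass ENHANCED_SEARCH_FUNCTIONS as OpenAI-style tool definitions.
from openai import OpenAI
from enhanced_search_functions import ENHANCED_SEARCH_FUNCTIONS

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
response = client.chat.completions.create(
    model="gpt-4o-mini",  # model name is an assumption
    messages=[{"role": "user", "content": "Find recent papers on speculative decoding"}],
    tools=ENHANCED_SEARCH_FUNCTIONS,  # search_academic now only takes "query"
)
print(response.choices[0].message.tool_calls)
```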
requirements.txt
CHANGED

```diff
@@ -7,5 +7,4 @@ python-dotenv
 duckduckgo-search
 wikipedia
 gradio-consilium-roundtable
-openai
-scholarly
+openai
```
research_tools/__init__.py
CHANGED

```diff
@@ -5,7 +5,6 @@ from .wikipedia_search import WikipediaSearchTool
 from .arxiv_search import ArxivSearchTool
 from .github_search import GitHubSearchTool
 from .sec_search import SECSearchTool
-from .scholar_search import GoogleScholarTool
 from .research_agent import EnhancedResearchAgent
 
 __all__ = [
@@ -15,6 +14,5 @@ __all__ = [
     'ArxivSearchTool',
     'GitHubSearchTool',
     'SECSearchTool',
-    'GoogleScholarTool',
     'EnhancedResearchAgent'
 ]
```
research_tools/base_tool.py
CHANGED

```diff
@@ -65,7 +65,6 @@ class BaseTool(ABC):
         """Check source authority and credibility indicators"""
         authority_indicators = {
             'arxiv': 0.9,
-            'scholar': 0.9,
             'sec': 0.95,
             'github': 0.7,
             'wikipedia': 0.8,
```
research_tools/research_agent.py
CHANGED

```diff
@@ -11,7 +11,6 @@ from .wikipedia_search import WikipediaSearchTool
 from .arxiv_search import ArxivSearchTool
 from .github_search import GitHubSearchTool
 from .sec_search import SECSearchTool
-from .scholar_search import GoogleScholarTool
 
 
 class EnhancedResearchAgent:
@@ -24,8 +23,7 @@ class EnhancedResearchAgent:
             'wikipedia': WikipediaSearchTool(),
             'arxiv': ArxivSearchTool(),
             'github': GitHubSearchTool(),
-            'sec': SECSearchTool(),
-            'scholar': GoogleScholarTool()
+            'sec': SECSearchTool()
         }
 
         # Tool availability status
@@ -92,7 +90,7 @@ class EnhancedResearchAgent:
         for tool_name, tool in self.tools.items():
             if tool.should_use_for_query(query):
                 # Return first matching tool based on priority order
-                priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
+                priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
                 if tool_name in priority_order[:3]:  # High-priority specialized tools
                     return tool_name
 
@@ -123,7 +121,7 @@ class EnhancedResearchAgent:
         # Ensure we don't overwhelm with too many sources
         if len(relevant_tools) > 4:
             # Prioritize specialized tools
-            priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
+            priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
             relevant_tools = [tool for tool in priority_order if tool in relevant_tools][:4]
 
         return relevant_tools
```
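
The two `priority_order` changes above share one trimming idea: when more than four sources match a query, keep the most specialized ones. A standalone sketch of that step; the helper name is mine, while the list and cap mirror the diff:

```python
# Standalone sketch of the source-trimming logic from research_agent.py.
PRIORITY_ORDER = ['arxiv', 'sec', 'github', 'wikipedia', 'web']

def trim_sources(relevant_tools, cap=4):
    """Keep at most `cap` sources, preferring specialized tools first."""
    if len(relevant_tools) > cap:
        return [t for t in PRIORITY_ORDER if t in relevant_tools][:cap]
    return relevant_tools

print(trim_sources(['web', 'wikipedia', 'arxiv', 'github', 'sec']))
# -> ['arxiv', 'sec', 'github', 'wikipedia']
```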
research_tools/scholar_search.py
DELETED

```diff
@@ -1,248 +0,0 @@
-"""
-Google Scholar Search Tool for academic research
-"""
-from .base_tool import BaseTool
-from typing import List, Dict, Optional
-
-try:
-    from scholarly import scholarly
-    SCHOLARLY_AVAILABLE = True
-except ImportError:
-    SCHOLARLY_AVAILABLE = False
-
-
-class GoogleScholarTool(BaseTool):
-    """Search Google Scholar for academic research papers"""
-
-    def __init__(self):
-        super().__init__("Google Scholar", "Search Google Scholar for academic research papers and citations")
-        self.available = SCHOLARLY_AVAILABLE
-        self.rate_limit_delay = 3.0  # Be very respectful to Google Scholar
-
-    def search(self, query: str, max_results: int = 4, **kwargs) -> str:
-        """Search Google Scholar for research papers"""
-        if not self.available:
-            return self._unavailable_response(query)
-
-        self.rate_limit()
-
-        try:
-            # Search for publications with timeout handling
-            search_query = scholarly.search_pubs(query)
-
-            papers = []
-            for i, paper in enumerate(search_query):
-                if i >= max_results:
-                    break
-                papers.append(paper)
-
-            if papers:
-                result = f"**Google Scholar Research for: {query}**\n\n"
-                result += self._format_scholar_results(papers)
-                result += self._analyze_research_quality(papers)
-                return result
-            else:
-                return f"**Google Scholar Research for: {query}**\n\nNo relevant academic papers found."
-
-        except Exception as e:
-            error_msg = str(e)
-            if "blocked" in error_msg.lower() or "captcha" in error_msg.lower():
-                return f"**Google Scholar Research for: {query}**\n\nGoogle Scholar is temporarily blocking automated requests. This is normal behavior. Academic research is available through other sources like arXiv."
-            elif "timeout" in error_msg.lower():
-                return f"**Google Scholar Research for: {query}**\n\nRequest timeout - Google Scholar may be experiencing high load. Academic research available but slower than expected."
-            else:
-                return self.format_error_response(query, str(e))
-
-    def _unavailable_response(self, query: str) -> str:
-        """Response when scholarly library is not available"""
-        result = f"**Google Scholar Research for: {query}**\n\n"
-        result += "**Library Not Available**\n"
-        result += "Google Scholar integration requires the 'scholarly' library.\n\n"
-        result += "**Installation Instructions:**\n"
-        result += "```bash\n"
-        result += "pip install scholarly\n"
-        result += "```\n\n"
-        result += "**Alternative Academic Sources:**\n"
-        result += "• arXiv (for preprints and technical papers)\n"
-        result += "• PubMed (for medical and life sciences)\n"
-        result += "• IEEE Xplore (for engineering and computer science)\n"
-        result += "• JSTOR (for humanities and social sciences)\n\n"
-        result += "**Research Recommendation:**\n"
-        result += f"For the query '{query}', consider searching:\n"
-        result += "• Recent academic publications\n"
-        result += "• Peer-reviewed research articles\n"
-        result += "• Citation networks and impact metrics\n\n"
-
-        return result
-
-    def _format_scholar_results(self, papers: List[Dict]) -> str:
-        """Format Google Scholar search results"""
-        result = ""
-
-        for i, paper in enumerate(papers, 1):
-            # Extract paper information safely with better handling
-            title = paper.get('title', paper.get('bib', {}).get('title', 'Unknown Title'))
-
-            # Handle authors more robustly
-            authors = self._format_authors(paper.get('author', paper.get('bib', {}).get('author', [])))
-
-            # Get year from multiple possible locations
-            year = (paper.get('year') or
-                    paper.get('bib', {}).get('pub_year') or
-                    paper.get('bib', {}).get('year') or
-                    'Unknown Year')
-
-            # Get venue from multiple possible locations
-            venue = (paper.get('venue') or
-                    paper.get('bib', {}).get('venue') or
-                    paper.get('bib', {}).get('journal') or
-                    paper.get('bib', {}).get('booktitle') or
-                    'Unknown Venue')
-
-            citations = paper.get('num_citations', paper.get('citedby', 0))
-
-            result += f"**Paper {i}: {title}**\n"
-            result += f"Authors: {authors}\n"
-            result += f"Year: {year} | Venue: {venue}\n"
-            result += f"Citations: {citations:,}\n"
-
-            # Add abstract if available
-            abstract = (paper.get('abstract') or
-                       paper.get('bib', {}).get('abstract') or
-                       paper.get('summary'))
-
-            if abstract and len(str(abstract).strip()) > 10:
-                abstract_text = str(abstract)
-                if len(abstract_text) > 300:
-                    abstract_text = abstract_text[:300] + "..."
-                result += f"Abstract: {abstract_text}\n"
-
-            # Add URL if available
-            url = (paper.get('url') or
-                  paper.get('pub_url') or
-                  paper.get('eprint_url'))
-
-            if url:
-                result += f"URL: {url}\n"
-
-            result += "\n"
-
-        return result
-
-    def _format_authors(self, authors) -> str:
-        """Format author list safely with improved handling"""
-        if not authors:
-            return "Unknown Authors"
-
-        if isinstance(authors, str):
-            return authors
-        elif isinstance(authors, list):
-            # Handle list of author dictionaries or strings
-            author_names = []
-            for author in authors[:5]:  # Limit to first 5 authors
-                if isinstance(author, dict):
-                    # Try different possible name fields
-                    name = (author.get('name') or
-                           author.get('full_name') or
-                           author.get('firstname', '') + ' ' + author.get('lastname', '') or
-                           str(author))
-                    name = name.strip()
-                else:
-                    name = str(author).strip()
-
-                if name and name != 'Unknown Authors':
-                    author_names.append(name)
-
-            if not author_names:
-                return "Unknown Authors"
-
-            if len(authors) > 5:
-                author_names.append("et al.")
-
-            return ", ".join(author_names)
-        else:
-            return str(authors) if authors else "Unknown Authors"
-
-    def _analyze_research_quality(self, papers: List[Dict]) -> str:
-        """Analyze the quality and impact of research results"""
-        if not papers:
-            return ""
-
-        # Calculate citation metrics
-        citations = [paper.get('num_citations', 0) for paper in papers]
-        total_citations = sum(citations)
-        avg_citations = total_citations / len(papers) if papers else 0
-        high_impact_papers = sum(1 for c in citations if c > 100)
-
-        # Analyze publication years
-        years = [paper.get('year') for paper in papers if paper.get('year')]
-        recent_papers = sum(1 for year in years if isinstance(year, (int, str)) and str(year) in ['2023', '2024', '2025'])
-
-        # Analyze venues
-        venues = [paper.get('venue', '') for paper in papers]
-        unique_venues = len(set(v for v in venues if v and v != 'Unknown Venue'))
-
-        result = f"**Research Quality Analysis:**\n"
-        result += f"• Papers analyzed: {len(papers)}\n"
-        result += f"• Total citations: {total_citations:,}\n"
-        result += f"• Average citations per paper: {avg_citations:.1f}\n"
-        result += f"• High-impact papers (>100 citations): {high_impact_papers}\n"
-        result += f"• Recent publications (2023-2025): {recent_papers}\n"
-        result += f"• Venue diversity: {unique_venues} different publication venues\n"
-
-        # Research quality assessment
-        if avg_citations > 50:
-            quality_level = "High Impact"
-        elif avg_citations > 20:
-            quality_level = "Moderate Impact"
-        elif avg_citations > 5:
-            quality_level = "Emerging Research"
-        else:
-            quality_level = "Early Stage"
-
-        result += f"• Research maturity: {quality_level}\n"
-
-        # Authority assessment
-        if high_impact_papers > 0 and recent_papers > 0:
-            authority = "High - Established field with recent developments"
-        elif high_impact_papers > 0:
-            authority = "Moderate - Established field, may need recent updates"
-        elif recent_papers > 0:
-            authority = "Emerging - New research area with growing interest"
-        else:
-            authority = "Limited - Sparse academic coverage"
-
-        result += f"• Academic authority: {authority}\n\n"
-
-        return result
-
-    def should_use_for_query(self, query: str) -> bool:
-        """Google Scholar is good for academic research, citations, and scholarly articles"""
-        academic_indicators = [
-            'research', 'study', 'academic', 'paper', 'journal', 'peer-reviewed',
-            'citation', 'scholar', 'university', 'professor', 'phd', 'thesis',
-            'methodology', 'experiment', 'analysis', 'theory', 'empirical',
-            'literature review', 'meta-analysis', 'systematic review',
-            'conference', 'publication', 'scholarly'
-        ]
-
-        query_lower = query.lower()
-        return any(indicator in query_lower for indicator in academic_indicators)
-
-    def extract_key_info(self, text: str) -> dict:
-        """Extract key information from Scholar results"""
-        base_info = super().extract_key_info(text)
-
-        if text:
-            # Look for Scholar-specific patterns
-            base_info.update({
-                'has_citations': 'Citations:' in text,
-                'has_abstracts': 'Abstract:' in text,
-                'has_venues': 'Venue:' in text,
-                'has_recent_papers': any(year in text for year in ['2023', '2024', '2025']),
-                'has_high_impact': any(citation in text for citation in ['100', '200', '500', '1000']),
-                'is_available': 'Library Not Available' not in text,
-                'paper_count': text.count('**Paper')
-            })
-
-        return base_info
```
test_research_tools.py
CHANGED

```diff
@@ -35,7 +35,6 @@ def test_tool_imports():
         from research_tools.arxiv_search import ArxivSearchTool
         from research_tools.github_search import GitHubSearchTool
         from research_tools.sec_search import SECSearchTool
-        from research_tools.scholar_search import GoogleScholarTool
 
         print("✅ All tool imports successful")
         return True
@@ -108,7 +107,7 @@ def test_individual_tools():
         'wikipedia': ('artificial intelligence', {}),
         'arxiv': ('machine learning', {}),
         'github': ('python', {}),
-        'sec': ('Apple', {}),
-        'scholar': ('deep learning', {})
+        'sec': ('Apple', {})
     }
 
     for tool_name, (query, kwargs) in test_queries.items():
@@ -262,7 +260,6 @@ def test_dependency_check():
         'requests': 'HTTP requests',
        'xml.etree.ElementTree': 'XML parsing (built-in)',
        'wikipedia': 'Wikipedia search',
-        'scholarly': 'Google Scholar (optional)',
        'smolagents': 'Web search agents'
     }
```