|
import os |
|
import gradio as gr |
|
import cohere |
|
from crewai import Agent, Task, Crew, Process |
|
|
|
from langchain_groq import ChatGroq |
|
from langchain_cohere import ChatCohere |
|
|
|
from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults |
|
from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool |
|
|
|
|
|
# Fail fast at import time: both API keys must be present in the environment
# before any model call is attempted.
cohere_api_key = os.getenv('COHERE_API_KEY')
if not cohere_api_key:
    raise EnvironmentError("COHERE_API_KEY is not set in environment variables")

groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise EnvironmentError("GROQ_API_KEY is not set in environment variables")

# Module-level Cohere client, reused by the WebScrapper tool for summarization.
co = cohere.Client(cohere_api_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> str:
    """
    Performs a web search using the DuckDuckGo search engine and returns the results.

    This tool automates the retrieval of web-based information related to a specified query.

    Args:
    - search_query (str): The query string that specifies the information to be searched
      on the web. This should be a clear and concise expression of the user's
      information needs.

    Returns:
    - str: The raw result string produced by DuckDuckGoSearchResults.run(), a
      serialized sequence of results each carrying a snippet, title, and link.
      (Fixed: the previous annotation claimed ``dict`` and the docstring claimed
      ``list``; the tool actually returns a string.)
    """
    # max_results is the constructor alias for num_results on DuckDuckGoSearchResults.
    return DuckDuckGoSearchResults(max_results=10).run(search_query)
|
|
|
@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    Read the content of a given link and produce a topic-focused summary of it.

    The page is fetched and parsed via ScrapeWebsiteTool (which issues the HTTP
    request and extracts the text), then condensed with Cohere's command-r-plus
    chat model. Useful for web scraping tasks, data collection, or pulling
    specific information out of websites.

    Args:
    - url (str): The URL from which to scrape content.
    - topic (str): The specific topic on which to generate a summary.

    Returns:
    - str: A formatted summary of the page content on the topic, with the source URL.
    """
    # Retrieve the raw page text first; the scraper handles request + HTML parsing.
    page_text = ScrapeWebsiteTool(website_url=url).run()

    summary_prompt = f"Generate a summary of the following content on the topic ## {topic} ### \n\nCONTENT:\n\n" + page_text

    # Low temperature keeps the summary close to the scraped material.
    reply = co.chat(
        model='command-r-plus',
        message=summary_prompt,
        temperature=0.2,
        max_tokens=500,
        chat_history=[],
        prompt_truncation='AUTO'
    )

    return f"""###
    Summary: {reply.text}
    URL: {url}
    ###
    """
|
|
|
def kickoff_crew(topic: str) -> str:
    """Kickoff the research process for a given topic using CrewAI components.

    A Researcher agent gathers material with the DuckDuckGo search and
    WebScrapper tools, then an Editor agent refines the draft into a final
    report. The two tasks run sequentially.

    Args:
        topic (str): The subject to research.

    Returns:
        str: The crew's final report, or an ``"Error: ..."`` message if any
        step raises. (Fixed: the previous ``-> dict`` annotation was wrong —
        both paths produce text, never a dict.)
    """
    try:
        # Defensive re-check at call time in case the environment changed
        # after module import.
        groq_api_key = os.environ.get("GROQ_API_KEY")
        if not groq_api_key:
            raise ValueError("API Key for Groq is not set in environment variables")

        # Single LLM shared by both agents. (The unused ChatCohere instance
        # the original built here was dead code and has been removed.)
        selected_llm = ChatGroq(
            temperature=0,
            groq_api_key=groq_api_key,
            model_name="llama3-70b-8192"
        )

        researcher = Agent(
            role='Researcher',
            goal='Search and Collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=selected_llm,
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract essential insights on any given topic. "
                "Your dedication to detail ensures the reliability and thoroughness of your findings. "
                "With a strategic approach, you carefully analyze and document data, aiming to provide accurate and trustworthy results."
            ),
            allow_delegation=False,
            max_iter=15,
            max_rpm=20,
            memory=True,
            verbose=True
        )

        editor = Agent(
            role='Editor',
            goal='Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=selected_llm,
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys essential insights "
                "but is also easily understandable and appealing to diverse audiences. "
            ),
            allow_delegation=False,
            max_iter=5,
            max_rpm=15,
            memory=True,
            verbose=True
        )

        research_task = Task(
            description=(
                "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new queries related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to delve deeper into significant URLs identified from the snippets, extracting further information and insights. "
                "Compile these findings into a preliminary draft, documenting all relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication or misrepresentation of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body organized by different aspects of the topic, and a conclusion. "
                "Each section should properly cite sources, providing a thorough overview of the information gathered."
            ),
            agent=researcher
        )

        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically to enhance information flow. "
                "Verify the accuracy of all data, correct discrepancies, and update information to ensure it reflects current knowledge and is well-supported by sources. "
                "Improve the report’s readability by enhancing language clarity, adjusting sentence structures, and maintaining a consistent tone. "
                "Include a section listing all sources used, formatted as bullet points following this template: "
                # Fixed: dropped the stray apostrophe that the original template
                # carried ("- title: url'."), which leaked into the prompt.
                "- title: url."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative that accurately reflects the research findings. "
                "The report should include an introduction, an extensive discussion section, a concise conclusion, and a well-organized source list. "
                "Ensure the document is grammatically correct and ready for publication or presentation."
            ),
            agent=editor,
            context=[research_task]
        )

        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential,
        )

        result = crew.kickoff(inputs={'topic': topic})
        return result
    except Exception as e:
        # Surface any failure as a readable message for the Gradio output pane
        # instead of crashing the UI callback.
        return f"Error: {str(e)}"
|
|
|
def main():
    """Build and launch the Gradio front-end for the CrewAI Research Tool."""
    with gr.Blocks() as ui:
        gr.Markdown("## CrewAI Research Tool")
        topic_box = gr.Textbox(label="Enter Topic", placeholder="Type here...")
        start_btn = gr.Button("Start Research")
        report_view = gr.Markdown(label="Result")

        # Wire the button straight to the crew runner; its string result
        # renders in the Markdown pane.
        start_btn.click(fn=kickoff_crew, inputs=topic_box, outputs=report_view)

    # Queue caps concurrent jobs and keeps the REST API closed.
    ui.queue(api_open=False, max_size=3).launch()


if __name__ == "__main__":
    main()
|
|