Spaces:

JessyNTHUELEBC
/

TABC1021

Runtime error

App Files Files Community

JessyNTHUELEBC committed on Oct 21, 2024

Commit

30f9a75

verified ·

1 Parent(s): d4b4e1b

Update app.py

Browse files

Files changed (1) hide show

app.py +367 -61

app.py CHANGED Viewed

@@ -1,64 +1,370 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
-if __name__ == "__main__":
-    demo.launch()

+from langchain.tools import tool
+import requests
+from pydantic import BaseModel, Field
+import datetime
+from geopy.distance import geodesic
+import pandas as pd
+from geopy.distance import geodesic
+from geopy.point import Point
+# Load the fund metrics table once at import time; the dataframeagent tool
+# defined further down reads this module-level DataFrame.
+dataf = pd.read_csv(
+    "HW 1 newest version.csv"
 )
+# Import create_pandas_dataframe_agent from langchain_experimental.agents
+from langchain_experimental.agents import create_pandas_dataframe_agent
+from langchain.chat_models import ChatOpenAI
+from langchain.agents.agent_types import AgentType
+# Define the dataframeagent tool function
+@tool
+def dataframeagent(value: str) -> str:
+    """
+    This function searches the entire dataframe to find rows where any column contains the specified value.
+    Parameters:
+    value (str): The value to search for in all columns.
+    Returns:
+    str: A string representation of the filtered dataframe and the extremes for specified columns.
+    """
+    # NOTE: the docstring above is kept verbatim on purpose; the @tool
+    # decorator is expected to expose it to the model as the tool description.
+    # The row filter on `value` is currently disabled (see the commented-out
+    # lines), so the whole table is reported and `value` only appears in the
+    # heading of the result.
+    #filtered_data = dataf[dataf.apply(lambda row: row.astype(str).str.contains(value, case=False).any(), axis=1)]
+    #if filtered_data.empty:
+        #return f"No matches found for '{value}'."
+    # Columns for finding highest and lowest values
+    columns_to_check = ['Profit Margin', 'Operating Margin  (ttm)', 'Return on Assets  (ttm)',
+                        'Return on Equity  (ttm)', 'Revenue  (ttm)', 'Revenue Per Share  (ttm)']
+    result = [f"Search Results for '{value}':\n{dataf.to_string(index=False)}\n"]
+    # Work on a copy. Converting the shared module-level frame in place made
+    # every call after the first one fail, because the already-converted
+    # numeric columns no longer have a `.str` accessor.
+    frame = dataf.copy()
+    # Find and display highest and lowest values for numerical columns
+    for column in columns_to_check:
+        try:
+            series = frame[column]
+            if not pd.api.types.is_numeric_dtype(series):
+                # Strip '%' and the 'M' (millions) suffix before parsing;
+                # anything that still is not a number becomes NaN.
+                series = (
+                    series.astype(str)
+                    .str.replace('%', '', regex=False)
+                    .str.replace('M', '', regex=False)
+                )
+            frame[column] = pd.to_numeric(series, errors='coerce')
+            highest_row = frame.loc[frame[column].idxmax()]
+            lowest_row = frame.loc[frame[column].idxmin()]
+            result.append(f"Highest {column}:\n{highest_row.to_string()}\n")
+            result.append(f"Lowest {column}:\n{lowest_row.to_string()}\n")
+        except Exception as e:
+            # Best effort: the tool must always hand a string back to the
+            # agent, so a failing column is reported instead of raised.
+            result.append(f"Error processing column {column}: {str(e)}\n")
+    return "\n".join(result)
+import json
+from pathlib import Path
+import pandas as pd
+example_filepath = "QA_summary_zh.csv"
+# Read the CSV file
+csv_data = pd.read_csv(example_filepath, encoding="utf-8")
+# Convert the rows to a JSON array of records, keeping non-ASCII text as-is
+json_data = csv_data.to_json(orient='records', force_ascii=False)
+# Save the JSON data to a file
+json_file_path = "QA_summary_zh.json"
+with open(json_file_path, 'w', encoding='utf-8') as json_file:
+    json_file.write(json_data)
+# Read the file back with the same encoding it was written with; relying on
+# the platform default encoding can fail on non-UTF-8 locales.
+data = json.loads(Path(json_file_path).read_text(encoding="utf-8"))
+from langchain.document_loaders import JSONLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+file_path='QA_summary_zh.json'
+# jq schema: for every record of the top-level array keep only the
+# Question, Answer and description fields.
+jq_schema='.[] | {Question: .Question , Answer: .Answer , description: .description }'
+loader = JSONLoader(
+         file_path=file_path,
+         jq_schema=jq_schema, # Add the jq_schema argument here
+         text_content=False)
+# Load the documents
+docs = loader.load()
+print(docs)
+# No text splitting is applied: the loaded documents are used as they are.
+all_splits = docs
+import json
+from pathlib import Path
+import pandas as pd
+import os
+from langchain_chroma import Chroma
+from langchain_openai import OpenAIEmbeddings
+# The OpenAI API key must be provided through the environment (for example a
+# Space secret). A key written into the source is exposed to every reader of
+# the repository and has to be revoked.
+if not os.environ.get("OPENAI_API_KEY"):
+    raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
+# Embed the loaded documents and index them in a Chroma vector store.
+vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
+# Import necessary modules
+from langchain import hub
+from langchain.prompts import PromptTemplate
+from langchain.schema import StrOutputParser
+from langchain.chains import ConversationChain
+from langchain.memory import ConversationBufferMemory
+from langchain.chat_models import ChatOpenAI
+from langchain.schema import HumanMessage
+from langchain_core.runnables import RunnablePassthrough, RunnableLambda
+@tool
+def FAQ(question: str) -> str:
+    """Processes a question, retrieves relevant context, and generates a response."""
+    # The docstring is left untouched: the @tool decorator is expected to use
+    # it as the tool description.
+    # Prompt skeleton with slots for the retrieved context and the question.
+    template = """
+    您是一個繁體中文的助理，以下是從知識庫中檢索到的相關內容，請根據它們回答用戶的問題。
+    內容: {context}
+    問題: {question}
+    """
+    # A new chain with a new, empty memory is built on every call.
+    chain = ConversationChain(
+        llm=ChatOpenAI(temperature=0.0),
+        memory=ConversationBufferMemory(),
+        verbose=True
+    )
+    # Ask the module-level vector store for the single most similar document.
+    top_hit_retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 1})
+    hits = top_hit_retriever.invoke(question)
+    # Join the page contents of the hits, separated by blank lines.
+    joined_context = "\n\n".join(hit.page_content for hit in hits)
+    # Fill the template and let the chain produce the answer.
+    filled_prompt = template.format(context=joined_context, question=question)
+    return chain.predict(input=filled_prompt)
+import requests
+from bs4 import BeautifulSoup
+import random
+# Pool of browser User-Agent strings; gresb() picks one at random for each
+# call so its requests look like they come from an ordinary browser.
+user_agents = [
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
+    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1"
+]
+@tool
+def gresb(query: str) -> str:
+    """Search the GRESB website for the query and return the text of the first matching article."""
+    # Imported locally so this function does not rely on a new file-level import.
+    from urllib.parse import quote_plus, urljoin
+    base_url = "https://www.gresb.com/nl-en?s="
+    # quote_plus encodes spaces as '+' (as before) and also escapes characters
+    # such as '&' or '#' that would otherwise corrupt the search URL.
+    search_url = f"{base_url}{quote_plus(query)}"
+    # Select a random User-Agent header
+    headers = {
+        "User-Agent": random.choice(user_agents)
+    }
+    # Without a timeout a stalled server would block the tool indefinitely.
+    request_timeout = 15  # seconds
+    # Make a request to the search URL; network failures are reported as a
+    # message because the tool has to return a string to the agent.
+    try:
+        response = requests.get(search_url, headers=headers, timeout=request_timeout)
+    except requests.RequestException as exc:
+        return f"Failed to retrieve search results. Error: {exc}"
+    if response.status_code != 200:
+        return f"Failed to retrieve search results. Status code: {response.status_code}"
+    # Parse the HTML content and collect the result links
+    # (adjust the selector if the website structure changes).
+    soup = BeautifulSoup(response.content, 'html.parser')
+    results = soup.find_all('a', class_='overlay-link z-index-1')
+    # Only the first result is followed; a result without an href is treated
+    # like no result at all instead of raising KeyError.
+    article_href = results[0].get('href') if results else None
+    if not article_href:
+        return "No search results found."
+    # Resolve relative links against the search page URL.
+    article_url = urljoin(search_url, article_href)
+    try:
+        article_response = requests.get(article_url, headers=headers, timeout=request_timeout)
+    except requests.RequestException as exc:
+        return f"Failed to retrieve the article page. Error: {exc}"
+    if article_response.status_code != 200:
+        return f"Failed to retrieve the article page. Status code: {article_response.status_code}"
+    # Extract and return the article text
+    return extract_article_text(article_response.content)
+def extract_article_text(html_content):
+    """Return the text of the article body found in the given HTML.
+    The article body is the first <div class="wysiwyg">. The text of its
+    <p>, <ul>, <blockquote>, <h2> and <h4> descendants is joined with single
+    spaces and stripped. A fixed message is returned when no such <div>
+    exists.
+    """
+    parsed = BeautifulSoup(html_content, 'html.parser')
+    body = parsed.find('div', class_='wysiwyg')
+    if not body:
+        return "Article content not found in the provided structure."
+    wanted_tags = ['p', 'ul', 'blockquote', 'h2', 'h4']
+    pieces = [node.get_text() for node in body.find_all(wanted_tags)]
+    return ' '.join(pieces).strip()
+# Example usage
+#query = "london office"
+#article_text = gresb.run(query)
+#print(article_text)  # This will print the extracted article content or any status messages
+import os
+import openai
+# Read the key from the environment instead of hard-coding a secret in the
+# source; a KeyError here means OPENAI_API_KEY has not been configured.
+openai.api_key = os.environ['OPENAI_API_KEY']
+# Tools made available to the agent.
+tools = [gresb, dataframeagent,FAQ]
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.tools.render import format_tool_to_openai_function
+from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
+# Convert every tool into an OpenAI function spec and bind the specs to a
+# chat model created with temperature 0.
+functions = [format_tool_to_openai_function(f) for f in tools]
+model = ChatOpenAI(temperature=0).bind(functions=functions)
+def run_agent(user_input):
+    """Drive the agent chain by hand until it finishes or gives up.
+    Each iteration invokes agent_chain with the user input and the steps
+    collected so far. If the chain returns an AgentFinish, its return_values
+    are returned. Otherwise the requested tool is run and its observation is
+    appended to the steps. After 20 iterations, or when the requested tool
+    is unknown, a fixed failure message is returned instead.
+    """
+    # (action, observation) pairs fed back to the chain on every iteration.
+    intermediate_steps = []
+    max_iterations = 20  # upper bound so the loop cannot run forever
+    available_tools = {
+        "gresb": gresb,
+        "dataframeagent": dataframeagent,
+        "FAQ":FAQ
+    }
+    for _ in range(max_iterations):
+        result = agent_chain.invoke({
+            "input": user_input,
+            # The prompt declares a chat_history placeholder; this manual
+            # loop keeps no history, so an empty one is supplied.
+            "chat_history": [],
+            "intermediate_steps": intermediate_steps
+        })
+        # An AgentFinish means the agent is done: hand back its final output.
+        if isinstance(result, AgentFinish):
+            return result.return_values
+        # Not finished, so the result is an action with a message log.
+        print(result.message_log)
+        # Pick the tool the agent asked for by name.
+        selected_tool = available_tools.get(result.tool)
+        if not selected_tool:
+            print(f"未找到合適的工具: {result.tool}")
+            break
+        observation = selected_tool.run(result.tool_input)
+        intermediate_steps.append((result, observation))
+    # Reached when the iteration limit is hit or the tool was unknown.
+    return "無法完成任務，請稍後再試。"
+# This import has to run at module level: it used to sit after the return
+# statement inside run_agent, where it never executed, so the prompt
+# definition that follows failed with NameError on MessagesPlaceholder.
+from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
+# Agent prompt: a system message describing the three tools, then the chat
+# history, the user's input and the agent scratchpad.
+# NOTE(review): "foud" in the system text looks like a typo for "fund", and
+# the column names listed there use a single space before "(ttm)" while the
+# column list in dataframeagent uses two — confirm which spelling is intended.
+prompt = ChatPromptTemplate.from_messages([
+    ("system",
+     """You are a helpful assistant. There are three tools to use based on different scenarios.
+    1. gresb Tool:
+    Usage Scenario: Use this tool when you need to search for fund information related to a specific area, city, or keyword on the GRESB website. It is ideal for searching fund details in specific locations such as "London office" or "Paris commercial real estate."
+    2. dataframeagent Tool:
+    Usage Scenario: This dataframe contains 'Fund Name', 'Region', 'Ticker','Profit Margin', 'Operating Margin (ttm)', 'Return on Assets (ttm)', 'Return on Equity (ttm)',
+    'Revenue (ttm)', and 'Revenue Per Share (ttm)', choose one to search in the dataframe
+    You have access to the following note: GRESB is not a foud.
+    3. FAQ Tool
+    Usage Scenario: use this tool to search for 綠建築標章申請審核認可及使用作業要點.
+    example:「綠建築標章申請審核認可及使用作業要點」規定，修正重點為何？
+    example:109年7月1日起申請綠建築標章評定有何改變？
+"""),
+    MessagesPlaceholder(variable_name="chat_history"),
+    ("user", "{input}"),
+    MessagesPlaceholder(variable_name="agent_scratchpad")
+])
+from langchain.agents.format_scratchpad import format_to_openai_functions
+from langchain.schema.runnable import RunnablePassthrough
+from langchain.schema.agent import AgentFinish
+# Agent pipeline: build agent_scratchpad from the intermediate steps, fill
+# the prompt, call the function-bound model and parse its output.
+agent_chain = RunnablePassthrough.assign(
+    agent_scratchpad= lambda x: format_to_openai_functions(x["intermediate_steps"])
+) | prompt | model | OpenAIFunctionsAgentOutputParser()
+from langchain.memory import ConversationBufferMemory
+# Conversation memory stored under the "chat_history" key used by the prompt.
+memory = ConversationBufferMemory(return_messages=True,memory_key="chat_history")
+from langchain.agents import AgentExecutor
+# Executor that runs agent_chain with the tool list and the memory above.
+agent_executor = AgentExecutor(agent=agent_chain, tools=tools, verbose=True, memory=memory)
+import gradio as gr
+# Handler used by the Gradio interface: returns the agent's output for one query.
+def process_input(user_input):
+    """Run one user query through agent_executor and return its output.
+    The shared memory is cleared first, so each query starts without prior
+    history. When the executor result has no 'output' entry, the fixed text
+    "No output found." is returned.
+    """
+    memory.clear()
+    outcome = agent_executor.invoke({"input": user_input})
+    return outcome['output'] if 'output' in outcome else "No output found."
+# Build the Gradio interface
+iface = gr.Interface(
+    fn=process_input,  # handler function
+    inputs="text",  # user input type
+    outputs="text",  # output type
+    title="TABC",  # interface title
+    description="The chatbot contains: Extracting YahooFinancial data, Scraping GRESB Website, and Retrieving 綠建築申請資料"  # interface description
+)
+# Launch the interface
+iface.launch()