JessyNTHUELEBC commited on
Commit
30f9a75
·
verified ·
1 Parent(s): d4b4e1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +367 -61
app.py CHANGED
@@ -1,64 +1,370 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
# Module-wide Inference API client; `respond` streams chat completions through it.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message``, yielding the reply as it grows.

    Args:
        message: The new user message.
        history: Earlier (user, assistant) turns; empty entries are skipped.
        system_message: Text sent as the system prompt.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately NOT called `message`, so the
    # parameter of the same name is no longer overwritten while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        # A delta may carry no text (content is None, e.g. on the closing
        # chunk); treat it as empty instead of raising on `str += None`.
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
# Extra controls rendered with the chat box. They are listed in the same order
# as the trailing parameters of `respond` (system_message, max_tokens,
# temperature, top_p).
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

demo = gr.ChatInterface(respond, additional_inputs=_extra_controls)


# Only start the web UI when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
 
1
+ from langchain.tools import tool
2
+ import requests
3
+ from pydantic import BaseModel, Field
4
+ import datetime
5
+
6
+
7
+ from geopy.distance import geodesic
8
+ import pandas as pd
9
+ from geopy.distance import geodesic
10
+ from geopy.point import Point
11
+
12
# Table read once at import time; the `dataframeagent` tool reports from it.
dataf = pd.read_csv("HW 1 newest version.csv")
15
 
16
+ # Import create_pandas_dataframe_agent from langchain_experimental.agents
17
+ from langchain_experimental.agents import create_pandas_dataframe_agent
18
+ from langchain.chat_models import ChatOpenAI
19
+ from langchain.agents.agent_types import AgentType
20
+
21
# Define the dataframeagent tool
22
@tool
def dataframeagent(value: str) -> str:
    """
    This function searches the entire dataframe to find rows where any column contains the specified value.


    Parameters:
    value (str): The value to search for in all columns.

    Returns:
    str: A string representation of the filtered dataframe and the extremes for specified columns.
    """
    # NOTE: langchain's `@tool` uses the docstring above as the tool
    # description shown to the model, so its wording is left untouched.
    # The row filter it describes is currently disabled: `value` only appears
    # in the report header and the whole table is always returned.

    # Metric columns whose highest and lowest rows are reported.
    columns_to_check = ['Profit Margin', 'Operating Margin  (ttm)', 'Return on Assets  (ttm)',
                        'Return on Equity  (ttm)', 'Revenue  (ttm)', 'Revenue Per Share  (ttm)']

    result = [f"Search Results for '{value}':\n{dataf.to_string(index=False)}\n"]

    # Convert on a private copy. Writing the numeric columns back into the
    # shared `dataf` made every later call fail, because the `.str` accessor
    # is not available on a column that has already been converted.
    frame = dataf.copy()

    for column in columns_to_check:
        try:
            # Strip the '%' and 'M' markers, then parse; anything that still
            # is not a number becomes NaN.  `astype(str)` also lets columns
            # that pandas already parsed as numeric go through this path.
            cleaned = (
                frame[column]
                .astype(str)
                .str.replace('%', '', regex=False)
                .str.replace('M', '', regex=False)
            )
            frame[column] = pd.to_numeric(cleaned, errors='coerce')

            highest_row = frame.loc[frame[column].idxmax()]
            lowest_row = frame.loc[frame[column].idxmin()]

            result.append(f"Highest {column}:\n{highest_row.to_string()}\n")
            result.append(f"Lowest {column}:\n{lowest_row.to_string()}\n")
        except Exception as e:
            # Best effort: a missing or unparsable column is reported in the
            # output instead of aborting the whole tool call.
            result.append(f"Error processing column {column}: {str(e)}\n")

    return "\n".join(result)
61
+
62
+ import json
63
+ from pathlib import Path
64
+ import pandas as pd
65
+
66
example_filepath = "QA_summary_zh.csv"

# Read the Q&A CSV file.
csv_data = pd.read_csv(example_filepath, encoding="utf-8")

# Convert the rows to a JSON array of records; force_ascii=False keeps the
# non-ASCII text as-is instead of \u escapes.
json_data = csv_data.to_json(orient='records', force_ascii=False)

# Save the JSON data to a file so it can be loaded by file path later.
json_file_path = "QA_summary_zh.json"
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json_data)

# Read it back with the same explicit encoding it was written with; relying on
# the platform default would mis-decode the text on non-UTF-8 locales.
data = json.loads(Path(json_file_path).read_text(encoding="utf-8"))
81
+
82
+ from langchain.document_loaders import JSONLoader
83
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
84
+
85
+
86
file_path = 'QA_summary_zh.json'

# jq filter: for every record in the array, emit an object holding its
# Question, Answer and description fields.
jq_schema = '.[] | {Question: .Question , Answer: .Answer , description: .description }'

# text_content=False because the filter yields objects rather than plain strings.
loader = JSONLoader(file_path=file_path, jq_schema=jq_schema, text_content=False)

# Load the documents and echo them once for inspection.
docs = loader.load()
print(docs)

# No splitting is applied: the loaded documents are used directly.
all_splits = docs
102
+
103
+ import json
104
+ from pathlib import Path
105
+ import pandas as pd
106
+ import os
107
+
108
+ from langchain_chroma import Chroma
109
+ from langchain_openai import OpenAIEmbeddings
110
# The OpenAI key must be supplied through the environment (for example as a
# deployment secret). It must never be written into the source file.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; provide it as an environment variable/secret."
    )

# Embed the loaded documents and index them for similarity search.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
112
+
113
+ # Import necessary modules
114
+ from langchain import hub
115
+ from langchain.prompts import PromptTemplate
116
+ from langchain.schema import StrOutputParser
117
+ from langchain.chains import ConversationChain
118
+ from langchain.memory import ConversationBufferMemory
119
+ from langchain.chat_models import ChatOpenAI
120
+ from langchain.schema import HumanMessage
121
+ from langchain_core.runnables import RunnablePassthrough, RunnableLambda
122
+
123
+
124
@tool
def FAQ(question: str) -> str:
    """Processes a question, retrieves relevant context, and generates a response."""
    # Flow: fetch the single most similar document from `vectorstore`, place it
    # in the prompt below together with the question, and let a fresh
    # ConversationChain (new, empty memory on every call) produce the answer.

    template = """
    您是一個繁體中文的助理,以下是從知識庫中檢索到的相關內容,請根據它們回答用戶的問題。

    內容: {context}

    問題: {question}



    """

    # temperature=0.0 is passed to the chat model.
    answer_llm = ChatOpenAI(temperature=0.0)

    # k=1: only the top similarity hit is used as context.
    top_hit_retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 1})

    chain = ConversationChain(
        llm=answer_llm,
        memory=ConversationBufferMemory(),
        verbose=True,
    )

    hits = top_hit_retriever.invoke(question)
    context = "\n\n".join(hit.page_content for hit in hits)

    filled_prompt = template.format(context=context, question=question)
    return chain.predict(input=filled_prompt)
178
+
179
+ import requests
180
+ from bs4 import BeautifulSoup
181
+ import random
182
+
183
# Pool of browser User-Agent strings; `gresb` picks one at random per call so
# its requests look like they come from an ordinary browser.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
]
191
+
192
@tool
def gresb(query: str) -> str:
    """Processes a question, retrieves relevant context, and generates a response."""
    # NOTE: langchain's `@tool` uses the docstring above as the tool
    # description, so it is left verbatim.  What the tool does: run a site
    # search on gresb.com for `query`, follow the first result link and return
    # the article text, or a short status message when a step fails.
    from urllib.parse import urljoin  # local import: only this tool needs it

    request_timeout = 15  # seconds; without a timeout a stalled server hangs the agent

    # Select a random User-Agent header.
    headers = {"User-Agent": random.choice(user_agents)}

    # Passing the query via `params` lets requests URL-encode it (spaces and
    # reserved characters such as '&' or '#') instead of hand-building the URL.
    try:
        response = requests.get(
            "https://www.gresb.com/nl-en",
            params={"s": query},
            headers=headers,
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        return f"Failed to retrieve search results. Error: {exc}"

    if response.status_code != 200:
        return f"Failed to retrieve search results. Status code: {response.status_code}"

    # Extract search result links (selector depends on the site's markup).
    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find_all('a', class_='overlay-link z-index-1')

    # An anchor without an href is treated the same as "nothing found".
    href = results[0].get('href') if results else None
    if not href:
        return "No search results found."

    # Resolve relative links against the page they were found on.
    article_url = urljoin(response.url, href)

    try:
        article_response = requests.get(article_url, headers=headers, timeout=request_timeout)
    except requests.RequestException as exc:
        return f"Failed to retrieve the article page. Error: {exc}"

    if article_response.status_code != 200:
        return f"Failed to retrieve the article page. Status code: {article_response.status_code}"

    return extract_article_text(article_response.content)
231
+
232
def extract_article_text(html_content):
    """Return the text of the article body found in ``html_content``.

    The article body is the first ``<div class="wysiwyg">``. The text of its
    <p>, <ul>, <blockquote>, <h2> and <h4> descendants is joined with single
    spaces and stripped. A fixed message is returned when no such div exists.
    """
    body = BeautifulSoup(html_content, 'html.parser').find('div', class_='wysiwyg')
    if not body:
        return "Article content not found in the provided structure."

    wanted_tags = ['p', 'ul', 'blockquote', 'h2', 'h4']
    pieces = [node.get_text() for node in body.find_all(wanted_tags)]
    return ' '.join(pieces).strip()
242
+
243
# Example usage
# query = "london office"
# article_text = gresb.run(query)
# print(article_text)  # Prints the extracted article content or a status message
247
+
248
+ import os
249
+ import openai
250
+
251
# Read the OpenAI key from the environment (for example a deployment secret).
# It must never be hard-coded in the source file.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; provide it as an environment variable/secret."
    )

# Tools exposed to the agent.
tools = [gresb, dataframeagent, FAQ]
254
+
255
+ from langchain.chat_models import ChatOpenAI
256
+ from langchain.prompts import ChatPromptTemplate
257
+ from langchain.tools.render import format_tool_to_openai_function
258
+ from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
259
+
260
# Convert every tool to its OpenAI function description and bind the
# descriptions to a temperature-0 chat model.
functions = list(map(format_tool_to_openai_function, tools))
model = ChatOpenAI(temperature=0).bind(functions=functions)
262
+
263
def run_agent(user_input, chat_history=None):
    """Drive `agent_chain` by hand until it finishes or the step budget runs out.

    Args:
        user_input: The user's query, passed to the chain as ``input``.
        chat_history: Optional list of earlier messages for the prompt's
            ``chat_history`` placeholder. Defaults to an empty history.

    Returns:
        ``result.return_values`` of the ``AgentFinish`` when the agent
        completes; otherwise a fixed failure message (string) when an unknown
        tool is requested or the iteration limit is reached.
    """
    # (action, observation) pairs fed back to the chain on every iteration.
    intermediate_steps = []
    max_iterations = 20  # upper bound so a confused agent cannot loop forever

    # The prompt declares a `chat_history` placeholder, so the key must be
    # present in the chain input. Omitting it makes `invoke` fail.
    history = [] if chat_history is None else chat_history

    # Name -> tool lookup.  Kept out of a variable called `tool` so the
    # imported `tool` decorator is not shadowed.
    tool_registry = {
        "gresb": gresb,
        "dataframeagent": dataframeagent,
        "FAQ": FAQ,
    }

    for _ in range(max_iterations):
        result = agent_chain.invoke({
            "input": user_input,
            "chat_history": history,
            "intermediate_steps": intermediate_steps,
        })

        # AgentFinish means the agent produced its final answer.
        if isinstance(result, AgentFinish):
            return result.return_values

        # Not finished, so `result` is an action carrying a message log.
        print(result.message_log)

        selected_tool = tool_registry.get(result.tool)
        if selected_tool is None:
            print(f"未找到合適的工具: {result.tool}")
            break

        # Run the requested tool and record the step for the next iteration.
        observation = selected_tool.run(result.tool_input)
        intermediate_steps.append((result, observation))

    # Reached on an unknown tool or when the iteration budget is exhausted.
    return "無法完成任務,請稍後再試。"
305
+
306
+ from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
307
+
308
# Agent prompt: system instructions describing the three tools, then the prior
# conversation, the current user input, and the scratchpad of tool calls made
# so far in this turn.
prompt = ChatPromptTemplate.from_messages([
    ("system",
     """You are a helpful assistant. There are three tools to use based on different scenarios.
1. gresb Tool:
Usage Scenario: Use this tool when you need to search for fund information related to a specific area, city, or keyword on the GRESB website. It is ideal for searching fund details in specific locations such as "London office" or "Paris commercial real estate."


2. dataframeagent Tool:
Usage Scenario: This dataframe contains 'Fund Name', 'Region', 'Ticker','Profit Margin', 'Operating Margin (ttm)', 'Return on Assets (ttm)', 'Return on Equity (ttm)',
'Revenue (ttm)', and 'Revenue Per Share (ttm)', choose one to search in the dataframe
You have access to the following note: GRESB is not a fund.

3. FAQ Tool
Usage Scenario: use this tool to search for 綠建築標章申請審核認可及使用作業要點.
example:「綠建築標章申請審核認可及使用作業要點」規定,修正重點為何?
example:109年7月1日起申請綠建築標章評定有何改變?


"""),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])
331
+
332
+
333
+ from langchain.agents.format_scratchpad import format_to_openai_functions
334
+ from langchain.schema.runnable import RunnablePassthrough
335
+ from langchain.schema.agent import AgentFinish
336
def _build_scratchpad(inputs):
    """Format the accumulated intermediate steps for the prompt's scratchpad slot."""
    return format_to_openai_functions(inputs["intermediate_steps"])


# Pipeline: add the scratchpad to the input, render the prompt, call the
# function-bound model, then parse the reply into an agent action or finish.
agent_chain = (
    RunnablePassthrough.assign(agent_scratchpad=_build_scratchpad)
    | prompt
    | model
    | OpenAIFunctionsAgentOutputParser()
)

from langchain.memory import ConversationBufferMemory

# Stored under "chat_history", the name of the prompt's history placeholder.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

from langchain.agents import AgentExecutor

# Executor that runs the chain/tool loop and reads/writes `memory`.
agent_executor = AgentExecutor(agent=agent_chain, tools=tools, memory=memory, verbose=True)
345
+
346
+ import gradio as gr
347
+
348
# Handler used by the UI: runs the agent and extracts its text answer.
def process_input(user_input):
    """Answer one query with `agent_executor` and return the output text.

    The shared `memory` is cleared first, so each call starts without
    previous conversation history.  Returns the ``'output'`` entry of the
    executor result, or "No output found." when that key is absent.
    """
    memory.clear()
    outcome = agent_executor.invoke({"input": user_input})
    return outcome['output'] if 'output' in outcome else "No output found."
359
+
360
# Build the Gradio interface: one text box in, one text box out, handled by
# `process_input`.
iface = gr.Interface(
    fn=process_input,
    inputs="text",
    outputs="text",
    title="TABC",
    description="The chatbot contains: Extracting YahooFinancial data, Scraping GRESB Website, and Retrieving 綠建築申請資料",
)

# Start the interface.
iface.launch()