Files changed (1) hide show
  1. app.py +94 -28
app.py CHANGED
@@ -4,23 +4,29 @@ import subprocess
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
6
 
7
- subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
 
 
 
 
 
 
8
  subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)
9
 
10
  hf_hub_download(
11
  repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
12
  filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
13
- local_dir = "./models"
14
  )
15
  hf_hub_download(
16
  repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF",
17
  filename="Llama-3-8B-Synthia-v3.5-f16.gguf",
18
- local_dir = "./models"
19
  )
20
  hf_hub_download(
21
  repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
22
  filename="Mistral-7B-Instruct-v0.3-f32.gguf",
23
- local_dir = "./models"
24
  )
25
 
26
  css = """
@@ -41,6 +47,49 @@ css = """
41
  }
42
  """
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def get_messages_formatter_type(model_name):
45
  from llama_cpp_agent import MessagesFormatterType
46
  if "Llama" in model_name:
@@ -50,24 +99,32 @@ def get_messages_formatter_type(model_name):
50
  else:
51
  raise ValueError(f"Unsupported model: {model_name}")
52
 
 
 
 
 
 
 
 
 
53
  @spaces.GPU(duration=120)
54
  def respond(
55
- message,
56
- history: list[tuple[str, str]],
57
- system_message,
58
- max_tokens,
59
- temperature,
60
- top_p,
61
- top_k,
62
- repeat_penalty,
63
- model,
64
  ):
65
  from llama_cpp import Llama
66
  from llama_cpp_agent import LlamaCppAgent
67
  from llama_cpp_agent.providers import LlamaCppPythonProvider
68
  from llama_cpp_agent.chat_history import BasicChatHistory
69
  from llama_cpp_agent.chat_history.messages import Roles
70
-
71
  chat_template = get_messages_formatter_type(model)
72
 
73
  llm = Llama(
@@ -86,7 +143,7 @@ def respond(
86
  predefined_messages_formatter_type=chat_template,
87
  debug_output=True
88
  )
89
-
90
  settings = provider.get_provider_default_settings()
91
  settings.temperature = temperature
92
  settings.top_k = top_k
@@ -94,7 +151,8 @@ def respond(
94
  settings.max_tokens = max_tokens
95
  settings.repeat_penalty = repeat_penalty
96
  settings.stream = True
97
-
 
98
  messages = BasicChatHistory()
99
 
100
  for msn in history:
@@ -108,20 +166,27 @@ def respond(
108
  }
109
  messages.add_message(user)
110
  messages.add_message(assistant)
111
-
 
 
 
 
 
 
 
 
 
112
  stream = agent.get_chat_response(
113
- message,
114
- llm_sampling_settings=settings,
115
- chat_history=messages,
116
- returns_streaming_generator=True,
117
  print_output=False
118
  )
119
-
120
  outputs = ""
121
  for output in stream:
122
  outputs += output
123
  yield outputs
124
 
 
125
  demo = gr.ChatInterface(
126
  respond,
127
  additional_inputs=[
@@ -150,15 +215,16 @@ demo = gr.ChatInterface(
150
  label="Repetition penalty",
151
  ),
152
  gr.Dropdown([
153
- 'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf',
154
- 'Llama-3-8B-Synthia-v3.5-f16.gguf',
155
- 'Mistral-7B-Instruct-v0.3-f32.gguf'
156
- ],
157
  value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
158
  label="Model"
159
  ),
160
  ],
161
- theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
 
162
  body_background_fill_dark="#111111",
163
  block_background_fill_dark="#111111",
164
  block_border_width="1px",
@@ -178,4 +244,4 @@ demo = gr.ChatInterface(
178
  )
179
 
180
  if __name__ == "__main__":
181
- demo.launch()
 
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
6
 
7
+ from duckduckgo_search import DDGS
8
+
9
+ from trafilatura import fetch_url, extract
10
+
11
# Runtime dependency installation: each command is handed to the shell as-is,
# and its exit status is not inspected.
_PIP_COMMANDS = (
    'pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124',
    'pip install llama-cpp-agent==0.2.10',
)
for _pip_command in _PIP_COMMANDS:
    subprocess.run(_pip_command, shell=True)

# (repo_id, filename) pairs of the GGUF files fetched into ./models, in the
# same order as the model dropdown lists them.
_MODEL_FILES = (
    ("bartowski/Meta-Llama-3-70B-Instruct-GGUF", "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"),
    ("bartowski/Llama-3-8B-Synthia-v3.5-GGUF", "Llama-3-8B-Synthia-v3.5-f16.gguf"),
    ("bartowski/Mistral-7B-Instruct-v0.3-GGUF", "Mistral-7B-Instruct-v0.3-f32.gguf"),
)
for _repo_id, _filename in _MODEL_FILES:
    hf_hub_download(
        repo_id=_repo_id,
        filename=_filename,
        local_dir="./models",
    )
31
 
32
  css = """
 
47
  }
48
  """
49
 
50
+
51
def get_website_content_from_url(url: str) -> str:
    """
    Download a web page and return its main content as a delimited text block.

    The page is fetched with trafilatura's ``fetch_url`` and the main content
    is pulled out with trafilatura's ``extract`` using JSON output.

    Args:
        url (str): URL to get website content from.

    Returns:
        str: The page title, the URL and the extracted raw text, each wrapped
            in ``===========`` section markers. An empty string is returned
            when nothing was downloaded or nothing could be extracted. If any
            exception is raised, the string ``"An error occurred: <message>"``
            is returned instead of propagating the error.
    """
    # Imported here so the function does not rely on a module-level
    # ``import json`` being present.
    import json

    try:
        downloaded = fetch_url(url)
        if not downloaded:
            # Nothing came back from the download, so there is nothing to
            # extract; report "no content" the same way as an empty extract.
            return ""

        result = extract(downloaded, include_formatting=True, include_links=True, output_format='json', url=url)
        if not result:
            return ""

        page = json.loads(result)
        return (
            f'=========== Website Title: {page["title"]} ===========\n\n'
            f'=========== Website URL: {url} ===========\n\n'
            f'=========== Website Content ===========\n\n'
            f'{page["raw_text"]}\n\n'
            f'=========== Website Content End ===========\n\n'
        )
    except Exception as e:
        # Best-effort: callers get a readable message rather than a crash.
        return f"An error occurred: {str(e)}"
74
+
75
+
76
def search_web(search_query: str):
    """
    Search the web for information.
    Args:
        search_query (str): Search query to search for.
    """
    # NOTE(review): the docstring above is kept word for word. This function is
    # passed to LlmStructuredOutputSettings.from_functions in respond(), which
    # presumably reads the docstring as the tool description - confirm before
    # rewording it.

    # Up to three DuckDuckGo text hits, no region restriction, safesearch off,
    # time limit 'y'.
    hits = DDGS().text(search_query, region='wt-wt', safesearch='off', timelimit='y', max_results=3)

    # Fetch each hit's page in order and drop the ones that yielded no content.
    pages = (get_website_content_from_url(hit['href']) for hit in hits)
    combined = "".join(page for page in pages if page != "")

    header = "Based on the following results, answer the previous user query:\nResults:\n\n"
    return header + combined.strip()
91
+
92
+
93
  def get_messages_formatter_type(model_name):
94
  from llama_cpp_agent import MessagesFormatterType
95
  if "Llama" in model_name:
 
99
  else:
100
  raise ValueError(f"Unsupported model: {model_name}")
101
 
102
+
103
def write_message_to_user():
    """
    Let you write a message to the user.
    """
    # NOTE(review): the docstring is kept word for word. This function is
    # passed to LlmStructuredOutputSettings.from_functions in respond(), which
    # presumably uses it as the tool description - confirm before rewording.
    # The function takes no input and always returns the same instruction.
    instruction = "Please write the message to the user."
    return instruction
108
+
109
+
110
  @spaces.GPU(duration=120)
111
  def respond(
112
+ message,
113
+ history: list[tuple[str, str]],
114
+ system_message,
115
+ max_tokens,
116
+ temperature,
117
+ top_p,
118
+ top_k,
119
+ repeat_penalty,
120
+ model,
121
  ):
122
  from llama_cpp import Llama
123
  from llama_cpp_agent import LlamaCppAgent
124
  from llama_cpp_agent.providers import LlamaCppPythonProvider
125
  from llama_cpp_agent.chat_history import BasicChatHistory
126
  from llama_cpp_agent.chat_history.messages import Roles
127
+ from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings
128
  chat_template = get_messages_formatter_type(model)
129
 
130
  llm = Llama(
 
143
  predefined_messages_formatter_type=chat_template,
144
  debug_output=True
145
  )
146
+
147
  settings = provider.get_provider_default_settings()
148
  settings.temperature = temperature
149
  settings.top_k = top_k
 
151
  settings.max_tokens = max_tokens
152
  settings.repeat_penalty = repeat_penalty
153
  settings.stream = True
154
+ output_settings = LlmStructuredOutputSettings.from_functions(
155
+ [search_web, write_message_to_user])
156
  messages = BasicChatHistory()
157
 
158
  for msn in history:
 
166
  }
167
  messages.add_message(user)
168
  messages.add_message(assistant)
169
+ result = agent.get_chat_response(message, llm_sampling_settings=settings, structured_output_settings=output_settings,
170
+ chat_history=messages,
171
+ print_output=False)
172
+ while True:
173
+ if result[0]["function"] == "write_message_to_user":
174
+ break
175
+ else:
176
+ result = agent.get_chat_response(result[0]["return_value"], role=Roles.tool, chat_history=messages,structured_output_settings=output_settings,
177
+ print_output=False)
178
+
179
  stream = agent.get_chat_response(
180
+ result[0]["return_value"], role=Roles.tool, llm_sampling_settings=settings, chat_history=messages, returns_streaming_generator=True,
 
 
 
181
  print_output=False
182
  )
183
+
184
  outputs = ""
185
  for output in stream:
186
  outputs += output
187
  yield outputs
188
 
189
+
190
  demo = gr.ChatInterface(
191
  respond,
192
  additional_inputs=[
 
215
  label="Repetition penalty",
216
  ),
217
  gr.Dropdown([
218
+ 'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf',
219
+ 'Llama-3-8B-Synthia-v3.5-f16.gguf',
220
+ 'Mistral-7B-Instruct-v0.3-f32.gguf'
221
+ ],
222
  value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
223
  label="Model"
224
  ),
225
  ],
226
+ theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",
227
+ font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
228
  body_background_fill_dark="#111111",
229
  block_background_fill_dark="#111111",
230
  block_border_width="1px",
 
244
  )
245
 
246
  if __name__ == "__main__":
247
+ demo.launch()