pabloce committed on
Commit
e9eeeec
·
verified ·
1 Parent(s): 999432a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -3,11 +3,11 @@ import json
3
  import subprocess
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
6
-
7
  from duckduckgo_search import DDGS
8
-
9
  from trafilatura import fetch_url, extract
10
 
 
 
11
  subprocess.run(
12
  'pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124',
13
  shell=True)
@@ -19,13 +19,13 @@ hf_hub_download(
19
  local_dir="./models"
20
  )
21
  hf_hub_download(
22
- repo_id="bartowski/Einstein-v6-7B-GGUF",
23
- filename="Einstein-v6-7B-Q6_K.gguf",
24
  local_dir="./models"
25
  )
26
  hf_hub_download(
27
- repo_id="crusoeai/dolphin-2.9-llama3-70b-GGUF",
28
- filename="dolphin-2.9-llama3-70b.Q3_K_M.gguf",
29
  local_dir="./models"
30
  )
31
 
@@ -75,9 +75,9 @@ PLACEHOLDER = """
75
 
76
  def get_context_by_model(model_name):
77
  model_context_limits = {
78
- "Mistral-7B-Instruct-v0.3-f32.gguf": 32000,
79
- "Einstein-v6-7B-Q6_K.gguf": 32000,
80
- "dolphin-2.9-llama3-70b.Q3_K_M.gguf": 8192
81
  }
82
  return model_context_limits.get(model_name, None)
83
 
@@ -120,7 +120,7 @@ def search_web(search_query: str):
120
  result_string += web_info
121
 
122
  res = result_string.strip()
123
- return "Based on the following results, answer the previous user query:\nResults:\n\n" + res[:8000]
124
 
125
 
126
  def get_messages_formatter_type(model_name):
@@ -163,6 +163,7 @@ def respond(
163
  from llama_cpp_agent.chat_history.messages import Roles
164
  from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings
165
  chat_template = get_messages_formatter_type(model)
 
166
 
167
  llm = Llama(
168
  model_path=f"models/{model}",
@@ -170,7 +171,7 @@ def respond(
170
  n_threads=40,
171
  n_gpu_layers=81,
172
  n_batch=1024,
173
- n_ctx=8192,
174
  )
175
  provider = LlamaCppPythonProvider(llm)
176
 
@@ -253,8 +254,8 @@ demo = gr.ChatInterface(
253
  ),
254
  gr.Dropdown([
255
  'Mistral-7B-Instruct-v0.3-f32.gguf',
256
- 'Einstein-v6-7B-Q6_K.gguf',
257
- 'dolphin-2.9-llama3-70b.Q3_K_M.gguf'
258
  ],
259
  value="Mistral-7B-Instruct-v0.3-f32.gguf",
260
  label="Model"
 
3
  import subprocess
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
 
6
  from duckduckgo_search import DDGS
 
7
  from trafilatura import fetch_url, extract
8
 
9
+ model_selected = "Mistral-7B-Instruct-v0.3-f32.gguf"
10
+
11
  subprocess.run(
12
  'pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124',
13
  shell=True)
 
19
  local_dir="./models"
20
  )
21
  hf_hub_download(
22
+ repo_id="crusoeai/dolphin-2.9.1-mixtral-1x22b-GGUF",
23
+ filename="dolphin-2.9.1-mixtral-1x22b.Q6_K.gguf",
24
  local_dir="./models"
25
  )
26
  hf_hub_download(
27
+ repo_id="crusoeai/dolphin-2.9.1-llama-3-8b-GGUF",
28
+ filename="dolphin-2.9.1-llama-3-8b.Q6_K.gguf",
29
  local_dir="./models"
30
  )
31
 
 
75
 
def get_context_by_model(model_name):
    """Look up the context limit configured for a model file name.

    Args:
        model_name: File name of the GGUF model, for example
            "Mistral-7B-Instruct-v0.3-f32.gguf".

    Returns:
        The integer limit registered for ``model_name``, or ``None`` when
        the name is not present in the table.
    """
    model_context_limits = {
        "Mistral-7B-Instruct-v0.3-f32.gguf": 32768,
        "dolphin-2.9.1-mixtral-1x22b.Q6_K.gguf": 16384,
        "dolphin-2.9.1-llama-3-8b.Q6_K.gguf": 8192,
    }
    # dict.get already returns None for a missing key, so unknown model
    # names keep yielding None exactly as before.
    return model_context_limits.get(model_name)
83
 
 
120
  result_string += web_info
121
 
122
  res = result_string.strip()
123
+ return "Based on the following results, answer the previous user query:\nResults:\n\n" + res[:get_context_by_model(model_selected)]
124
 
125
 
126
  def get_messages_formatter_type(model_name):
 
163
  from llama_cpp_agent.chat_history.messages import Roles
164
  from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings
165
  chat_template = get_messages_formatter_type(model)
166
+ model_selected = model
167
 
168
  llm = Llama(
169
  model_path=f"models/{model}",
 
171
  n_threads=40,
172
  n_gpu_layers=81,
173
  n_batch=1024,
174
+ n_ctx=get_context_by_model(model),
175
  )
176
  provider = LlamaCppPythonProvider(llm)
177
 
 
254
  ),
255
  gr.Dropdown([
256
  'Mistral-7B-Instruct-v0.3-f32.gguf',
257
+ 'dolphin-2.9.1-mixtral-1x22b.Q6_K.gguf',
258
+ 'dolphin-2.9.1-llama-3-8b.Q6_K.gguf'
259
  ],
260
  value="Mistral-7B-Instruct-v0.3-f32.gguf",
261
  label="Model"