pabloce committed
Commit 1ea65ea · verified · 1 Parent(s): 76397ad

Update app.py

Files changed (1): app.py +14 -7
app.py CHANGED
@@ -18,6 +18,9 @@ from ui import css, PLACEHOLDER
 from utils import CitingSources
 from settings import get_context_by_model, get_messages_formatter_type
 
+llm = None
+llm_model = None
+
 hf_hub_download(
     repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
     filename="Mistral-7B-Instruct-v0.3-Q6_K.gguf",
@@ -60,14 +63,18 @@ def respond(
     top_k,
     repeat_penalty,
 ):
+    global llm
+    global llm_model
     chat_template = get_messages_formatter_type(model)
-    llm = Llama(
-        model_path=f"models/{model}",
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=get_context_by_model(model),
-    )
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=get_context_by_model(model),
+        )
+        llm_model = model
     provider = LlamaCppPythonProvider(llm)
     logging.info(f"Loaded chat examples: {chat_template}")
     search_tool = WebSearchTool(
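
The edit turns the unconditional Llama(...) construction into a lazy, cached load: the model is only (re)built when no instance exists yet or when a different model file is requested, so repeated calls to respond() with the same model reuse the already-loaded weights. Below is a minimal standalone sketch of the same pattern; the get_llm helper name and the fixed n_ctx value are illustrative assumptions (the app keeps this logic inline in respond() and derives the context size via get_context_by_model(model)), while the Llama constructor arguments are taken from the diff above.

from llama_cpp import Llama

llm = None        # cached Llama instance shared across requests
llm_model = None  # filename of the model currently loaded into `llm`

def get_llm(model: str) -> Llama:
    """Hypothetical helper: return a cached Llama, reloading only when `model` changes."""
    global llm, llm_model
    if llm is None or llm_model != model:
        llm = Llama(
            model_path=f"models/{model}",   # GGUF file fetched earlier via hf_hub_download
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,                     # assumption; the app uses get_context_by_model(model)
        )
        llm_model = model
    return llm

In the Space itself the same check sits directly at the top of respond(), so switching models in the UI triggers exactly one reload rather than a fresh Llama initialization on every request.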