eliujl committed · Commit 11c3099 · 1 Parent(s): ac1251c

Updated local LLM support

Added Mixtral model support. Corrected local LLM model_path.
app.py CHANGED

@@ -30,6 +30,8 @@ local_model_tuples = [
     (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
     (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+    (5, 'mixtral', "TheBloke/Mixtral-8x7B-v0.1-GGUF", "mixtral-8x7b-v0.1.Q8_0.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF"),
+    (6, 'mixtral_inst', "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF", "mixtral-8x7b-instruct-v0.1.Q2_K.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"),
 ]
 local_model_names = [t[1] for t in local_model_tuples]
 langchain.verbose = False

@@ -162,28 +164,33 @@ def use_local_llm(r_llm, local_llm_path):
     model_id, local_model_name, model_name, model_file, model_type, model_link = local_model_tuples[entry]
     model_path = os.path.join( local_llm_path, model_name, model_file )
     model_path = os.path.normpath( model_path )
+    model_dir = os.path.join( local_llm_path, model_name )
+    model_dir = os.path.normpath( model_dir )
     if not os.path.exists(model_path):
         print("model not existing at ", model_path, "\n")
         model_path = hf_hub_download(repo_id=model_name, filename=model_file, repo_type="model",
                                      #cache_dir=local_llm_path,
-                                     local_dir=local_llm_path,
+                                     #local_dir=local_llm_path,
+                                     local_dir=model_dir,
+                                     local_dir_use_symlinks=False)
         print("\n model downloaded at path=",model_path)
     else:
         print("model existing at ", model_path)
 
     llm = LlamaCpp(
         model_path=model_path,
-        temperature=0.0,
-        n_batch=300,
+        # temperature=0.0,
+        # n_batch=300,
         n_ctx=4000,
         max_tokens=2000,
-        n_gpu_layers=10,
-        n_threads=12,
-        top_p=1,
-        repeat_penalty=1.15,
-        verbose=False,
-        callback_manager=callback_manager,
-        streaming=True,
+        # n_gpu_layers=10,
+        # n_threads=12,
+        # top_p=1,
+        # repeat_penalty=1.15,
+        # verbose=False,
+        # callback_manager=callback_manager,
+        # streaming=True,
+        # chat_format="llama-2",
         # verbose=True, # Verbose is required to pass to the callback manager
     )
     return llm

@@ -193,6 +200,7 @@ def setup_prompt(r_llm):
     B_INST, E_INST = "[INST]", "[/INST]"
     B_SYS_LLAMA, E_SYS_LLAMA = "<<SYS>>\n", "\n<</SYS>>\n\n"
     B_SYS_MIS, E_SYS_MIS = "<s> ", "</s> "
+    B_SYS_MIXTRAL, E_SYS_MIXTRAL = "<s>[INST]", "[/INST]</s>[INST]"
     system_prompt = """Answer the question in your own words as truthfully as possible from the context given to you.
 Supply sufficient information, evidence, reasoning, source from the context, etc., to justify your answer with details and logic.
 Think step by step and do not jump to conclusion during your reasoning at the beginning.

@@ -213,8 +221,13 @@ def setup_prompt(r_llm):
     entry = local_model_names.index(r_llm)
     if local_model_tuples[entry][4] == 'llama':
         template = B_INST + B_SYS_LLAMA + system_prompt + E_SYS_LLAMA + instruction + E_INST
-    else:
+    elif local_model_tuples[entry][4] == 'mistral':
         template = B_SYS_MIS + B_INST + system_prompt + E_INST + E_SYS_MIS + B_INST + instruction + E_INST
+    elif local_model_tuples[entry][4] == 'mixtral':
+        template = B_SYS_MIXTRAL + system_prompt + E_SYS_MIXTRAL + B_INST + instruction + E_INST
+    else:
+        # Handle other models or raise an exception
+        pass
     prompt = PromptTemplate(
         input_variables=["context", "chat_history", "question"], template=template
     )