ffreemt committed · Commit c002b9c · 1 Parent(s): 3617af0

Update q3ks 29.7G
README.md CHANGED

```diff
@@ -1,5 +1,5 @@
 ---
-title: langchain-
+title: langchain-llama-2-70b-guanaco-qlora-ggml
 emoji: 🚀
 colorFrom: green
 colorTo: green
```
app.py CHANGED

```diff
@@ -3,7 +3,6 @@
 # ruff: noqa: E501
 import gc
 import os
-import platform
 import random
 import time
 from collections import deque
```
```diff
@@ -31,49 +30,7 @@ from loguru import logger
 deq = deque()
 sig_end = object()  # signals the processing is done
 
-
-
-filename_list = [
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin",
-]
-
-URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G
-
-url = "https://huggingface.co/savvamadar/ggml-gpt4all-j-v1.3-groovy/blob/main/ggml-gpt4all-j-v1.3-groovy.bin"
-url = "https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.ggmlv3.q4_K_S.bin"  # 7.37G
-# url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"
-url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"  # 6.93G
-# url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.binhttps://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin"  # 7.87G
-
-url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G
-
-_ = (
-    "golay" in platform.node()
-    or "okteto" in platform.node()
-    or Path("/kaggle").exists()
-    # or psutil.cpu_count(logical=False) < 4
-    or 1  # run 7b in hf
-)
-
-if _:
-    # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
-    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
-    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
-    url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin"  # 4.08G
-
+url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G
 
 prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
```
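The commit pins the new checkpoint by its blob URL. For orientation only, the same file can be pulled through the Hub cache instead; a minimal sketch, not part of app.py, with the repo id and filename read off the `url` above:

```python
# Hypothetical helper, not in this commit: fetch the 29.7G GGML file
# via huggingface_hub rather than the raw blob URL.
from huggingface_hub import hf_hub_download

model_loc = hf_hub_download(
    repo_id="TheBloke/llama-2-70b-Guanaco-QLoRA-GGML",
    filename="llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin",
)
print(model_loc)  # resolved path inside the local HF cache
```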
```diff
@@ -139,13 +96,23 @@ prompt_template = """You are a helpful assistant. Let's think step by step.
 ### HUMAN:
 {input}
 ### RESPONSE:"""
-
-# PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is afriendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
-
 human_prefix = "### HUMAN"
 ai_prefix = "### RESPONSE"
 stop = [f"{human_prefix}:"]
 
+# Prompt template: Guanaco
+prompt_template = """You are a helpful assistant. Let's think step by step.
+{history}
+### Human:
+{input}
+### Assistant:"""
+human_prefix = "### Human"
+ai_prefix = "### Assistant"
+stop = [f"{human_prefix}:"]
+
+# PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is afriendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
+
+
 _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
 stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
 
```
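The added Guanaco block swaps both the template and the turn prefixes. A quick sketch of one rendered turn (not in the diff; the `history` and `user_input` values are made up):

```python
# Render one turn of the Guanaco-style template added above.
# `history` and `user_input` are made-up example values.
prompt_template = """You are a helpful assistant. Let's think step by step.
{history}
### Human:
{input}
### Assistant:"""

history = "### Human:\nHi there.\n### Assistant:\nHello! How can I help?"
user_input = "What is QLoRA?"

print(prompt_template.format(history=history, input=user_input))
# stop = ["### Human:"] cuts generation off before the model starts
# writing the next human turn itself.
```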
```diff
@@ -430,7 +397,7 @@ with gr.Blocks(
     gr.Markdown(
         f"""<h5><center>{Path(model_loc).name}</center></h4>
 The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
-It typically takes about
+It typically takes about xxx seconds for the first response to appear.
 
 Most examples are meant for another model.
 You probably should try to test
```
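The Markdown hunk refers to `model_loc`, but the loading code sits outside this diff. For orientation, a sketch of how a ggmlv3 checkpoint like this one is commonly loaded with ctransformers (an assumption; the Space may use a different backend):

```python
# Assumed loading path, not shown in this diff. ctransformers is a common
# backend for ggmlv3 .bin files; the path is the downloaded local file.
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin",  # local path to the 29.7G file
    model_type="llama",
    threads=8,  # tune to the host CPU
)
print(llm("### Human:\nHi.\n### Assistant:", stop=["### Human:"], max_new_tokens=64))
```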