ffreemt committed
Commit • c002b9c • 1 Parent(s): 3617af0
Update q3ks 29.7G
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: langchain-
+title: langchain-llama-2-70b-guanaco-qlora-ggml
 emoji: 🚀
 colorFrom: green
 colorTo: green
app.py CHANGED
@@ -3,7 +3,6 @@
 # ruff: noqa: E501
 import gc
 import os
-import platform
 import random
 import time
 from collections import deque
@@ -31,49 +30,7 @@ from loguru import logger
 deq = deque()
 sig_end = object()  # signals the processing is done
 
-
-
-filename_list = [
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin",
-]
-
-URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G
-
-url = "https://huggingface.co/savvamadar/ggml-gpt4all-j-v1.3-groovy/blob/main/ggml-gpt4all-j-v1.3-groovy.bin"
-url = "https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.ggmlv3.q4_K_S.bin"  # 7.37G
-# url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"
-url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"  # 6.93G
-# url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin"  # 7.87G
-
-url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G
-
-_ = (
-    "golay" in platform.node()
-    or "okteto" in platform.node()
-    or Path("/kaggle").exists()
-    # or psutil.cpu_count(logical=False) < 4
-    or 1  # run 7b in hf
-)
-
-if _:
-    # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
-    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
-    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
-    url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin"  # 4.08G
-
+url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G
 
 prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
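This hunk drops the whole 7B/13B selection block (the `filename_list`, the stacked fallback `url` reassignments, and the `platform.node()` check, which is why `import platform` is removed above) in favor of a single hard-coded 29.7 GB q3_K_S quant of Llama-2-70B Guanaco-QLoRA. The hunk only sets the blob URL; as a rough illustration of how such a URL can be resolved to a local file, here is a minimal sketch using `huggingface_hub` (the `blob_url_to_local_path` helper is hypothetical, not part of app.py, which may download the file differently):

```python
# Sketch only: map a huggingface.co blob URL to a locally cached file.
from huggingface_hub import hf_hub_download

url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G

def blob_url_to_local_path(url: str) -> str:
    """Split a blob URL into repo_id and filename, then download via the Hub cache."""
    _, _, path = url.partition("huggingface.co/")
    repo_id, _, filename = path.partition("/blob/main/")
    return hf_hub_download(repo_id=repo_id, filename=filename)

# model_loc = blob_url_to_local_path(url)  # ~29.7 GB download on first run
```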
@@ -139,13 +96,23 @@ prompt_template = """You are a helpful assistant. Let's think step by step.
 ### HUMAN:
 {input}
 ### RESPONSE:"""
-
-# PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
-
 human_prefix = "### HUMAN"
 ai_prefix = "### RESPONSE"
 stop = [f"{human_prefix}:"]
 
+# Prompt template: Guanaco
+prompt_template = """You are a helpful assistant. Let's think step by step.
+{history}
+### Human:
+{input}
+### Assistant:"""
+human_prefix = "### Human"
+ai_prefix = "### Assistant"
+stop = [f"{human_prefix}:"]
+
+# PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
+
+
 _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
 stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
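Because `prompt_template` is simply reassigned, the `stop_string` derivation at the end of the hunk now operates on the Guanaco template. A stand-alone trace of those two lines (template copied from the hunk above) shows that the `[-2]` index lands on the `{input}` line rather than on a dialog prefix, so the derived value is `{input}:`; it is the explicit `stop = [f"{human_prefix}:"]` that carries `### Human:`:

```python
# Stand-alone trace of the stop_string derivation added in this commit.
prompt_template = """You are a helpful assistant. Let's think step by step.
{history}
### Human:
{input}
### Assistant:"""

# Non-blank lines of the raw template (placeholders not yet filled in):
# ["You are a helpful assistant. ...", "{history}", "### Human:",
#  "{input}", "### Assistant:"]
_ = [elm for elm in prompt_template.splitlines() if elm.strip()]

# Take everything before the first ":" on each line, re-append ":",
# then pick the second-to-last entry.
stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
print(stop_string)  # -> {input}:
```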
@@ -430,7 +397,7 @@ with gr.Blocks(
     gr.Markdown(
         f"""<h5><center>{Path(model_loc).name}</center></h4>
         The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
-        It typically takes about
+        It typically takes about xxx seconds for the first response to appear.
 
         Most examples are meant for another model.
         You probably should try to test
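For context: the Space's new title and the commented-out `PromptTemplate` repr suggest a langchain `ConversationChain` wrapped around the local GGML file. The app's actual wiring sits outside these hunks, so the following is only a minimal sketch, assuming langchain's `CTransformers` wrapper (backed by the `ctransformers` GGML loader) and a `model_loc` path pointing at the downloaded q3_K_S file:

```python
# Minimal sketch, not the Space's actual code; model_loc is assumed.
from langchain import PromptTemplate
from langchain.chains import ConversationChain
from langchain.llms import CTransformers
from langchain.memory import ConversationBufferMemory

model_loc = "llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # assumed local path

prompt_template = """You are a helpful assistant. Let's think step by step.
{history}
### Human:
{input}
### Assistant:"""
human_prefix = "### Human"
ai_prefix = "### Assistant"
stop = [f"{human_prefix}:"]

prompt = PromptTemplate(input_variables=["history", "input"], template=prompt_template)
llm = CTransformers(
    model=model_loc,
    model_type="llama",
    config={"stop": stop},  # cut generation at the next "### Human:" turn
)
# The memory prefixes must match the template so history renders as
# "### Human: ..." / "### Assistant: ..." turns.
memory = ConversationBufferMemory(human_prefix=human_prefix, ai_prefix=ai_prefix)
chain = ConversationChain(llm=llm, prompt=prompt, memory=memory)
# chain.predict(input="What is QLoRA in one sentence?")
```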