ffreemt committed
Commit • c002b9c • 1 Parent(s): 3617af0
Update q3ks 29.7G
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: langchain-
+title: langchain-llama-2-70b-guanaco-qlora-ggml
 emoji: 🚀
 colorFrom: green
 colorTo: green
app.py CHANGED
@@ -3,7 +3,6 @@
 # ruff: noqa: E501
 import gc
 import os
-import platform
 import random
 import time
 from collections import deque
@@ -31,49 +30,7 @@ from loguru import logger
 deq = deque()
 sig_end = object()  # signals the processing is done
 
-
-
-filename_list = [
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin",
-    "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin",
-]
-
-URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G
-
-url = "https://huggingface.co/savvamadar/ggml-gpt4all-j-v1.3-groovy/blob/main/ggml-gpt4all-j-v1.3-groovy.bin"
-url = "https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.ggmlv3.q4_K_S.bin"  # 7.37G
-# url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"
-url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"  # 6.93G
-# url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin"  # 7.87G
-
-url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G
-
-_ = (
-    "golay" in platform.node()
-    or "okteto" in platform.node()
-    or Path("/kaggle").exists()
-    # or psutil.cpu_count(logical=False) < 4
-    or 1  # run 7b in hf
-)
-
-if _:
-    # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
-    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
-    url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
-    url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin"  # 4.08G
-
+url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G
 
 prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
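This hunk drops the whole 7B/13B selection block (the `filename_list`, the stacked fallback `url` reassignments, and the `platform.node()` check, which is why `import platform` is removed above) in favor of a single hard-coded 29.7 GB q3_K_S quant of Llama-2-70B Guanaco-QLoRA. The hunk only sets the blob URL; as a rough illustration of how such a URL can be resolved to a local file, here is a minimal sketch using `huggingface_hub` (the `blob_url_to_local_path` helper is hypothetical, not part of app.py, which may download the file differently):

```python
# Sketch only: map a huggingface.co blob URL to a locally cached file.
from huggingface_hub import hf_hub_download

url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G

def blob_url_to_local_path(url: str) -> str:
    """Split a blob URL into repo_id and filename, then download via the Hub cache."""
    _, _, path = url.partition("huggingface.co/")
    repo_id, _, filename = path.partition("/blob/main/")
    return hf_hub_download(repo_id=repo_id, filename=filename)

# model_loc = blob_url_to_local_path(url)  # ~29.7 GB download on first run
```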
@@ -139,13 +96,23 @@ prompt_template = """You are a helpful assistant. Let's think step by step.
 ### HUMAN:
 {input}
 ### RESPONSE:"""
-
-# PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
-
 human_prefix = "### HUMAN"
 ai_prefix = "### RESPONSE"
 stop = [f"{human_prefix}:"]
 
+# Prompt template: Guanaco
+prompt_template = """You are a helpful assistant. Let's think step by step.
+{history}
+### Human:
+{input}
+### Assistant:"""
+human_prefix = "### Human"
+ai_prefix = "### Assistant"
+stop = [f"{human_prefix}:"]
+
+# PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
+
+
 _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
 stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
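Because `prompt_template` is simply reassigned, the `stop_string` derivation at the end of the hunk now operates on the Guanaco template. A stand-alone trace of those two lines (template copied from the hunk above) shows that the `[-2]` index lands on the `{input}` line rather than on a dialog prefix, so the derived value is `{input}:`; it is the explicit `stop = [f"{human_prefix}:"]` that carries `### Human:`:

```python
# Stand-alone trace of the stop_string derivation added in this commit.
prompt_template = """You are a helpful assistant. Let's think step by step.
{history}
### Human:
{input}
### Assistant:"""

# Non-blank lines of the raw template (placeholders not yet filled in):
# ["You are a helpful assistant. ...", "{history}", "### Human:",
#  "{input}", "### Assistant:"]
_ = [elm for elm in prompt_template.splitlines() if elm.strip()]

# Take everything before the first ":" on each line, re-append ":",
# then pick the second-to-last entry.
stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
print(stop_string)  # -> {input}:
```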
@@ -430,7 +397,7 @@ with gr.Blocks(
     gr.Markdown(
         f"""<h5><center>{Path(model_loc).name}</center></h4>
         The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
-        It typically takes about
+        It typically takes about xxx seconds for the first response to appear.
 
         Most examples are meant for another model.
         You probably should try to test
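For context: the Space's new title and the commented-out `PromptTemplate` repr suggest a langchain `ConversationChain` wrapped around the local GGML file. The app's actual wiring sits outside these hunks, so the following is only a minimal sketch, assuming langchain's `CTransformers` wrapper (backed by the `ctransformers` GGML loader) and a `model_loc` path pointing at the downloaded q3_K_S file:

```python
# Minimal sketch, not the Space's actual code; model_loc is assumed.
from langchain import PromptTemplate
from langchain.chains import ConversationChain
from langchain.llms import CTransformers
from langchain.memory import ConversationBufferMemory

model_loc = "llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # assumed local path

prompt_template = """You are a helpful assistant. Let's think step by step.
{history}
### Human:
{input}
### Assistant:"""
human_prefix = "### Human"
ai_prefix = "### Assistant"
stop = [f"{human_prefix}:"]

prompt = PromptTemplate(input_variables=["history", "input"], template=prompt_template)
llm = CTransformers(
    model=model_loc,
    model_type="llama",
    config={"stop": stop},  # cut generation at the next "### Human:" turn
)
# The memory prefixes must match the template so history renders as
# "### Human: ..." / "### Assistant: ..." turns.
memory = ConversationBufferMemory(human_prefix=human_prefix, ai_prefix=ai_prefix)
chain = ConversationChain(llm=llm, prompt=prompt, memory=memory)
# chain.predict(input="What is QLoRA in one sentence?")
```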