ffreemt committed
Commit
c002b9c
1 Parent(s): 3617af0

Update q3ks 29.7G

Files changed (2)
  1. README.md +1 -1
  2. app.py +15 -48
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
- title: langchain-llama2-7b-chat-uncensored-ggml
+ title: langchain-llama-2-70b-guanaco-qlora-ggml
 emoji: 🚀
 colorFrom: green
 colorTo: green
app.py CHANGED
@@ -3,7 +3,6 @@
 # ruff: noqa: E501
 import gc
 import os
- import platform
 import random
 import time
 from collections import deque
@@ -31,49 +30,7 @@ from loguru import logger
 deq = deque()
 sig_end = object()  # signals the processing is done

- # from langchain.llms import OpenAI
-
- filename_list = [
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin",
-     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin",
- ]
-
- URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G
-
- url = "https://huggingface.co/savvamadar/ggml-gpt4all-j-v1.3-groovy/blob/main/ggml-gpt4all-j-v1.3-groovy.bin"
- url = "https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.ggmlv3.q4_K_S.bin"  # 7.37G
- # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"
- url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"  # 6.93G
- # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin"  # 7.87G
-
- url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin"  # 7.37G
-
- _ = (
-     "golay" in platform.node()
-     or "okteto" in platform.node()
-     or Path("/kaggle").exists()
-     # or psutil.cpu_count(logical=False) < 4
-     or 1  # run 7b in hf
- )
-
- if _:
-     # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
-     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
-     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 4.08G
-     url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin"  # 4.08G
-
+ url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G

 prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
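The new url points at a single 29.7G q3_K_S file, replacing both the Wizard-Vicuna filename list and the platform-based fallback logic (which is why import platform is removed above). Below is a minimal sketch, not part of this commit, of how such a "/blob/" URL is typically resolved into a local model: the repo_id/filename split and the ctransformers loader are assumptions, since app.py's actual download and load code sits outside the hunks shown here.

from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download

url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G

# "https://huggingface.co/<owner>/<repo>/blob/main/<file>" -> repo_id and filename
_, _, _, owner, repo, _, _, filename = url.split("/")
model_loc = hf_hub_download(repo_id=f"{owner}/{repo}", filename=filename)

# Assumed loader; a ggmlv3 Llama-2-70B file also needs a llama.cpp backend
# recent enough to handle its grouped-query attention.
llm = AutoModelForCausalLM.from_pretrained(model_loc, model_type="llama")
print(llm("### Human: Hello\n### Assistant:", stop=["### Human:"]))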
@@ -139,13 +96,23 @@ prompt_template = """You are a helpful assistant. Let's think step by step.
 ### HUMAN:
 {input}
 ### RESPONSE:"""
-
- # PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
-
 human_prefix = "### HUMAN"
 ai_prefix = "### RESPONSE"
 stop = [f"{human_prefix}:"]

+ # Prompt template: Guanaco
+ prompt_template = """You are a helpful assistant. Let's think step by step.
+ {history}
+ ### Human:
+ {input}
+ ### Assistant:"""
+ human_prefix = "### Human"
+ ai_prefix = "### Assistant"
+ stop = [f"{human_prefix}:"]
+
+ # PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
+
+
 _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
 stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
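The two context lines above derive stop_string positionally from whichever template is active, so swapping in the Guanaco template changes what they return. A quick standalone check, not part of this commit:

prompt_template = """You are a helpful assistant. Let's think step by step.
{history}
### Human:
{input}
### Assistant:"""

_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
# _ == ['You are a helpful assistant. ...', '{history}', '### Human:', '{input}', '### Assistant:']
stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
print(stop_string)  # '{input}:' -- [-2] lands on the {input} line, not '### Human:'

Stopping on the human turn marker is still handled by the explicit stop = [f"{human_prefix}:"] a few lines up; the derived stop_string resolves to "{input}:" for this template.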
@@ -430,7 +397,7 @@ with gr.Blocks(
 gr.Markdown(
     f"""<h5><center>{Path(model_loc).name}</center></h4>
 The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
- It typically takes about 120 seconds for the first response to appear.
+ It typically takes about xxx seconds for the first response to appear.

 Most examples are meant for another model.
 You probably should try to test