jerpint committed on
Commit
1423dd4
1 Parent(s): 1e97486

update to latest buster

Browse files
Files changed (3) hide show
  1. app.py +1 -0
  2. cfg.py +69 -22
  3. requirements.txt +1 -1
app.py CHANGED
@@ -50,6 +50,7 @@ def chat(history):
50
  user_input = history[-1][0]
51
 
52
  completion = buster.process_input(user_input)
 
53
 
54
  history[-1][1] = ""
55
 
 
50
  user_input = history[-1][0]
51
 
52
  completion = buster.process_input(user_input)
53
+ print(completion)
54
 
55
  history[-1][1] = ""
56
 
cfg.py CHANGED
@@ -1,14 +1,23 @@
1
  from buster.busterbot import Buster, BusterConfig
2
- from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
3
  from buster.formatters.documents import DocumentsFormatterJSON
4
  from buster.formatters.prompts import PromptFormatter
 
 
5
  from buster.retriever import DeepLakeRetriever, Retriever
6
  from buster.tokenizers import GPTTokenizer
7
- from buster.validators import QuestionAnswerValidator, Validator
8
- from buster.utils import extract_zip
9
 
10
  from huggingface_hub import hf_hub_download
11
 
 
 
 
 
 
 
 
 
12
 
13
  HUB_DB_FILE = "deeplake_store.zip"
14
  REPO_ID = "jerpint/hf_buster_data"
@@ -24,19 +33,43 @@ extract_zip(zip_file_path=HUB_DB_FILE, output_path=".")
24
 
25
 
26
  buster_cfg = BusterConfig(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  validator_cfg={
28
- "unknown_response_templates": [
29
- "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
30
- ],
31
- "unknown_threshold": 0.85,
32
- "embedding_model": "text-embedding-ada-002",
33
- "use_reranking": True,
34
- "invalid_question_response": "This question does not seem relevant to my current knowledge.",
35
- "check_question_prompt": """You are a chatbot answering technical questions on the huggingface documentation, a library used to train and do inference on open-source artificial intelligence models.
36
-
37
- Your job is to determine wether or not a question is valid, and should be answered.
38
- More general questions are not considered valid, even if you might know the response.
39
- Questions that are likely to be related to the huggingface library are considered valid.
40
  A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
41
 
42
  For example:
@@ -48,18 +81,31 @@ Q: What is the meaning of life?
48
  false
49
 
50
  A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
51
- "completion_kwargs": {
52
- "model": "gpt-3.5-turbo",
53
- "stream": False,
54
- "temperature": 0,
55
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  },
57
  retriever_cfg={
58
  "path": "deeplake_store",
59
  "top_k": 3,
60
  "thresh": 0.7,
61
  "max_tokens": 2000,
62
- "embedding_model": "text-embedding-ada-002",
63
  },
64
  documents_answerer_cfg={
65
  "no_documents_message": "No documents are available for this question.",
@@ -70,6 +116,7 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
70
  "stream": True,
71
  "temperature": 0,
72
  },
 
73
  },
74
  tokenizer_cfg={
75
  "model_name": "gpt-3.5-turbo",
@@ -86,7 +133,7 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
86
  "If the answer is in the documentation, summarize it in a helpful way to the user. "
87
  "If it isn't, simply reply that you cannot answer the question. "
88
  "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
89
- "Here is the documentation: "
90
  ),
91
  "text_after_docs": (
92
  "REMEMBER:\n"
@@ -121,7 +168,7 @@ def setup_buster(buster_cfg: BusterConfig):
121
  ),
122
  **buster_cfg.documents_answerer_cfg,
123
  )
124
- validator: Validator = QuestionAnswerValidator(**buster_cfg.validator_cfg)
125
  buster: Buster = Buster(
126
  retriever=retriever, document_answerer=document_answerer, validator=validator
127
  )
 
1
  from buster.busterbot import Buster, BusterConfig
2
+ from buster.completers import ChatGPTCompleter, DocumentAnswerer
3
  from buster.formatters.documents import DocumentsFormatterJSON
4
  from buster.formatters.prompts import PromptFormatter
5
+ from buster.llm_utils import get_openai_embedding_constructor
6
+ from buster.utils import extract_zip
7
  from buster.retriever import DeepLakeRetriever, Retriever
8
  from buster.tokenizers import GPTTokenizer
9
+ from buster.validators import Validator
 
10
 
11
  from huggingface_hub import hf_hub_download
12
 
13
+ # kwargs to pass to OpenAI client
14
+ client_kwargs = {
15
+ "timeout": 20,
16
+ "max_retries": 3,
17
+ }
18
+
19
+ embedding_fn = get_openai_embedding_constructor(client_kwargs=client_kwargs)
20
+
21
 
22
  HUB_DB_FILE = "deeplake_store.zip"
23
  REPO_ID = "jerpint/hf_buster_data"
 
33
 
34
 
35
  buster_cfg = BusterConfig(
36
+ # validator_cfg={
37
+ # "unknown_response_templates": [
38
+ # "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
39
+ # ],
40
+ # "unknown_threshold": 0.85,
41
+ # "embedding_model": "text-embedding-ada-002",
42
+ # "use_reranking": True,
43
+ # "invalid_question_response": "This question does not seem relevant to my current knowledge.",
44
+ # "check_question_prompt": """You are a chatbot answering technical questions on the huggingface documentation, a library used to train and do inference on open-source artificial intelligence models.
45
+ # Your job is to determine wether or not a question is valid, and should be answered.
46
+ # More general questions are not considered valid, even if you might know the response.
47
+ # Questions that are likely to be related to the huggingface library are considered valid.
48
+ # A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
49
+ # For example:
50
+ # Q: How can I train a vision model?
51
+ # true
52
+ # Q: What is the meaning of life?
53
+ # false
54
+ # A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
55
+ # "completion_kwargs": {
56
+ # "model": "gpt-3.5-turbo",
57
+ # "stream": False,
58
+ # "temperature": 0,
59
+ # },
60
+ # },
61
  validator_cfg={
62
+ "question_validator_cfg": {
63
+ "invalid_question_response": "This question does not seem relevant to my current knowledge.",
64
+ "completion_kwargs": {
65
+ "model": "gpt-3.5-turbo",
66
+ "stream": False,
67
+ "temperature": 0,
68
+ },
69
+ "client_kwargs": client_kwargs,
70
+ "check_question_prompt": """You are a chatbot answering technical questions on the Hugging Face documentation, a library used to train and do inference on open-source artificial intelligence models.
71
+ A user will submit a question. Your job is only to determine wether or not a question might be related to the library usage or to training AI models.
72
+ Questions that are likely to be related to the hugging face library or AI are considered valid.
 
73
  A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
74
 
75
  For example:
 
81
  false
82
 
83
  A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
 
 
 
 
84
  },
85
+ "answer_validator_cfg": {
86
+ "unknown_response_templates": [
87
+ "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
88
+ ],
89
+ "unknown_threshold": 0.85,
90
+ "embedding_fn": embedding_fn,
91
+ },
92
+ "documents_validator_cfg": {
93
+ "completion_kwargs": {
94
+ "model": "gpt-3.5-turbo",
95
+ "stream": False,
96
+ "temperature": 0,
97
+ },
98
+ "client_kwargs": client_kwargs,
99
+ },
100
+ "use_reranking": True,
101
+ "validate_documents": False,
102
  },
103
  retriever_cfg={
104
  "path": "deeplake_store",
105
  "top_k": 3,
106
  "thresh": 0.7,
107
  "max_tokens": 2000,
108
+ "embedding_model": embedding_fn,
109
  },
110
  documents_answerer_cfg={
111
  "no_documents_message": "No documents are available for this question.",
 
116
  "stream": True,
117
  "temperature": 0,
118
  },
119
+ "client_kwargs": client_kwargs,
120
  },
121
  tokenizer_cfg={
122
  "model_name": "gpt-3.5-turbo",
 
133
  "If the answer is in the documentation, summarize it in a helpful way to the user. "
134
  "If it isn't, simply reply that you cannot answer the question. "
135
  "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
136
+ "Here is the documentation:\n"
137
  ),
138
  "text_after_docs": (
139
  "REMEMBER:\n"
 
168
  ),
169
  **buster_cfg.documents_answerer_cfg,
170
  )
171
+ validator: Validator = Validator(**buster_cfg.validator_cfg)
172
  buster: Buster = Buster(
173
  retriever=retriever, document_answerer=document_answerer, validator=validator
174
  )
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- buster-doctalk==1.0.19
2
  huggingface-hub
3
  gradio
4
  promptlayer
 
1
+ buster-doctalk==1.0.28
2
  huggingface-hub
3
  gradio
4
  promptlayer