Spaces:

jerpint
/

buster

Running

App Files Community

jerpint committed on Apr 9, 2024

Commit

1423dd4

1 Parent(s): 1e97486

update to latest buster

Browse files

Files changed (3) hide show

app.py +1 -0
cfg.py +69 -22
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -50,6 +50,7 @@ def chat(history):
     user_input = history[-1][0]
     completion = buster.process_input(user_input)
     history[-1][1] = ""

     user_input = history[-1][0]
     completion = buster.process_input(user_input)
+    print(completion)
     history[-1][1] = ""

cfg.py CHANGED Viewed

@@ -1,14 +1,23 @@
 from buster.busterbot import Buster, BusterConfig
-from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
 from buster.formatters.documents import DocumentsFormatterJSON
 from buster.formatters.prompts import PromptFormatter
 from buster.retriever import DeepLakeRetriever, Retriever
 from buster.tokenizers import GPTTokenizer
-from buster.validators import QuestionAnswerValidator, Validator
-from buster.utils import extract_zip
 from huggingface_hub import hf_hub_download
 HUB_DB_FILE = "deeplake_store.zip"
 REPO_ID = "jerpint/hf_buster_data"
@@ -24,19 +33,43 @@ extract_zip(zip_file_path=HUB_DB_FILE, output_path=".")
 buster_cfg = BusterConfig(
     validator_cfg={
-        "unknown_response_templates": [
-            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
-        ],
-        "unknown_threshold": 0.85,
-        "embedding_model": "text-embedding-ada-002",
-        "use_reranking": True,
-        "invalid_question_response": "This question does not seem relevant to my current knowledge.",
-        "check_question_prompt": """You are a chatbot answering technical questions on the huggingface documentation, a library used to train and do inference on open-source artificial intelligence models.
-Your job is to determine wether or not a question is valid, and should be answered.
-More general questions are not considered valid, even if you might know the response.
-Questions that are likely to be related to the huggingface library are considered valid.
 A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
 For example:
@@ -48,18 +81,31 @@ Q: What is the meaning of life?
 false
 A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
-        "completion_kwargs": {
-            "model": "gpt-3.5-turbo",
-            "stream": False,
-            "temperature": 0,
         },
     },
     retriever_cfg={
         "path": "deeplake_store",
         "top_k": 3,
         "thresh": 0.7,
         "max_tokens": 2000,
-        "embedding_model": "text-embedding-ada-002",
     },
     documents_answerer_cfg={
         "no_documents_message": "No documents are available for this question.",
@@ -70,6 +116,7 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
             "stream": True,
             "temperature": 0,
         },
     },
     tokenizer_cfg={
         "model_name": "gpt-3.5-turbo",
@@ -86,7 +133,7 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
             "If the answer is in the documentation, summarize it in a helpful way to the user. "
             "If it isn't, simply reply that you cannot answer the question. "
             "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
-            "Here is the documentation: "
         ),
         "text_after_docs": (
             "REMEMBER:\n"
@@ -121,7 +168,7 @@ def setup_buster(buster_cfg: BusterConfig):
         ),
         **buster_cfg.documents_answerer_cfg,
     )
-    validator: Validator = QuestionAnswerValidator(**buster_cfg.validator_cfg)
     buster: Buster = Buster(
         retriever=retriever, document_answerer=document_answerer, validator=validator
     )

 from buster.busterbot import Buster, BusterConfig
+from buster.completers import ChatGPTCompleter, DocumentAnswerer
 from buster.formatters.documents import DocumentsFormatterJSON
 from buster.formatters.prompts import PromptFormatter
+from buster.llm_utils import get_openai_embedding_constructor
+from buster.utils import extract_zip
 from buster.retriever import DeepLakeRetriever, Retriever
 from buster.tokenizers import GPTTokenizer
+from buster.validators import Validator
 from huggingface_hub import hf_hub_download
+# kwargs to pass to OpenAI client
+client_kwargs = {
+    "timeout": 20,
+    "max_retries": 3,
+}
+embedding_fn = get_openai_embedding_constructor(client_kwargs=client_kwargs)
 HUB_DB_FILE = "deeplake_store.zip"
 REPO_ID = "jerpint/hf_buster_data"
 buster_cfg = BusterConfig(
+    #     validator_cfg={
+    #         "unknown_response_templates": [
+    #             "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
+    #         ],
+    #         "unknown_threshold": 0.85,
+    #         "embedding_model": "text-embedding-ada-002",
+    #         "use_reranking": True,
+    #         "invalid_question_response": "This question does not seem relevant to my current knowledge.",
+    #         "check_question_prompt": """You are a chatbot answering technical questions on the huggingface documentation, a library used to train and do inference on open-source artificial intelligence models.
+    # Your job is to determine wether or not a question is valid, and should be answered.
+    # More general questions are not considered valid, even if you might know the response.
+    # Questions that are likely to be related to the huggingface library are considered valid.
+    # A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
+    # For example:
+    # Q: How can I train a vision model?
+    # true
+    # Q: What is the meaning of life?
+    # false
+    # A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
+    #         "completion_kwargs": {
+    #             "model": "gpt-3.5-turbo",
+    #             "stream": False,
+    #             "temperature": 0,
+    #         },
+    #     },
     validator_cfg={
+        "question_validator_cfg": {
+            "invalid_question_response": "This question does not seem relevant to my current knowledge.",
+            "completion_kwargs": {
+                "model": "gpt-3.5-turbo",
+                "stream": False,
+                "temperature": 0,
+            },
+            "client_kwargs": client_kwargs,
+            "check_question_prompt": """You are a chatbot answering technical questions on the Hugging Face documentation, a library used to train and do inference on open-source artificial intelligence models.
+A user will submit a question. Your job is only to determine wether or not a question might be related to the library usage or to training AI models.
+Questions that are likely to be related to the hugging face library or AI are considered valid.
 A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
 For example:
 false
 A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
         },
+        "answer_validator_cfg": {
+            "unknown_response_templates": [
+                "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
+            ],
+            "unknown_threshold": 0.85,
+            "embedding_fn": embedding_fn,
+        },
+        "documents_validator_cfg": {
+            "completion_kwargs": {
+                "model": "gpt-3.5-turbo",
+                "stream": False,
+                "temperature": 0,
+            },
+            "client_kwargs": client_kwargs,
+        },
+        "use_reranking": True,
+        "validate_documents": False,
     },
     retriever_cfg={
         "path": "deeplake_store",
         "top_k": 3,
         "thresh": 0.7,
         "max_tokens": 2000,
+        "embedding_model": embedding_fn,
     },
     documents_answerer_cfg={
         "no_documents_message": "No documents are available for this question.",
             "stream": True,
             "temperature": 0,
         },
+        "client_kwargs": client_kwargs,
     },
     tokenizer_cfg={
         "model_name": "gpt-3.5-turbo",
             "If the answer is in the documentation, summarize it in a helpful way to the user. "
             "If it isn't, simply reply that you cannot answer the question. "
             "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
+            "Here is the documentation:\n"
         ),
         "text_after_docs": (
             "REMEMBER:\n"
         ),
         **buster_cfg.documents_answerer_cfg,
     )
+    validator: Validator = Validator(**buster_cfg.validator_cfg)
     buster: Buster = Buster(
         retriever=retriever, document_answerer=document_answerer, validator=validator
     )

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-buster-doctalk==1.0.19
 huggingface-hub
 gradio
 promptlayer

+buster-doctalk==1.0.28
 huggingface-hub
 gradio
 promptlayer