Merge pull request #1 from jerpint/update-buster
Browse files
- .github/workflows/deploy_hf.yaml +21 -0
- app.py +4 -3
- cfg.py +3 -5
- requirements.txt +2 -1
.github/workflows/deploy_hf.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face hub
|
| 2 |
+
on:
|
| 3 |
+
push:
|
| 4 |
+
branches: [main]
|
| 5 |
+
|
| 6 |
+
# to run this workflow manually from the Actions tab
|
| 7 |
+
workflow_dispatch:
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
sync-to-hub:
|
| 11 |
+
runs-on: ubuntu-latest
|
| 12 |
+
steps:
|
| 13 |
+
- uses: actions/checkout@v3
|
| 14 |
+
with:
|
| 15 |
+
fetch-depth: 0
|
| 16 |
+
lfs: true
|
| 17 |
+
- name: Push to hub
|
| 18 |
+
env:
|
| 19 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 20 |
+
HF_USERNAME: ${{ secrets.HF_USERNAME }}
|
| 21 |
+
run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/towardsai-buster/buster main
|
app.py
CHANGED
|
@@ -67,9 +67,10 @@ with block:
|
|
| 67 |
)
|
| 68 |
gr.Markdown(
|
| 69 |
"""
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
#### The Code is open-sourced and available on [Github](www.github.com/jerpint/buster)
|
| 75 |
"""
|
|
|
|
| 67 |
)
|
| 68 |
gr.Markdown(
|
| 69 |
"""
|
| 70 |
+
## Welcome to Buster!
|
| 71 |
+
This chatbot is designed to answer any questions related to the [huggingface transformers](https://huggingface.co/docs/transformers/index) library.
|
| 72 |
+
It uses ChatGPT + embeddings to search the docs for relevant sections and uses them to answer questions. It can then cite its sources back to you to verify the information.
|
| 73 |
+
Note that LLMs are prone to hallucination, so all outputs should always be vetted by users.
|
| 74 |
|
| 75 |
#### The Code is open-sourced and available on [Github](www.github.com/jerpint/buster)
|
| 76 |
"""
|
cfg.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from buster.busterbot import Buster, BusterConfig
|
| 2 |
from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
|
| 3 |
-
from buster.formatters.documents import
|
| 4 |
from buster.formatters.prompts import PromptFormatter
|
| 5 |
from buster.retriever import DeepLakeRetriever, Retriever
|
| 6 |
from buster.tokenizers import GPTTokenizer
|
|
@@ -76,7 +76,7 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
|
|
| 76 |
},
|
| 77 |
documents_formatter_cfg={
|
| 78 |
"max_tokens": 3500,
|
| 79 |
-
"
|
| 80 |
},
|
| 81 |
prompt_formatter_cfg={
|
| 82 |
"max_tokens": 3500,
|
|
@@ -87,10 +87,8 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
|
|
| 87 |
"If it isn't, simply reply that you cannot answer the question. "
|
| 88 |
"Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
|
| 89 |
"Here is the documentation: "
|
| 90 |
-
"<DOCUMENTS> "
|
| 91 |
),
|
| 92 |
"text_after_docs": (
|
| 93 |
-
"<\DOCUMENTS>\n"
|
| 94 |
"REMEMBER:\n"
|
| 95 |
"You are an chatbot answering technical questions on the huggingface transformers library. "
|
| 96 |
"Here are the rules you must follow:\n"
|
|
@@ -115,7 +113,7 @@ def setup_buster(buster_cfg: BusterConfig):
|
|
| 115 |
tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
|
| 116 |
document_answerer: DocumentAnswerer = DocumentAnswerer(
|
| 117 |
completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
|
| 118 |
-
documents_formatter=
|
| 119 |
tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
|
| 120 |
),
|
| 121 |
prompt_formatter=PromptFormatter(
|
|
|
|
| 1 |
from buster.busterbot import Buster, BusterConfig
|
| 2 |
from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
|
| 3 |
+
from buster.formatters.documents import DocumentsFormatterJSON
|
| 4 |
from buster.formatters.prompts import PromptFormatter
|
| 5 |
from buster.retriever import DeepLakeRetriever, Retriever
|
| 6 |
from buster.tokenizers import GPTTokenizer
|
|
|
|
| 76 |
},
|
| 77 |
documents_formatter_cfg={
|
| 78 |
"max_tokens": 3500,
|
| 79 |
+
"columns": ["content", "source", "title"],
|
| 80 |
},
|
| 81 |
prompt_formatter_cfg={
|
| 82 |
"max_tokens": 3500,
|
|
|
|
| 87 |
"If it isn't, simply reply that you cannot answer the question. "
|
| 88 |
"Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
|
| 89 |
"Here is the documentation: "
|
|
|
|
| 90 |
),
|
| 91 |
"text_after_docs": (
|
|
|
|
| 92 |
"REMEMBER:\n"
|
| 93 |
"You are an chatbot answering technical questions on the huggingface transformers library. "
|
| 94 |
"Here are the rules you must follow:\n"
|
|
|
|
| 113 |
tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
|
| 114 |
document_answerer: DocumentAnswerer = DocumentAnswerer(
|
| 115 |
completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
|
| 116 |
+
documents_formatter=DocumentsFormatterJSON(
|
| 117 |
tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
|
| 118 |
),
|
| 119 |
prompt_formatter=PromptFormatter(
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
-
|
| 2 |
huggingface-hub
|
| 3 |
gradio
|
|
|
|
|
|
| 1 |
+
buster-doctalk==1.0.19
|
| 2 |
huggingface-hub
|
| 3 |
gradio
|
| 4 |
+
promptlayer
|