Chris Alexiuk
committed on
Commit
·
0775a33
1
Parent(s):
3e7467d
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,26 +14,26 @@ import arxiv
|
|
| 14 |
import chainlit as cl
|
| 15 |
from chainlit import user_session
|
| 16 |
|
| 17 |
-
@cl.langchain_factory
|
| 18 |
-
def init():
|
| 19 |
arxiv_query = None
|
| 20 |
|
| 21 |
# Wait for the user to ask an Arxiv question
|
| 22 |
while arxiv_query == None:
|
| 23 |
-
arxiv_query = cl.AskUserMessage(
|
| 24 |
content="Please enter a topic to begin!", timeout=15
|
| 25 |
).send()
|
| 26 |
|
| 27 |
# Obtain the top 30 results from Arxiv for the query
|
| 28 |
search = arxiv.Search(
|
| 29 |
query=arxiv_query["content"],
|
| 30 |
-
max_results=
|
| 31 |
sort_by=arxiv.SortCriterion.Relevance,
|
| 32 |
)
|
| 33 |
|
|
|
|
| 34 |
# download each of the pdfs
|
| 35 |
pdf_data = []
|
| 36 |
-
|
| 37 |
for result in search.results():
|
| 38 |
loader = PyMuPDFLoader(result.pdf_url)
|
| 39 |
loaded_pdf = loader.load()
|
|
@@ -48,12 +48,15 @@ def init():
|
|
| 48 |
embeddings = OpenAIEmbeddings(
|
| 49 |
disallowed_special=(),
|
| 50 |
)
|
|
|
|
|
|
|
|
|
|
| 51 |
docsearch = Chroma.from_documents(pdf_data, embeddings)
|
| 52 |
|
| 53 |
# Create a chain that uses the Chroma vector store
|
| 54 |
chain = RetrievalQAWithSourcesChain.from_chain_type(
|
| 55 |
ChatOpenAI(
|
| 56 |
-
model_name="gpt-
|
| 57 |
temperature=0,
|
| 58 |
),
|
| 59 |
chain_type="stuff",
|
|
@@ -62,7 +65,7 @@ def init():
|
|
| 62 |
)
|
| 63 |
|
| 64 |
# Let the user know that the system is ready
|
| 65 |
-
cl.Message(
|
| 66 |
content=f"We found a few papers about `{arxiv_query['content']}` you can now ask questions!"
|
| 67 |
).send()
|
| 68 |
|
|
@@ -70,7 +73,7 @@ def init():
|
|
| 70 |
|
| 71 |
|
| 72 |
@cl.langchain_postprocess
|
| 73 |
-
def process_response(res):
|
| 74 |
answer = res["answer"]
|
| 75 |
source_elements_dict = {}
|
| 76 |
source_elements = []
|
|
@@ -94,7 +97,7 @@ def process_response(res):
|
|
| 94 |
page_numbers = ", ".join([str(x) for x in source["page_number"]])
|
| 95 |
text_for_source = f"Page Number(s): {page_numbers}\nURL: {source['url']}"
|
| 96 |
source_elements.append(
|
| 97 |
-
cl.Text(name=title,
|
| 98 |
)
|
| 99 |
|
| 100 |
-
cl.Message(content=answer, elements=source_elements).send()
|
|
|
|
| 14 |
import chainlit as cl
|
| 15 |
from chainlit import user_session
|
| 16 |
|
| 17 |
+
@cl.langchain_factory(use_async=True)
|
| 18 |
+
async def init():
|
| 19 |
arxiv_query = None
|
| 20 |
|
| 21 |
# Wait for the user to ask an Arxiv question
|
| 22 |
while arxiv_query == None:
|
| 23 |
+
arxiv_query = await cl.AskUserMessage(
|
| 24 |
content="Please enter a topic to begin!", timeout=15
|
| 25 |
).send()
|
| 26 |
|
| 27 |
# Obtain the top 30 results from Arxiv for the query
|
| 28 |
search = arxiv.Search(
|
| 29 |
query=arxiv_query["content"],
|
| 30 |
+
max_results=3,
|
| 31 |
sort_by=arxiv.SortCriterion.Relevance,
|
| 32 |
)
|
| 33 |
|
| 34 |
+
await cl.Message(content="Downloading and chunking articles...").send()
|
| 35 |
# download each of the pdfs
|
| 36 |
pdf_data = []
|
|
|
|
| 37 |
for result in search.results():
|
| 38 |
loader = PyMuPDFLoader(result.pdf_url)
|
| 39 |
loaded_pdf = loader.load()
|
|
|
|
| 48 |
embeddings = OpenAIEmbeddings(
|
| 49 |
disallowed_special=(),
|
| 50 |
)
|
| 51 |
+
|
| 52 |
+
# If operation takes too long, make_async allows to run in a thread
|
| 53 |
+
# docsearch = await cl.make_async(Chroma.from_documents)(pdf_data, embeddings)
|
| 54 |
docsearch = Chroma.from_documents(pdf_data, embeddings)
|
| 55 |
|
| 56 |
# Create a chain that uses the Chroma vector store
|
| 57 |
chain = RetrievalQAWithSourcesChain.from_chain_type(
|
| 58 |
ChatOpenAI(
|
| 59 |
+
model_name="gpt-3.5-turbo-16k",
|
| 60 |
temperature=0,
|
| 61 |
),
|
| 62 |
chain_type="stuff",
|
|
|
|
| 65 |
)
|
| 66 |
|
| 67 |
# Let the user know that the system is ready
|
| 68 |
+
await cl.Message(
|
| 69 |
content=f"We found a few papers about `{arxiv_query['content']}` you can now ask questions!"
|
| 70 |
).send()
|
| 71 |
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
@cl.langchain_postprocess
|
| 76 |
+
async def process_response(res):
|
| 77 |
answer = res["answer"]
|
| 78 |
source_elements_dict = {}
|
| 79 |
source_elements = []
|
|
|
|
| 97 |
page_numbers = ", ".join([str(x) for x in source["page_number"]])
|
| 98 |
text_for_source = f"Page Number(s): {page_numbers}\nURL: {source['url']}"
|
| 99 |
source_elements.append(
|
| 100 |
+
cl.Text(name=title, content=text_for_source, display="inline")
|
| 101 |
)
|
| 102 |
|
| 103 |
+
await cl.Message(content=answer, elements=source_elements).send()
|