Spaces:
Sleeping
Sleeping
xicocdi
committed on
Commit
·
9c44a22
1
Parent(s):
2ab1d97
push embedding-model
Browse files
- app.py +11 -4
- requirements.txt +2 -1
app.py
CHANGED
@@ -11,6 +11,8 @@ from langchain.prompts import PromptTemplate
|
|
11 |
from langchain.chains import ConversationalRetrievalChain
|
12 |
from langchain_community.vectorstores import Qdrant
|
13 |
from langchain.memory import ConversationBufferMemory
|
|
|
|
|
14 |
|
15 |
import chainlit as cl
|
16 |
|
@@ -27,13 +29,13 @@ for pdf_path in pdf_paths:
|
|
27 |
documents.extend(loader.load())
|
28 |
|
29 |
text_splitter = RecursiveCharacterTextSplitter(
|
30 |
-
chunk_size=
|
31 |
-
chunk_overlap=
|
32 |
)
|
33 |
|
34 |
docs = text_splitter.split_documents(documents)
|
35 |
|
36 |
-
embedding =
|
37 |
|
38 |
vectorstore = Qdrant.from_documents(
|
39 |
documents=docs,
|
@@ -78,6 +80,11 @@ llm = ChatOpenAI(
|
|
78 |
streaming=True,
|
79 |
)
|
80 |
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
@cl.on_chat_start
|
83 |
async def start_chat():
|
@@ -87,7 +94,7 @@ async def start_chat():
|
|
87 |
|
88 |
qa = ConversationalRetrievalChain.from_llm(
|
89 |
llm,
|
90 |
-
retriever=
|
91 |
memory=memory,
|
92 |
combine_docs_chain_kwargs={"prompt": PROMPT},
|
93 |
return_source_documents=True,
|
|
|
11 |
from langchain.chains import ConversationalRetrievalChain
|
12 |
from langchain_community.vectorstores import Qdrant
|
13 |
from langchain.memory import ConversationBufferMemory
|
14 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
15 |
+
from langchain.retrievers.multi_query import MultiQueryRetriever
|
16 |
|
17 |
import chainlit as cl
|
18 |
|
|
|
29 |
documents.extend(loader.load())
|
30 |
|
31 |
text_splitter = RecursiveCharacterTextSplitter(
|
32 |
+
chunk_size=2000,
|
33 |
+
chunk_overlap=100,
|
34 |
)
|
35 |
|
36 |
docs = text_splitter.split_documents(documents)
|
37 |
|
38 |
+
embedding = HuggingFaceEmbeddings(model_name="XicoC/midterm-finetuned-arctic")
|
39 |
|
40 |
vectorstore = Qdrant.from_documents(
|
41 |
documents=docs,
|
|
|
80 |
streaming=True,
|
81 |
)
|
82 |
|
83 |
+
retriever_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
|
84 |
+
multiquery_retriever = MultiQueryRetriever.from_llm(
|
85 |
+
retriever=retriever, llm=retriever_llm
|
86 |
+
)
|
87 |
+
|
88 |
|
89 |
@cl.on_chat_start
|
90 |
async def start_chat():
|
|
|
94 |
|
95 |
qa = ConversationalRetrievalChain.from_llm(
|
96 |
llm,
|
97 |
+
retriever=multiquery_retriever,
|
98 |
memory=memory,
|
99 |
combine_docs_chain_kwargs={"prompt": PROMPT},
|
100 |
return_source_documents=True,
|
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ langchain_openai==0.1.1
|
|
8 |
pypdf
|
9 |
chromadb
|
10 |
qdrant-client
|
11 |
-
importlib-metadata<7.0,>=6.0
|
|
|
|
8 |
pypdf
|
9 |
chromadb
|
10 |
qdrant-client
|
11 |
+
importlib-metadata<7.0,>=6.0
|
12 |
+
sentence-transformers
|