xicocdi committed
Commit 9c44a22 · 1 Parent(s): 2ab1d97

push embedding-model

Files changed (2):
  1. app.py +11 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -11,6 +11,8 @@ from langchain.prompts import PromptTemplate
 from langchain.chains import ConversationalRetrievalChain
 from langchain_community.vectorstores import Qdrant
 from langchain.memory import ConversationBufferMemory
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.retrievers.multi_query import MultiQueryRetriever
 
 import chainlit as cl
 
@@ -27,13 +29,13 @@ for pdf_path in pdf_paths:
     documents.extend(loader.load())
 
 text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size=1000,
-    chunk_overlap=200,
+    chunk_size=2000,
+    chunk_overlap=100,
 )
 
 docs = text_splitter.split_documents(documents)
 
-embedding = OpenAIEmbeddings(model="text-embedding-3-small")
+embedding = HuggingFaceEmbeddings(model_name="XicoC/midterm-finetuned-arctic")
 
 vectorstore = Qdrant.from_documents(
     documents=docs,
@@ -78,6 +80,11 @@ llm = ChatOpenAI(
     streaming=True,
 )
 
+retriever_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+multiquery_retriever = MultiQueryRetriever.from_llm(
+    retriever=retriever, llm=retriever_llm
+)
+
 
 @cl.on_chat_start
 async def start_chat():
@@ -87,7 +94,7 @@ async def start_chat():
 
     qa = ConversationalRetrievalChain.from_llm(
        llm,
-       retriever=retriever,
+       retriever=multiquery_retriever,
        memory=memory,
        combine_docs_chain_kwargs={"prompt": PROMPT},
        return_source_documents=True,
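Taken together, the app.py hunks swap the OpenAI embedding model for the fine-tuned Arctic model served through sentence-transformers and route retrieval through a MultiQueryRetriever. Below is a minimal sketch of the retrieval stack after this commit, using the variable names from the diff; the stand-in document and the in-memory Qdrant location are assumptions, since the PDF loading and Qdrant connection settings sit outside the hunks shown above.

```python
# Minimal sketch of the retrieval stack after this commit. Variable names
# (embedding, vectorstore, retriever, multiquery_retriever) follow the diff;
# the stand-in document and the in-memory Qdrant location are assumptions.
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

docs = [Document(page_content="example chunk produced by the text splitter")]

# Fine-tuned Arctic embedding model, loaded from the Hub via sentence-transformers.
embedding = HuggingFaceEmbeddings(model_name="XicoC/midterm-finetuned-arctic")

# Index the chunks in Qdrant (location is an assumption, not shown in the diff).
vectorstore = Qdrant.from_documents(
    documents=docs,
    embedding=embedding,
    location=":memory:",
)
retriever = vectorstore.as_retriever()

# A small LLM rewrites each user question into several variants; the union of
# their retrieval hits is what the conversational chain sees.
retriever_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
multiquery_retriever = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=retriever_llm
)
```

The rewritten queries all hit the same Qdrant index, so no extra collections are needed; the trade-off is the additional gpt-4o-mini calls per user question.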
requirements.txt CHANGED
@@ -8,4 +8,5 @@ langchain_openai==0.1.1
 pypdf
 chromadb
 qdrant-client
-importlib-metadata<7.0,>=6.0
+importlib-metadata<7.0,>=6.0
+sentence-transformers
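The new sentence-transformers requirement backs the HuggingFaceEmbeddings swap in app.py, since langchain_huggingface loads the embedding model through that library. A quick, optional sanity check that the fine-tuned model from the diff resolves and encodes text (illustrative only):

```python
# Optional check that the embedding model referenced in app.py loads via
# sentence-transformers; the model name comes from the diff, the query
# string is illustrative.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("XicoC/midterm-finetuned-arctic")
vector = model.encode("What do the uploaded PDFs say about this topic?")
print(len(vector))  # dimensionality of the fine-tuned Arctic embeddings
```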