xicocdi committed on
Commit
0d2dab1
·
1 Parent(s): b9e9736

update vectorstore

Browse files
Files changed (2) hide show
  1. app.py +22 -25
  2. requirements.txt +2 -1
app.py CHANGED
@@ -9,7 +9,9 @@ from langchain_community.vectorstores.chroma import Chroma
9
  from langchain_openai import ChatOpenAI
10
  from langchain.prompts import PromptTemplate
11
  from langchain.chains import ConversationalRetrievalChain
 
12
  from langchain.memory import ConversationBufferMemory
 
13
  import chainlit as cl
14
 
15
  load_dotenv()
@@ -18,32 +20,27 @@ pdf_paths = [
18
  "AI_Risk_Management_Framework.pdf",
19
  "Blueprint-for-an-AI-Bill-of-Rights.pdf",
20
  ]
21
- persist_directory = "docs/chroma/"
22
-
23
-
24
- if os.path.exists(persist_directory) and os.listdir(persist_directory):
25
- print("Loading existing vector database...")
26
- embedding = OpenAIEmbeddings(model="text-embedding-3-small")
27
- vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
28
- else:
29
- print("Creating new vector database...")
30
- documents = []
31
- for pdf_path in pdf_paths:
32
- loader = PyPDFLoader(pdf_path)
33
- documents.extend(loader.load())
34
-
35
- text_splitter = RecursiveCharacterTextSplitter(
36
- chunk_size=1000,
37
- chunk_overlap=200,
38
- )
39
 
40
- docs = text_splitter.split_documents(documents)
 
 
 
41
 
42
- embedding = OpenAIEmbeddings(model="text-embedding-3-small")
 
 
 
43
 
44
- vectordb = Chroma.from_documents(
45
- documents=docs, embedding=embedding, persist_directory=persist_directory
46
- )
 
 
 
 
 
 
 
47
 
48
  custom_template = """
49
  You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.
@@ -70,13 +67,13 @@ PROMPT = PromptTemplate(
70
  template=custom_template, input_variables=["context", "question", "chat_history"]
71
  )
72
 
73
- retriever = vectordb.as_retriever(
74
  search_type="mmr",
75
  search_kwargs={"k": 4, "fetch_k": 10},
76
  )
77
 
78
  llm = ChatOpenAI(
79
- model="gpt-4o-mini",
80
  temperature=0.1,
81
  streaming=True,
82
  )
 
9
  from langchain_openai import ChatOpenAI
10
  from langchain.prompts import PromptTemplate
11
  from langchain.chains import ConversationalRetrievalChain
12
+ from langchain_community.vectorstores import Qdrant
13
  from langchain.memory import ConversationBufferMemory
14
+
15
  import chainlit as cl
16
 
17
  load_dotenv()
 
20
  "AI_Risk_Management_Framework.pdf",
21
  "Blueprint-for-an-AI-Bill-of-Rights.pdf",
22
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ documents = []
25
+ for pdf_path in pdf_paths:
26
+ loader = PyPDFLoader(pdf_path)
27
+ documents.extend(loader.load())
28
 
29
+ text_splitter = RecursiveCharacterTextSplitter(
30
+ chunk_size=1000,
31
+ chunk_overlap=200,
32
+ )
33
 
34
+ docs = text_splitter.split_documents(documents)
35
+
36
+ embedding = OpenAIEmbeddings(model="text-embedding-3-small")
37
+
38
+ vectorstore = Qdrant.from_documents(
39
+ documents=docs,
40
+ embedding=embedding,
41
+ location=":memory:",
42
+ collection_name="Midterm Embedding Eval",
43
+ )
44
 
45
  custom_template = """
46
  You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.
 
67
  template=custom_template, input_variables=["context", "question", "chat_history"]
68
  )
69
 
70
+ retriever = vectorstore.as_retriever(
71
  search_type="mmr",
72
  search_kwargs={"k": 4, "fetch_k": 10},
73
  )
74
 
75
  llm = ChatOpenAI(
76
+ model="gpt-4",
77
  temperature=0.1,
78
  streaming=True,
79
  )
requirements.txt CHANGED
@@ -6,4 +6,5 @@ python-dotenv==1.0.0
6
  langchain
7
  langchain_openai==0.1.1
8
  pypdf
9
- chromadb
 
 
6
  langchain
7
  langchain_openai==0.1.1
8
  pypdf
9
+ chromadb
10
+ qdrant-client