Spaces:

XicoC
/

AIMakerSpace-Midterm

Sleeping

App Files Community

xicocdi committed on Sep 20, 2024

Commit

0d2dab1

1 Parent(s): b9e9736

update vectorstore

Browse files

Files changed (2) hide show

app.py +22 -25
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -9,7 +9,9 @@ from langchain_community.vectorstores.chroma import Chroma
 from langchain_openai import ChatOpenAI
 from langchain.prompts import PromptTemplate
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 import chainlit as cl
 load_dotenv()
@@ -18,32 +20,27 @@ pdf_paths = [
     "AI_Risk_Management_Framework.pdf",
     "Blueprint-for-an-AI-Bill-of-Rights.pdf",
 ]
-persist_directory = "docs/chroma/"
-if os.path.exists(persist_directory) and os.listdir(persist_directory):
-    print("Loading existing vector database...")
-    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
-    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
-else:
-    print("Creating new vector database...")
-    documents = []
-    for pdf_path in pdf_paths:
-        loader = PyPDFLoader(pdf_path)
-        documents.extend(loader.load())
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,
-        chunk_overlap=200,
-    )
-    docs = text_splitter.split_documents(documents)
-    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
-    vectordb = Chroma.from_documents(
-        documents=docs, embedding=embedding, persist_directory=persist_directory
-    )
 custom_template = """
 You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.
@@ -70,13 +67,13 @@ PROMPT = PromptTemplate(
     template=custom_template, input_variables=["context", "question", "chat_history"]
 )
-retriever = vectordb.as_retriever(
     search_type="mmr",
     search_kwargs={"k": 4, "fetch_k": 10},
 )
 llm = ChatOpenAI(
-    model="gpt-4o-mini",
     temperature=0.1,
     streaming=True,
 )

 from langchain_openai import ChatOpenAI
 from langchain.prompts import PromptTemplate
 from langchain.chains import ConversationalRetrievalChain
+from langchain_community.vectorstores import Qdrant
 from langchain.memory import ConversationBufferMemory
 import chainlit as cl
 load_dotenv()
     "AI_Risk_Management_Framework.pdf",
     "Blueprint-for-an-AI-Bill-of-Rights.pdf",
 ]
+documents = []
+for pdf_path in pdf_paths:
+    loader = PyPDFLoader(pdf_path)
+    documents.extend(loader.load())
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1000,
+    chunk_overlap=200,
+)
+docs = text_splitter.split_documents(documents)
+embedding = OpenAIEmbeddings(model="text-embedding-3-small")
+vectorstore = Qdrant.from_documents(
+    documents=docs,
+    embedding=embedding,
+    location=":memory:",
+    collection_name="Midterm Embedding Eval",
+)
 custom_template = """
 You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.
     template=custom_template, input_variables=["context", "question", "chat_history"]
 )
+retriever = vectorstore.as_retriever(
     search_type="mmr",
     search_kwargs={"k": 4, "fetch_k": 10},
 )
 llm = ChatOpenAI(
+    model="gpt-4",
     temperature=0.1,
     streaming=True,
 )

requirements.txt CHANGED Viewed

@@ -6,4 +6,5 @@ python-dotenv==1.0.0
 langchain
 langchain_openai==0.1.1
 pypdf
-chromadb

 langchain
 langchain_openai==0.1.1
 pypdf
+chromadb
+qdrant-client