Spaces:
Sleeping
Sleeping
xicocdi
committed on
Commit
·
0d2dab1
1
Parent(s):
b9e9736
update vectorstore
Browse files
- app.py +22 -25
- requirements.txt +2 -1
app.py
CHANGED
@@ -9,7 +9,9 @@ from langchain_community.vectorstores.chroma import Chroma
|
|
9 |
from langchain_openai import ChatOpenAI
|
10 |
from langchain.prompts import PromptTemplate
|
11 |
from langchain.chains import ConversationalRetrievalChain
|
|
|
12 |
from langchain.memory import ConversationBufferMemory
|
|
|
13 |
import chainlit as cl
|
14 |
|
15 |
load_dotenv()
|
@@ -18,32 +20,27 @@ pdf_paths = [
|
|
18 |
"AI_Risk_Management_Framework.pdf",
|
19 |
"Blueprint-for-an-AI-Bill-of-Rights.pdf",
|
20 |
]
|
21 |
-
persist_directory = "docs/chroma/"
|
22 |
-
|
23 |
-
|
24 |
-
if os.path.exists(persist_directory) and os.listdir(persist_directory):
|
25 |
-
print("Loading existing vector database...")
|
26 |
-
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
|
27 |
-
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
|
28 |
-
else:
|
29 |
-
print("Creating new vector database...")
|
30 |
-
documents = []
|
31 |
-
for pdf_path in pdf_paths:
|
32 |
-
loader = PyPDFLoader(pdf_path)
|
33 |
-
documents.extend(loader.load())
|
34 |
-
|
35 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
36 |
-
chunk_size=1000,
|
37 |
-
chunk_overlap=200,
|
38 |
-
)
|
39 |
|
40 |
-
|
|
|
|
|
|
|
41 |
|
42 |
-
|
|
|
|
|
|
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
custom_template = """
|
49 |
You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.
|
@@ -70,13 +67,13 @@ PROMPT = PromptTemplate(
|
|
70 |
template=custom_template, input_variables=["context", "question", "chat_history"]
|
71 |
)
|
72 |
|
73 |
-
retriever =
|
74 |
search_type="mmr",
|
75 |
search_kwargs={"k": 4, "fetch_k": 10},
|
76 |
)
|
77 |
|
78 |
llm = ChatOpenAI(
|
79 |
-
model="gpt-
|
80 |
temperature=0.1,
|
81 |
streaming=True,
|
82 |
)
|
|
|
9 |
from langchain_openai import ChatOpenAI
|
10 |
from langchain.prompts import PromptTemplate
|
11 |
from langchain.chains import ConversationalRetrievalChain
|
12 |
+
from langchain_community.vectorstores import Qdrant
|
13 |
from langchain.memory import ConversationBufferMemory
|
14 |
+
|
15 |
import chainlit as cl
|
16 |
|
17 |
load_dotenv()
|
|
|
20 |
"AI_Risk_Management_Framework.pdf",
|
21 |
"Blueprint-for-an-AI-Bill-of-Rights.pdf",
|
22 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
+
documents = []
|
25 |
+
for pdf_path in pdf_paths:
|
26 |
+
loader = PyPDFLoader(pdf_path)
|
27 |
+
documents.extend(loader.load())
|
28 |
|
29 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
30 |
+
chunk_size=1000,
|
31 |
+
chunk_overlap=200,
|
32 |
+
)
|
33 |
|
34 |
+
docs = text_splitter.split_documents(documents)
|
35 |
+
|
36 |
+
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
|
37 |
+
|
38 |
+
vectorstore = Qdrant.from_documents(
|
39 |
+
documents=docs,
|
40 |
+
embedding=embedding,
|
41 |
+
location=":memory:",
|
42 |
+
collection_name="Midterm Embedding Eval",
|
43 |
+
)
|
44 |
|
45 |
custom_template = """
|
46 |
You are an expert in artificial intelligence policy, ethics, and industry trends. Your task is to provide clear and accurate answers to questions related to AI's role in politics, government regulations, and its ethical implications for enterprises. Use reliable and up-to-date information from government documents, industry reports, and academic research to inform your responses. Make sure to consider how AI is evolving, especially in relation to the current political landscape, and provide answers in a way that is easy to understand for both AI professionals and non-experts.
|
|
|
67 |
template=custom_template, input_variables=["context", "question", "chat_history"]
|
68 |
)
|
69 |
|
70 |
+
retriever = vectorstore.as_retriever(
|
71 |
search_type="mmr",
|
72 |
search_kwargs={"k": 4, "fetch_k": 10},
|
73 |
)
|
74 |
|
75 |
llm = ChatOpenAI(
|
76 |
+
model="gpt-4",
|
77 |
temperature=0.1,
|
78 |
streaming=True,
|
79 |
)
|
requirements.txt
CHANGED
@@ -6,4 +6,5 @@ python-dotenv==1.0.0
|
|
6 |
langchain
|
7 |
langchain_openai==0.1.1
|
8 |
pypdf
|
9 |
-
chromadb
|
|
|
|
6 |
langchain
|
7 |
langchain_openai==0.1.1
|
8 |
pypdf
|
9 |
+
chromadb
|
10 |
+
qdrant-client
|