Spaces:
Build error
Build error
Update ingest_data.py
Browse files- ingest_data.py +6 -1
ingest_data.py
CHANGED
@@ -42,6 +42,7 @@ def download_data_and_create_embedding():
|
|
42 |
"""
|
43 |
|
44 |
# create a text splitter with 1000 character chunks and 100 character overlap?
|
|
|
45 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
46 |
chunked_documents = text_splitter.split_documents(
|
47 |
data
|
@@ -55,6 +56,7 @@ def download_data_and_create_embedding():
|
|
55 |
For this exercise, we will use OpenAI's embedding model.
|
56 |
"""
|
57 |
|
|
|
58 |
openai_api_key = os.getenv("OPENAI_API_KEY")
|
59 |
# create our embedding model
|
60 |
embedding_model = OpenAIEmbeddings(
|
@@ -70,12 +72,14 @@ def download_data_and_create_embedding():
|
|
70 |
# create a local file store for our cached embeddings
|
71 |
store = LocalFileStore(
|
72 |
"./cache/"
|
73 |
-
)
|
|
|
74 |
embedder = CacheBackedEmbeddings.from_bytes_store(
|
75 |
underlying_embeddings, store, namespace=underlying_embeddings.model
|
76 |
)
|
77 |
|
78 |
# Create vector store using Facebook AI Similarity Search (FAISS)
|
|
|
79 |
vector_store = FAISS.from_documents(
|
80 |
documents=chunked_documents, embedding=embedder
|
81 |
) # TODO: How do we create our vector store using FAISS?
|
@@ -83,6 +87,7 @@ def download_data_and_create_embedding():
|
|
83 |
|
84 |
|
85 |
# save our vector store locally
|
|
|
86 |
vector_store.save_local("faiss_index")
|
87 |
|
88 |
return vector_store
|
|
|
42 |
"""
|
43 |
|
44 |
# create a text splitter with 1000 character chunks and 100 character overlap?
|
45 |
+
print("setup chunking...")
|
46 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
47 |
chunked_documents = text_splitter.split_documents(
|
48 |
data
|
|
|
56 |
For this exercise, we will use OpenAI's embedding model.
|
57 |
"""
|
58 |
|
59 |
+
print("Setup Embedding Model...")
|
60 |
openai_api_key = os.getenv("OPENAI_API_KEY")
|
61 |
# create our embedding model
|
62 |
embedding_model = OpenAIEmbeddings(
|
|
|
72 |
# create a local file store for our cached embeddings
|
73 |
store = LocalFileStore(
|
74 |
"./cache/"
|
75 |
+
)
|
76 |
+
print("setup Cache Backed Embedder...")
|
77 |
embedder = CacheBackedEmbeddings.from_bytes_store(
|
78 |
underlying_embeddings, store, namespace=underlying_embeddings.model
|
79 |
)
|
80 |
|
81 |
# Create vector store using Facebook AI Similarity Search (FAISS)
|
82 |
+
print("load documents to vector store...")
|
83 |
vector_store = FAISS.from_documents(
|
84 |
documents=chunked_documents, embedding=embedder
|
85 |
) # TODO: How do we create our vector store using FAISS?
|
|
|
87 |
|
88 |
|
89 |
# save our vector store locally
|
90 |
+
print("save vector store locally...")
|
91 |
vector_store.save_local("faiss_index")
|
92 |
|
93 |
return vector_store
|