svijayanand committed
Commit 0e505da · verified · 1 Parent(s): cb8132c

Update ingest_data.py

Files changed (1)
  1. ingest_data.py +6 -1
ingest_data.py CHANGED
@@ -42,6 +42,7 @@ def download_data_and_create_embedding():
     """
 
     # create a text splitter with 1000 character chunks and 100 character overlap
+    print("setup chunking...")
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     chunked_documents = text_splitter.split_documents(
         data
@@ -55,6 +56,7 @@ def download_data_and_create_embedding():
     For this exercise, we will use OpenAI's embedding model.
     """
 
+    print("Setup Embedding Model...")
     openai_api_key = os.getenv("OPENAI_API_KEY")
     # create our embedding model
     embedding_model = OpenAIEmbeddings(
@@ -70,12 +72,14 @@ def download_data_and_create_embedding():
     # create a local file store for our cached embeddings
     store = LocalFileStore(
         "./cache/"
-    )
+    )
+    print("setup Cache Backed Embedder...")
     embedder = CacheBackedEmbeddings.from_bytes_store(
         underlying_embeddings, store, namespace=underlying_embeddings.model
     )
 
     # Create vector store using Facebook AI Similarity Search (FAISS)
+    print("load documents to vector store...")
     vector_store = FAISS.from_documents(
         documents=chunked_documents, embedding=embedder
     ) # TODO: How do we create our vector store using FAISS?
@@ -83,6 +87,7 @@ def download_data_and_create_embedding():
 
 
     # save our vector store locally
+    print("save vector store locally...")
    vector_store.save_local("faiss_index")
 
    return vector_store
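A note on the cache-backed embedder wired up above: LocalFileStore plus CacheBackedEmbeddings means each chunk is sent to the OpenAI embeddings endpoint at most once, with repeat runs served from ./cache/. Below is a minimal, self-contained sketch of that pattern, assuming the langchain and langchain-openai packages and an OPENAI_API_KEY in the environment; the variable names and sample texts are illustrative, not the repository's.

from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_openai import OpenAIEmbeddings

# underlying model that actually calls the OpenAI embeddings endpoint
underlying_embeddings = OpenAIEmbeddings()

# on-disk byte store; entries are keyed by hashed text under the given namespace
store = LocalFileStore("./cache/")
embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings, store, namespace=underlying_embeddings.model
)

# the first run embeds via the API; rerunning with the same texts reuses ./cache/
vectors = embedder.embed_documents(["hello world", "how are you"])

The namespace argument keeps cached vectors produced by different embedding models from colliding in the same store.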
 
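The function ends by writing the index to the local faiss_index folder and returning the store. For completeness, here is a sketch of how that saved index might be reloaded for querying elsewhere, assuming the same langchain-community / langchain-openai / faiss-cpu stack used above; the allow_dangerous_deserialization flag is required by recent langchain-community releases when loading a pickled local index, and the query string is a placeholder, not code from this commit.

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# must match the embedding model that produced the index at ingest time
embedding_model = OpenAIEmbeddings()

vector_store = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,  # opt in to pickle-based loading
)

# basic similarity search over the ingested chunks
docs = vector_store.similarity_search("example question about the ingested data", k=4)
for doc in docs:
    print(doc.page_content[:200])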