svijayanand committed on
Commit
b99197b
·
verified ·
1 Parent(s): 27d0a53

Update ingest_data.py

Browse files
Files changed (1) hide show
  1. ingest_data.py +4 -20
ingest_data.py CHANGED
@@ -18,12 +18,12 @@ underlying_embeddings = OpenAIEmbeddings(api_key=openai_api_key)
18
 
19
  def download_data_and_create_embedding():
20
  # Download an IMDB dataset from Hugging Face Hub, load the ShubhamChoksi/IMDB_Movies dataset
21
- dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
22
- print(dataset)
23
 
24
  # store imdb.csv from ShubhamChoksi/IMDB_Movies
25
- dataset_dict = dataset
26
- dataset_dict["train"].to_csv("imdb.csv")
27
 
28
  # load the csv file exported into a document
29
  loader = CSVLoader("imdb.csv") # TODO
@@ -85,20 +85,4 @@ def download_data_and_create_embedding():
85
  # save our vector store locally
86
  vector_store.save_local("faiss_index")
87
 
88
- query_embedding(vector_store=vector_store)
89
-
90
  return vector_store
91
-
92
- def query_embedding(vector_store) -> None:
93
- # Ask your RAG system a question!
94
- query = "What are some good sci-fi movies from the 1980s?"
95
-
96
- # embed our query
97
- embedded_query = underlying_embeddings.embed_query(query)
98
- similar_documents = vector_store.similarity_search_by_vector(
99
- embedded_query
100
- ) # TODO: How do we do a similarity search to find documents similar to our query?
101
-
102
- for page in similar_documents:
103
- # Print the similar documents that the similarity search returns?
104
- print(page.page_content)
 
18
 
19
  def download_data_and_create_embedding():
20
  # Download an IMDB dataset from Hugging Face Hub, load the ShubhamChoksi/IMDB_Movies dataset
21
+ # dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
22
+ # print(dataset)
23
 
24
  # store imdb.csv from ShubhamChoksi/IMDB_Movies
25
+ # dataset_dict = dataset
26
+ # dataset_dict["train"].to_csv("imdb.csv")
27
 
28
  # load the csv file exported into a document
29
  loader = CSVLoader("imdb.csv") # TODO
 
85
  # save our vector store locally
86
  vector_store.save_local("faiss_index")
87
 
 
 
88
  return vector_store