Spaces:
Build error
Build error
Update ingest_data.py
Browse files- ingest_data.py +4 -20
ingest_data.py
CHANGED
@@ -18,12 +18,12 @@ underlying_embeddings = OpenAIEmbeddings(api_key=openai_api_key)
|
|
18 |
|
19 |
def download_data_and_create_embedding():
|
20 |
# Download an IMDB dataset from Hugging Face Hub, load the ShubhamChoksi/IMDB_Movies dataset
|
21 |
-
dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
|
22 |
-
print(dataset)
|
23 |
|
24 |
# store imdb.csv from ShubhamChoksi/IMDB_Movies
|
25 |
-
dataset_dict = dataset
|
26 |
-
dataset_dict["train"].to_csv("imdb.csv")
|
27 |
|
28 |
# load the csv file exported into a document
|
29 |
loader = CSVLoader("imdb.csv") # TODO
|
@@ -85,20 +85,4 @@ def download_data_and_create_embedding():
|
|
85 |
# save our vector store locally
|
86 |
vector_store.save_local("faiss_index")
|
87 |
|
88 |
-
query_embedding(vector_store=vector_store)
|
89 |
-
|
90 |
return vector_store
|
91 |
-
|
92 |
-
def query_embedding(vector_store) -> None:
|
93 |
-
# Ask your RAG system a question!
|
94 |
-
query = "What are some good sci-fi movies from the 1980s?"
|
95 |
-
|
96 |
-
# embed our query
|
97 |
-
embedded_query = underlying_embeddings.embed_query(query)
|
98 |
-
similar_documents = vector_store.similarity_search_by_vector(
|
99 |
-
embedded_query
|
100 |
-
) # TODO: How do we do a similarity search to find documents similar to our query?
|
101 |
-
|
102 |
-
for page in similar_documents:
|
103 |
-
# Print the similar documents that the similarity search returns
|
104 |
-
print(page.page_content)
|
|
|
18 |
|
19 |
def download_data_and_create_embedding():
|
20 |
# Download an IMDB dataset from Hugging Face Hub, load the ShubhamChoksi/IMDB_Movies dataset
|
21 |
+
# dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
|
22 |
+
# print(dataset)
|
23 |
|
24 |
# store imdb.csv from ShubhamChoksi/IMDB_Movies
|
25 |
+
# dataset_dict = dataset
|
26 |
+
# dataset_dict["train"].to_csv("imdb.csv")
|
27 |
|
28 |
# load the csv file exported into a document
|
29 |
loader = CSVLoader("imdb.csv") # TODO
|
|
|
85 |
# save our vector store locally
|
86 |
vector_store.save_local("faiss_index")
|
87 |
|
|
|
|
|
88 |
return vector_store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|