Spaces:
Runtime error
Runtime error
André Oriani
committed on
Commit
·
20b588a
1
Parent(s):
5c783da
moar log
Browse files
app.py
CHANGED
@@ -18,6 +18,8 @@ print("""
|
|
18 |
STARTING
|
19 |
=================================================================================
|
20 |
""")
|
|
|
|
|
21 |
csv_path = "./imdb.csv"
|
22 |
if not os.path.exists(csv_path):
|
23 |
dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
|
@@ -26,9 +28,11 @@ if not os.path.exists(csv_path):
|
|
26 |
loader = CSVLoader(file_path=csv_path)
|
27 |
data = loader.load()
|
28 |
|
|
|
29 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
30 |
chunked_documents = text_splitter.split_documents(data)
|
31 |
|
|
|
32 |
embedding_model = OpenAIEmbeddings()
|
33 |
store = LocalFileStore("./cache/")
|
34 |
embedder = CacheBackedEmbeddings.from_bytes_store(embedding_model, store, namespace=embedding_model.model)
|
|
|
18 |
STARTING
|
19 |
=================================================================================
|
20 |
""")
|
21 |
+
|
22 |
+
# Download the data set and save as CSV if it doesn't exist yet.
|
23 |
csv_path = "./imdb.csv"
|
24 |
if not os.path.exists(csv_path):
|
25 |
dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
|
|
|
28 |
loader = CSVLoader(file_path=csv_path)
|
29 |
data = loader.load()
|
30 |
|
31 |
+
# Split data into chunks
|
32 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
33 |
chunked_documents = text_splitter.split_documents(data)
|
34 |
|
35 |
+
# Store the chunked documents in the vector store if that was not done already
|
36 |
embedding_model = OpenAIEmbeddings()
|
37 |
store = LocalFileStore("./cache/")
|
38 |
embedder = CacheBackedEmbeddings.from_bytes_store(embedding_model, store, namespace=embedding_model.model)
|