André Oriani committed on
Commit
20b588a
·
1 Parent(s): 5c783da
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -18,6 +18,8 @@ print("""
18
  STARTING
19
  =================================================================================
20
  """)
 
 
21
  csv_path = "./imdb.csv"
22
  if not os.path.exists(csv_path):
23
  dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
@@ -26,9 +28,11 @@ if not os.path.exists(csv_path):
26
  loader = CSVLoader(file_path=csv_path)
27
  data = loader.load()
28
 
 
29
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
30
  chunked_documents = text_splitter.split_documents(data)
31
 
 
32
  embedding_model = OpenAIEmbeddings()
33
  store = LocalFileStore("./cache/")
34
  embedder = CacheBackedEmbeddings.from_bytes_store(embedding_model, store, namespace=embedding_model.model)
 
18
  STARTING
19
  =================================================================================
20
  """)
21
+
22
+ # Download the dataset and save it as CSV if it doesn't exist yet.
23
  csv_path = "./imdb.csv"
24
  if not os.path.exists(csv_path):
25
  dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
 
28
  loader = CSVLoader(file_path=csv_path)
29
  data = loader.load()
30
 
31
+ # Split the data into chunks
32
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
33
  chunked_documents = text_splitter.split_documents(data)
34
 
35
+ # Store the chunked documents in the vector store if that has not been done already
36
  embedding_model = OpenAIEmbeddings()
37
  store = LocalFileStore("./cache/")
38
  embedder = CacheBackedEmbeddings.from_bytes_store(embedding_model, store, namespace=embedding_model.model)