Andrew Lai committed on
Commit
536c385
·
1 Parent(s): accaa4c
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.psd filter=lfs diff=lfs merge=lfs -text
37
  *.csv filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.psd filter=lfs diff=lfs merge=lfs -text
37
  *.csv filter=lfs diff=lfs merge=lfs -text
38
+ *.cache filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -4,6 +4,8 @@ from google.colab import userdata
4
  from langchain import OpenAI, LLMMathChain, SerpAPIWrapper
5
  from langchain.agents import initialize_agent, Tool, AgentExecutor
6
  from langchain_community.chat_models import ChatOpenAI
 
 
7
  import os
8
  import chainlit as cl
9
  import openai
@@ -28,6 +30,20 @@ from dotenv import dotenv_values
28
  # get keys
29
  my_secrets = dotenv_values("key.env")
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  #load the csv
32
  loader = TextLoader('data.csv')
33
  documents = loader.load()
@@ -44,7 +60,13 @@ docs = text_splitter.split_documents(documents)
44
 
45
  # create embeddings
46
  underlying_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002",api_key=my_secrets["OPEN_API_KEY"])
47
- db = FAISS.from_documents(docs, underlying_embeddings)
 
 
 
 
 
 
48
 
49
  # Get the retriever for the Chat Model
50
  retriever = db.as_retriever(
 
4
  from langchain import OpenAI, LLMMathChain, SerpAPIWrapper
5
  from langchain.agents import initialize_agent, Tool, AgentExecutor
6
  from langchain_community.chat_models import ChatOpenAI
7
+ from langchain.embeddings import CacheBackedEmbeddings
8
+ from langchain.storage import LocalFileStore
9
  import os
10
  import chainlit as cl
11
  import openai
 
30
  # get keys
31
  my_secrets = dotenv_values("key.env")
32
 
33
+ # download data
34
+ #dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
35
+ #split_name = "train" # Change this to the split you want to save
36
+ #data = dataset[split_name]
37
+
38
+ # Convert the dataset to a pandas DataFrame
39
+ #df = pd.DataFrame(data)
40
+
41
+ # Define the path where you want to save the CSV file
42
+ #csv_file_path = 'data.csv'
43
+
44
+ # Save the DataFrame to a CSV file
45
+ #df.to_csv(csv_file_path, index=False)
46
+
47
  #load the csv
48
  loader = TextLoader('data.csv')
49
  documents = loader.load()
 
60
 
61
  # create embeddings
62
  underlying_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002",api_key=my_secrets["OPEN_API_KEY"])
63
+ store = LocalFileStore("./cache/")
64
+
65
+ cached_embedder = CacheBackedEmbeddings.from_bytes_store(
66
+ underlying_embeddings, store, namespace=underlying_embeddings.model
67
+ )
68
+
69
+ db = FAISS.from_documents(docs, cached_embedder)
70
 
71
  # Get the retriever for the Chat Model
72
  retriever = db.as_retriever(
cache/text-embedding-ada-00217774108-46e7-520b-ae73-abbed038011a.cache ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d362eb5113dd5397957c9e6a74ee8cf013f06796a13c6cc5d7dbea5c1155845
3
+ size 34480
chainlit.md ADDED
@@ -0,0 +1 @@
 
 
1
+ # Welcome to Andy's Movie RAG for all your Movie Questions! 🚀🤖