Andrew Lai
committed on
Commit
·
536c385
1
Parent(s):
accaa4c
update
Browse files
- .DS_Store +0 -0
- .gitattributes +1 -0
- app.py +23 -1
- cache/text-embedding-ada-00217774108-46e7-520b-ae73-abbed038011a.cache +3 -0
- chainlit.md +1 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*.psd filter=lfs diff=lfs merge=lfs -text
|
37 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*.psd filter=lfs diff=lfs merge=lfs -text
|
37 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.cache filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -4,6 +4,8 @@ from google.colab import userdata
|
|
4 |
from langchain import OpenAI, LLMMathChain, SerpAPIWrapper
|
5 |
from langchain.agents import initialize_agent, Tool, AgentExecutor
|
6 |
from langchain_community.chat_models import ChatOpenAI
|
|
|
|
|
7 |
import os
|
8 |
import chainlit as cl
|
9 |
import openai
|
@@ -28,6 +30,20 @@ from dotenv import dotenv_values
|
|
28 |
# get keys
|
29 |
my_secrets = dotenv_values("key.env")
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
#load the csv
|
32 |
loader = TextLoader('data.csv')
|
33 |
documents = loader.load()
|
@@ -44,7 +60,13 @@ docs = text_splitter.split_documents(documents)
|
|
44 |
|
45 |
# create embeddings
|
46 |
underlying_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002",api_key=my_secrets["OPEN_API_KEY"])
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
# Get the retriever for the Chat Model
|
50 |
retriever = db.as_retriever(
|
|
|
4 |
from langchain import OpenAI, LLMMathChain, SerpAPIWrapper
|
5 |
from langchain.agents import initialize_agent, Tool, AgentExecutor
|
6 |
from langchain_community.chat_models import ChatOpenAI
|
7 |
+
from langchain.embeddings import CacheBackedEmbeddings
|
8 |
+
from langchain.storage import LocalFileStore
|
9 |
import os
|
10 |
import chainlit as cl
|
11 |
import openai
|
|
|
30 |
# get keys
|
31 |
my_secrets = dotenv_values("key.env")
|
32 |
|
33 |
+
# download data
|
34 |
+
#dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
|
35 |
+
#split_name = "train" # Change this to the split you want to save
|
36 |
+
#data = dataset[split_name]
|
37 |
+
|
38 |
+
# Convert the dataset to a pandas DataFrame
|
39 |
+
#df = pd.DataFrame(data)
|
40 |
+
|
41 |
+
# Define the path where you want to save the CSV file
|
42 |
+
#csv_file_path = 'data.csv'
|
43 |
+
|
44 |
+
# Save the DataFrame to a CSV file
|
45 |
+
#df.to_csv(csv_file_path, index=False)
|
46 |
+
|
47 |
#load the csv
|
48 |
loader = TextLoader('data.csv')
|
49 |
documents = loader.load()
|
|
|
60 |
|
61 |
# create embeddings
|
62 |
underlying_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002",api_key=my_secrets["OPEN_API_KEY"])
|
63 |
+
store = LocalFileStore("./cache/")
|
64 |
+
|
65 |
+
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
|
66 |
+
underlying_embeddings, store, namespace=underlying_embeddings.model
|
67 |
+
)
|
68 |
+
|
69 |
+
db = FAISS.from_documents(docs, cached_embedder)
|
70 |
|
71 |
# Get the retriever for the Chat Model
|
72 |
retriever = db.as_retriever(
|
cache/text-embedding-ada-00217774108-46e7-520b-ae73-abbed038011a.cache
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d362eb5113dd5397957c9e6a74ee8cf013f06796a13c6cc5d7dbea5c1155845
|
3 |
+
size 34480
|
chainlit.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Welcome to Andy's Movie RAG for all your Movie Questions! 🚀🤖
|