Andrew Lai committed
Commit · 536c385
Parent(s): accaa4c

update

Files changed:
- .DS_Store +0 -0
- .gitattributes +1 -0
- app.py +23 -1
- cache/text-embedding-ada-00217774108-46e7-520b-ae73-abbed038011a.cache +3 -0
- chainlit.md +1 -0

.DS_Store
ADDED
Binary file (6.15 kB)

.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.psd filter=lfs diff=lfs merge=lfs -text
 *.csv filter=lfs diff=lfs merge=lfs -text
+*.cache filter=lfs diff=lfs merge=lfs -text
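Note on the new rule: files matching *.cache (such as the embedding cache committed below) are now stored through Git LFS rather than as regular Git blobs. A rule like this is typically produced by running git lfs track "*.cache" from the repository root, which appends the pattern to .gitattributes; that command is an assumption about how the line was generated, not something shown in the commit.
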
app.py
CHANGED
@@ -4,6 +4,8 @@ from google.colab import userdata
 from langchain import OpenAI, LLMMathChain, SerpAPIWrapper
 from langchain.agents import initialize_agent, Tool, AgentExecutor
 from langchain_community.chat_models import ChatOpenAI
+from langchain.embeddings import CacheBackedEmbeddings
+from langchain.storage import LocalFileStore
 import os
 import chainlit as cl
 import openai
@@ -28,6 +30,20 @@ from dotenv import dotenv_values
 # get keys
 my_secrets = dotenv_values("key.env")
 
+# download data
+#dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
+#split_name = "train" # Change this to the split you want to save
+#data = dataset[split_name]
+
+# Convert the dataset to a pandas DataFrame
+#df = pd.DataFrame(data)
+
+# Define the path where you want to save the CSV file
+#csv_file_path = 'data.csv'
+
+# Save the DataFrame to a CSV file
+#df.to_csv(csv_file_path, index=False)
+
 #load the csv
 loader = TextLoader('data.csv')
 documents = loader.load()
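
The "download data" block added in this hunk is committed fully commented out. For reference, a runnable version of that one-time export step would look roughly like the sketch below; the datasets and pandas imports are assumptions (they are not visible in the diff), and everything else mirrors the commented lines.

import pandas as pd
from datasets import load_dataset

# Download the IMDB movies dataset from the Hugging Face Hub.
dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
split_name = "train"  # change this to the split you want to save
data = dataset[split_name]

# Convert the split to a pandas DataFrame and save it as the CSV that app.py loads.
df = pd.DataFrame(data)
df.to_csv("data.csv", index=False)

(data.to_pandas() would be an equivalent way to build the DataFrame.)
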
@@ -44,7 +60,13 @@ docs = text_splitter.split_documents(documents)
 
 # create embeddings
 underlying_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002",api_key=my_secrets["OPEN_API_KEY"])
-
+store = LocalFileStore("./cache/")
+
+cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+    underlying_embeddings, store, namespace=underlying_embeddings.model
+)
+
+db = FAISS.from_documents(docs, cached_embedder)
 
 # Get the retriever for the Chat Model
 retriever = db.as_retriever(
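
Together with the two imports added in the first hunk, this hunk wraps the OpenAI embedder in a cache backed by ./cache/: texts are embedded through the API only the first time they are seen, and the resulting vectors are persisted to disk (the .cache file committed below) and reused on later runs. A minimal, self-contained sketch of the pattern follows; the OpenAIEmbeddings, FAISS, and Document imports and the stand-in docs list are assumptions filling in for code app.py already has outside this diff, and the API key is read from the OPENAI_API_KEY environment variable rather than from key.env as in app.py.

from langchain.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings
from langchain.schema import Document
from langchain.storage import LocalFileStore
from langchain.vectorstores import FAISS

# Stand-in for the split documents that app.py builds from data.csv.
docs = [
    Document(page_content="Example movie synopsis."),
    Document(page_content="Another movie synopsis."),
]

# Plain embedder: every call goes to the OpenAI embeddings endpoint.
# Assumes OPENAI_API_KEY is set in the environment.
underlying_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# File-backed byte store where cached vectors are written.
store = LocalFileStore("./cache/")

# Wrapper that checks the store first and only embeds texts it has not seen before.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings, store, namespace=underlying_embeddings.model
)

# Rebuilding the index on a later run reads vectors from ./cache/ instead of the API.
db = FAISS.from_documents(docs, cached_embedder)
retriever = db.as_retriever()

Namespacing the cache with the model name keeps vectors produced by different embedding models from colliding in the same ./cache/ directory.
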

cache/text-embedding-ada-00217774108-46e7-520b-ae73-abbed038011a.cache
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d362eb5113dd5397957c9e6a74ee8cf013f06796a13c6cc5d7dbea5c1155845
+size 34480
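Only the Git LFS pointer is committed here (the spec version, the sha256 oid, and the object size in bytes); the 34,480-byte cache payload itself lives in LFS storage, as required by the *.cache rule added to .gitattributes above.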

chainlit.md
ADDED
@@ -0,0 +1 @@
+# Welcome to Andy's Movie RAG for all your Movie Questions! 🚀🤖
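Chainlit renders the contents of chainlit.md as the app's welcome screen, so this heading is the first thing users see when the chat UI loads.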