Commit af34abb · Victor Hom committed
Parent(s): cd03d4e
update
app.py
CHANGED
@@ -7,6 +7,15 @@ import chainlit as cl # importing chainlit for our app
 from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
 from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
 from dotenv import load_dotenv
+from langchain.document_loaders import CSVLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+from langchain.embeddings import CacheBackedEmbeddings
+from langchain.storage import LocalFileStore
+from langchain_community.vectorstores import FAISS
+from datasets import load_dataset
+
+
 
 load_dotenv()
 
@@ -18,6 +27,59 @@ user_template = """{input}
 Think through your response step by step.
 """
 
+def setup():
+    dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
+    print(dataset['train'][0])
+    print("data from huggingface dataset\n")
+
+    dataset_dict = dataset
+    dataset_dict["train"] # TODO - what method do we have to use to store imdb.csv from ShubhamChoksi/IMDB_Movies?
+
+    dataset_dict["train"].to_csv("imdb.csv")
+    loader = CSVLoader(file_path='imdb.csv')
+    data = loader.load()
+    len(data)
+    print(data[0])
+    print("loaded data from csv\n")
+
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = 1000,
+        chunk_overlap = 100,
+    )
+
+    chunked_documents = text_splitter.split_documents(data)
+
+
+    len(chunked_documents) # ensure we have actually split the data into chunks
+    print(chunked_documents[0])
+
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+
+    embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+
+    store = LocalFileStore("./cache/")
+    embedder = CacheBackedEmbeddings.from_bytes_store(
+        embedding_model, store, namespace=embedding_model.model
+    )
+
+    vector_store = FAISS.from_documents(chunked_documents, embedder)
+
+    vector_store.save_local("./vector_store")
+
+    vector_store = FAISS.load_local("./vector_store", embedder, allow_dangerous_deserialization=True)
+
+    retriever = vector_store.as_retriever()
+
+    query = "What are some good westerns movies?"
+    embedded_query = embedding_model.embed_query(query)
+    similar_documents = vector_store.similarity_search_by_vector(embedded_query)
+    for page in similar_documents:
+        # TODO: Print the similar documents that the similarity search returns?
+        print(page)
+        print("00-----0000")
+        print(page)
+        print("-------------")
 
 @cl.on_chat_start # marks a function that will be executed at the start of a user session
 async def start_chat():