Spaces:
Runtime error
Runtime error
File size: 3,750 Bytes
cacc27c e628706 43b89fd e628706 cacc27c e628706 20b588a e628706 20b588a e628706 20b588a e628706 43b89fd e628706 43b89fd cacc27c e628706 43b89fd cacc27c e628706 cacc27c e628706 cacc27c df05ac1 e628706 cacc27c df05ac1 af6d74d df05ac1 e628706 fc84e9c 4209e2a e628706 cacc27c e628706 cacc27c e628706 cacc27c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import logging
import chainlit as cl
import os
from datasets import load_dataset
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableConfig
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.vectorstores import FAISS
from langchain_core.runnables.passthrough import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
import asyncio
# Announce module import in the log.
# NOTE(review): no logging.basicConfig() is called anywhere visible, so at the
# default WARNING level these INFO records are dropped — confirm the runner
# (chainlit) configures logging, or add basicConfig here.
logging.info("""
=================================================================================
STARTING
=================================================================================
""")
# --- Data preparation (module-level, runs once at import) ---

# Download the IMDB dataset and persist it as a local CSV on first run.
csv_path = "./imdb.csv"
if not os.path.exists(csv_path):
    dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
    # Write to csv_path (was a hard-coded 'imdb.csv') so the existence guard
    # above and the write target can never drift apart.
    dataset["train"].to_csv(csv_path)

loader = CSVLoader(file_path=csv_path)
data = loader.load()

# Split the documents into overlapping chunks so retrieval returns
# focused passages rather than whole reviews.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunked_documents = text_splitter.split_documents(data)

# Cache embeddings on local disk so repeated runs do not re-call the
# OpenAI embeddings API for unchanged chunks.
embedding_model = OpenAIEmbeddings()
store = LocalFileStore("./cache/")
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    embedding_model, store, namespace=embedding_model.model
)

# Reuse a previously built FAISS index when available; otherwise build
# it from the chunked documents and persist it for the next run.
index_path = "faiss_index"
if os.path.exists(index_path):
    # allow_dangerous_deserialization: acceptable here because the index
    # file was produced by this same application, not untrusted input.
    vector_store = FAISS.load_local(index_path, cached_embedder, allow_dangerous_deserialization=True)
    logging.info("Vector store loaded from saved index.")
else:
    vector_store = FAISS.from_documents(chunked_documents, cached_embedder)
    logging.info("Vector store created from documents.")
    vector_store.save_local(index_path)
    logging.info("Vector store saved locally.")
@cl.on_chat_start
async def on_chat_start():
    """Build the RAG chain for a new chat session and stash it in the session.

    Pipeline: retriever (FAISS) -> prompt template -> streaming GPT-4o -> str parser.
    The assembled runnable is stored under the "runnable" session key for
    on_message to retrieve.
    """
    logging.info("""
=================================================================================
ON START CHAT
=================================================================================
""")
    # Prompt: answer only from retrieved context, refuse otherwise.
    prompt_template = ChatPromptTemplate.from_template("""You are an AI agent that suggests movies to people.
Answer the question based on the context below. If the question cannot be answered provide answer with "Sorry, I cannot help you"
Context:
{context}
Question:{question}
""")
    retriever = vector_store.as_retriever()
    # streaming=True so tokens can be forwarded to the UI as they arrive.
    chat_model = ChatOpenAI(model="gpt-4o", temperature=0, streaming=True)
    parser = StrOutputParser()
    # LCEL composition: the dict feeds {context} from the retriever and
    # passes the raw user question through to {question}.
    runnable = {"context": retriever, "question": RunnablePassthrough()} | prompt_template | chat_model | parser
    cl.user_session.set("runnable", runnable)
@cl.on_message
async def on_message(message: cl.Message):
    """Stream an answer for the user's message through the session's RAG chain.

    Retrieves the runnable built in on_chat_start, sends an acknowledgement,
    then streams the model's tokens into a single reply message.
    """
    logging.info(f"""
=================================================================================
ON MESSAGE: {message.content}
=================================================================================
""")
    runnable = cl.user_session.get("runnable")
    await cl.Message(content="Lemme see what I can do for you...").send()
    # Accumulate streamed tokens into one chainlit message.
    msg = cl.Message(content="")
    async for chunk in runnable.astream(
        message.content,
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        logging.info(f"Received chunk <{chunk}>")
        await msg.stream_token(chunk)
    # Plain string (was an f-string with no placeholders).
    logging.info("Sending message")
    await msg.send()
    logging.info(f"Done with <{message.content}>")
|