# Imports: document loading, splitting, embeddings, vector store, and the chat chain
from dotenv import dotenv_values
import chainlit as cl
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableMap
from langchain.schema.output_parser import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Only needed for the one-time dataset export below (currently commented out)
from datasets import load_dataset
import pandas as pd
# Load API keys from key.env (expects an OPEN_API_KEY entry)
my_secrets = dotenv_values("key.env")
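# key.env is expected to define the key in standard dotenv form, e.g.
# (variable name taken from the lookups below; the value shown is a placeholder):
#   OPEN_API_KEY=sk-...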
# One-time export: download the IMDB dataset and save it as data.csv.
# Uncomment to regenerate the file.
# dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
# data = dataset["train"]               # change to the split you want to save
# df = pd.DataFrame(data)               # convert the split to a DataFrame
# df.to_csv("data.csv", index=False)    # write it next to this script
# Load the CSV. TextLoader reads the whole file as one document;
# the splitter below chunks it into retrievable pieces.
loader = TextLoader("data.csv")
documents = loader.load()
# Split into overlapping chunks so each embedding covers a bounded span of text
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)
docs = text_splitter.split_documents(documents)
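# Sanity check (optional sketch): confirm the split produced chunks
# print(f"{len(docs)} chunks from {len(documents)} source document(s)")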
# Create embeddings, cached on disk so re-runs don't re-call the embedding API
underlying_embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002", api_key=my_secrets["OPEN_API_KEY"]
)
store = LocalFileStore("./cache/")
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings, store, namespace=underlying_embeddings.model
)
db = FAISS.from_documents(docs, cached_embedder)
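# Optional persistence sketch (assumed folder name "faiss_index"): save the index
# so later runs can skip re-embedding; load_local's allow_dangerous_deserialization
# flag exists in recent langchain_community releases.
# db.save_local("faiss_index")
# db = FAISS.load_local("faiss_index", cached_embedder, allow_dangerous_deserialization=True)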
# Build a retriever over the index; the top k=10 chunks are stuffed into the prompt
retriever = db.as_retriever(search_kwargs={"k": 10})
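# Quick retrieval check (assumed query; retrievers expose the Runnable interface):
# print(retriever.invoke("movies about boxing")[:2])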
@cl.on_chat_start
def start():
    # Prompt template: constrain answers to the retrieved context only
    template = """
    You are a helpful AI assistant tasked with answering the user's questions about movies.
    Only answer from the provided context. If an answer cannot be formed strictly from the
    context, politely say you don't have knowledge about that topic under the 'ANSWER:' tag.
    Always return both an answer under an 'ANSWER:' tag and a bulleted list of citations
    under a 'CITATIONS:' tag. A citation consists of a VERBATIM quote that justifies the
    answer and the ID of the quoted article; include a citation for every quote, across
    all articles, that justifies the answer. Put each tag on its own line and add a blank
    line after the citations. Use the following format for your final output:

    ANSWER:
    CITATIONS:

    CONTEXT:
    {context}

    QUESTION: {question}

    YOUR ANSWER:
    """
    prompt = ChatPromptTemplate.from_messages([("system", template)])
    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125", temperature=0, api_key=my_secrets["OPEN_API_KEY"]
    )

    # Map the incoming question to (retrieved context, question) for the prompt
    inputs = RunnableMap({
        "context": lambda x: retriever.get_relevant_documents(x["question"]),
        "question": lambda x: x["question"],
    })

    # Compose the chain: retrieve -> prompt -> model -> plain string
    runnable_chain = inputs | prompt | llm | StrOutputParser()
    cl.user_session.set("runnable_chain", runnable_chain)
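
# Debug sketch (assumed question text; not part of the app flow): after building
# the chain as in start(), it can be invoked directly from a Python shell:
# print(runnable_chain.invoke({"question": "Who directed Casablanca?"}))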
@cl.on_message
async def on_message(message: cl.Message):
    runnable_chain = cl.user_session.get("runnable_chain")
    # Use the async invoke so the event loop is not blocked while the LLM responds
    result = await runnable_chain.ainvoke({"question": message.content})
    await cl.Message(content=result).send()
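
# Run with the Chainlit CLI; -w watches the file and reloads on change:
#   chainlit run app.py -w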