|
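"""Chainlit RAG app: answers movie questions from a local dataset using
LangChain, FAISS, and cached OpenAI embeddings.

Launch with `chainlit run <this_file>.py` (substitute your script's filename).
"""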
import chainlit as cl
from dotenv import dotenv_values
from langchain.embeddings import CacheBackedEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableMap
from langchain.storage import LocalFileStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# key.env is expected to define OPEN_API_KEY=<your OpenAI API key>.
my_secrets = dotenv_values("key.env")
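# Load the raw movie data; TextLoader reads the whole file as one plain-text
# document, which is then chunked below.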
loader = TextLoader('data.csv')
documents = loader.load()
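
# Split the text into ~1000-character chunks with 100 characters of overlap
# so context is preserved across chunk boundaries.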
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

docs = text_splitter.split_documents(documents)
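
# Cache embeddings in a local file store so unchanged chunks are not
# re-embedded (and re-billed) on subsequent runs.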
underlying_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", api_key=my_secrets["OPEN_API_KEY"])
store = LocalFileStore("./cache/")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings, store, namespace=underlying_embeddings.model
)

# Build the FAISS vector index over the chunked documents.
db = FAISS.from_documents(docs, cached_embedder)
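
# Expose the index as a retriever returning the 10 most similar chunks per query.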
retriever = db.as_retriever(
    search_kwargs={"k": 10}
)
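

# Build the RAG chain once per chat session and store it in the user session.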
@cl.on_chat_start
def start():
    template = """
You're a helpful AI assistant tasked with answering the user's questions about movies.
You may only answer based on the provided context about movies. If an answer cannot be formed strictly from the context, politely say you don't have knowledge about that topic under the 'ANSWER:' tag, which is prefixed with a newline character.

Remember, you must return both an answer under the 'ANSWER:' tag (prefixed with a newline character) and citations as a bulleted list under the 'CITATIONS:' tag, on the lines after the answer. A citation consists of a VERBATIM quote that justifies the answer and the ID of the quoted article. Return a citation for every quote across all articles that justifies the answer. Add a newline character after all citations. Use the following format for your final output:

ANSWER:

CITATIONS:

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:
"""

    prompt = ChatPromptTemplate.from_messages([("system", template)])

    llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, api_key=my_secrets["OPEN_API_KEY"])

    # Map the incoming question to the prompt variables: 'context' gets the
    # retrieved documents, 'question' is passed through unchanged.
    inputs = RunnableMap({
        'context': lambda x: retriever.invoke(x['question']),
        'question': lambda x: x['question']
    })

    # Compose the LCEL pipeline: retrieve context -> fill prompt -> call the
    # model -> parse the reply into a plain string.
    runnable_chain = (
        inputs |
        prompt |
        llm |
        StrOutputParser()
    )
    cl.user_session.set("runnable_chain", runnable_chain)
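

# Run the session's chain on each incoming message and send back the answer.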
@cl.on_message
async def on_message(message: cl.Message):
    runnable_chain = cl.user_session.get("runnable_chain")
    msg = message.content

    # ainvoke keeps the Chainlit event loop free while the chain runs.
    result = await runnable_chain.ainvoke({"question": msg})

    await cl.Message(content=result).send()
|
|