# You can find the Chainlit Python streaming docs here: https://docs.chainlit.io/concepts/streaming/python
# OpenAI chat completion over IMDB movie data (RAG with LangChain + FAISS)
import os

import chainlit as cl  # Chainlit provides the chat UI and session handling
from datasets import load_dataset  # Hugging Face datasets, used to pull the IMDB data
from dotenv import load_dotenv
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
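# Load environment variables (OPENAI_API_KEY in particular) from a local .env file.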
load_dotenv()
# ChatOpenAI Templates
system_template = """You are a helpful assistant who always speaks in a pleasant tone!
"""
user_template = """{input}
Think through your response step by step.
"""
# RAG prompt template used for the IMDB chat
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
def setup():
    # Pull the IMDB movies dataset from the Hugging Face Hub.
    dataset = load_dataset("ShubhamChoksi/IMDB_Movies")
    print(dataset["train"][0])
    print("data from huggingface dataset\n")

    # to_csv stores the train split as imdb.csv so the CSVLoader can read it back.
    dataset["train"].to_csv("imdb.csv")

    loader = CSVLoader(file_path="imdb.csv")
    data = loader.load()
    print(f"loaded {len(data)} documents from csv\n")
    print(data[0])

    # Split the documents into overlapping chunks for embedding.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    chunked_documents = text_splitter.split_documents(data)
    # Ensure we have actually split the data into chunks.
    print(f"split into {len(chunked_documents)} chunks")
    print(chunked_documents[0])

    # Cache embeddings on disk so repeated runs don't re-embed identical chunks.
    openai_api_key = os.getenv("OPENAI_API_KEY")
    embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
    store = LocalFileStore("./cache/")
    embedder = CacheBackedEmbeddings.from_bytes_store(
        embedding_model, store, namespace=embedding_model.model
    )

    # Build the FAISS index and persist it for input_query() to load.
    vector_store = FAISS.from_documents(chunked_documents, embedder)
    vector_store.save_local("./vector_store")
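# Per-question retrieval: load the persisted FAISS index, find reviews similar
# to the query, and assemble them into a chat prompt for the LLM.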
def input_query(query):
    openai_api_key = os.getenv("OPENAI_API_KEY")
    embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
    store = LocalFileStore("./cache/")
    embedder = CacheBackedEmbeddings.from_bytes_store(
        embedding_model, store, namespace=embedding_model.model
    )
    vector_store = FAISS.load_local(
        "./vector_store", embedder, allow_dangerous_deserialization=True
    )
    retriever = vector_store.as_retriever()

    # Embed the user's question and fetch the most similar document chunks.
    embedded_query = embedding_model.embed_query(query)
    similar_documents = vector_store.similarity_search_by_vector(embedded_query)

    # Bake the retrieved chunks into the prompt as assistant messages, followed
    # by the user's question. Curly braces are escaped because
    # ChatPromptTemplate treats {name} as a template variable.
    def escape_braces(text):
        return text.replace("{", "{{").replace("}", "}}")

    similar_documents_for_prompt = [
        ("assistant", escape_braces(page.page_content)) for page in similar_documents
    ]
    similar_documents_for_prompt.append(("human", escape_braces(query)))

    alternative_prompt = ChatPromptTemplate.from_messages(similar_documents_for_prompt)
    print("alternative prompt")
    print(alternative_prompt.messages)

    # A context/question prompt built from `template`; the sketch below shows
    # how it would be wired into a retriever-driven chain.
    prompt = ChatPromptTemplate.from_template(template)

    # The chat model and output parser that complete the chain.
    chat_model = ChatOpenAI(openai_api_key=openai_api_key, temperature=0)
    parser = StrOutputParser()
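    # A more conventional LCEL wiring would inject the retrieved documents into
    # the {context} slot of `template` instead of baking them into the prompt.
    # A minimal sketch, assuming `from langchain_core.runnables import
    # RunnablePassthrough` (a format_docs step could join the retrieved
    # documents into plain text first):
    #
    #     rag_chain = (
    #         {"context": retriever, "question": RunnablePassthrough()}
    #         | prompt
    #         | chat_model
    #         | parser
    #     )
    #     answer = rag_chain.invoke(query)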
    return alternative_prompt, chat_model, parser
@cl.on_chat_start # marks a function that will be executed at the start of a user session
async def start_chat():
    settings = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
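    # These settings mirror the OpenAI chat-completion parameters; note that the
    # LangChain ChatOpenAI model in input_query() is configured separately.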
    setup()
    cl.user_session.set("settings", settings)
# Each user message is passed to input_query() to build a fresh RAG prompt.
@cl.on_message # marks a function that should be run each time the chatbot receives a message from a user
async def main(message: cl.Message):
    settings = cl.user_session.get("settings")  # stored at chat start; informational here
    # message.content is the input query from the user
    print(message.content)

    # Build the prompt, model, and parser for this question, then chain them.
    prompt, model, parser = input_query(message.content)
    runnable_chain = prompt | model | parser

    # Stream the answer back to the UI token by token.
    msg = cl.Message(content="")
    output_stream = runnable_chain.astream({})
    async for chunk in output_stream:
        await msg.stream_token(chunk)

    # Send and close the message stream.
    await msg.send()
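# Run locally with `chainlit run app.py -w` (assuming this file is saved as app.py).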