# rspfacbapi / chatbot.py
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec
load_dotenv(find_dotenv())

class Chatbot:
    # Load the knowledge base and split it into small overlapping chunks for embedding.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    # Gemini embedding model (768-dimensional vectors), authenticated with the Gemini API key.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    # Pinecone client; the index is created on the first run and reused afterwards.
    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
        # host='gcp-starter'
    )

    index_name = "chatbot"

    if index_name not in pinecone.list_indexes().names():
        # Create a 768-dimensional cosine index on the starter pod and index the split documents.
        pinecone.create_index(
            name=index_name, metric="cosine", dimension=768,
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    template = """
    INSTRUCTION: Act as PRAN, the personal assistant of Rohan Shaw. This is a conversation \
    with a user who wants their questions about Rohan Shaw's professional and personal life answered. Use the CONTEXT to answer the QUESTION in a helpful manner. \
    If you don't know an ANSWER, say you don't know. \
    Always follow general guardrails before generating any response. \
    Always keep the conversation focused on Rohan Shaw. Keep your replies very short, \
    compassionate and informative. \
    Give the answer from the CONTEXT. \
    Help the user get their query solved while also increasing engagement and promoting Rohan Shaw. The ANSWER should be very short and compelling.
    CONTEXT: {context}
    QUESTION: {question}
    ANSWER:
    """

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"]
    )

    # Retrieval-augmented generation chain: retrieve relevant chunks, fill the prompt,
    # call Gemini, and parse the model output into a plain string.
    rag_chain = (
        {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
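

# A minimal usage sketch (not part of the original file): the class body above executes when
# Chatbot is defined, so the vector index and rag_chain are built as class attributes and the
# chain can be invoked directly with a question string. The sample question is hypothetical.
if __name__ == "__main__":
    bot = Chatbot()
    print(bot.rag_chain.invoke("What does Rohan Shaw do professionally?"))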