from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec

# Load API keys (GEMINI_API_KEY, PINECONE_API_KEY) from a local .env file.
load_dotenv(find_dotenv())


class Chatbot():
    # Load the knowledge base and split it into overlapping chunks for retrieval.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    # Gemini embeddings (768-dimensional), used both to index and to query the chunks.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY")
    )

    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
        # host='gcp-starter'
    )

    index_name = "chatbot"

    # On the first run, create the Pinecone index and upsert the document chunks;
    # on later runs, reuse the existing index.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter")
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    template = """
    INSTRUCTION: Act as PRAN, the personal assistant of Rohan Shaw, in a conversation with a user
    who wants questions about Rohan Shaw's professional and personal life answered.
    Use the CONTEXT to answer the QUESTION in a helpful manner.
    If you don't know an ANSWER, say you don't know.
    Always follow general guardrails before generating any response.
    Always keep the conversation in the context of Rohan Shaw. Keep your replies very short,
    compassionate, and informative.
    Give the answer from the CONTEXT.
    Help the user get their query solved while increasing engagement and promoting Rohan Shaw.
    The ANSWER should be very short and compelling.

    CONTEXT: {context}

    QUESTION: {question}

    ANSWER:
    """

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"]
    )

    # Retrieval-augmented generation chain: retrieve relevant chunks as context,
    # fill the prompt, call Gemini, and parse the output to a plain string.
    rag_chain = (
        {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
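
# Minimal usage sketch (not part of the original module): this assumes the file is
# run directly, that a .env file providing GEMINI_API_KEY and PINECONE_API_KEY sits
# next to it, and that dataset.txt is present. The example question is illustrative.
if __name__ == "__main__":
    bot = Chatbot()
    # Invoking the chain with a raw string: the retriever fills {context} and the
    # string itself is passed through as {question}.
    answer = bot.rag_chain.invoke("What does Rohan Shaw do professionally?")
    print(answer)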