# Source: mprofydocker / app.py
# Hub page header: author avatar "Maxx0", commit "Upload 4 files" (bc9370d)
# Import required libraries
import PyPDF2
from getpass import getpass
from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser
from haystack.document_stores import InMemoryDocumentStore
from haystack import Document, Pipeline
from haystack.nodes import BM25Retriever
from pprint import pprint
import streamlit as st
import logging
from dotenv import load_dotenv
load_dotenv()
import os
import logging
logging.basicConfig(level=logging.DEBUG)
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages joined in page order with no separator
        between pages. A page whose ``extract_text()`` yields ``None`` or
        an empty string contributes nothing, so the result may be ``""``.

    Raises:
        OSError: If the file cannot be opened. Errors raised by PyPDF2
            while parsing the file propagate unchanged.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Iterate the pages directly and join once, instead of indexing via
        # range(len(...)) and growing a string with += on every page.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Load the knowledge-base PDF; start-up is aborted when it yields no text.
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# The entire PDF text is wrapped in a single Haystack document.
doc = Document(
    content=pdf_text,
    meta={"name": "MR. MPROFY"},
)

# In-memory store with BM25 enabled, holding that one document.
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# BM25 retriever over the store, configured with top_k=2.
retriever = BM25Retriever(
    document_store=document_store,
    top_k=2,
)
# Prompt wording for the assistant. {join(documents)} and {query} are the
# placeholders the prompt template fills in for each request.
_QA_PROMPT_TEXT = """
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
"""

# QA template: the prompt above, with model output parsed by AnswerParser.
qa_template = PromptTemplate(
    prompt=_QA_PROMPT_TEXT,
    output_parser=AnswerParser(),
)
# Get Huggingface token from the environment. load_dotenv() near the top of
# this file has already merged any .env entries into os.environ. Reading the
# variable here replaces a self-assignment that raised NameError at import.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Fail fast with an actionable message instead of handing an empty key
    # to the prompt node.
    raise RuntimeError(
        "HF_TOKEN is not set. Define it in the environment or in a .env file."
    )

# Initialize Prompt Node
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,
    model_kwargs={"model_max_length": 5000},
)
# Assemble the RAG pipeline: the query enters the retriever, and the
# retriever's output feeds the prompt node.
rag_pipeline = Pipeline()
rag_pipeline.add_node(
    component=retriever,
    name="retriever",
    inputs=["Query"],
)
rag_pipeline.add_node(
    component=prompt_node,
    name="prompt_node",
    inputs=["retriever"],
)
# Streamlit Function for Handling Input and Displaying Output
def run_streamlit_app():
    """Render the question form and display the pipeline's answer.

    Draws the page title, a text input and a "Get Answer" button. When the
    button is pressed, the question is run through ``rag_pipeline`` and the
    first returned answer (or "No answer found.") is written to the page.
    An empty or whitespace-only question is rejected with a warning rather
    than being sent through the pipeline.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")
    if not st.button("Get Answer"):
        return

    # Robustness: the input is empty until the user types something, so skip
    # the retriever and model call when there is nothing to ask.
    question = query_text.strip()
    if not question:
        st.warning("Please enter a question first.")
        return

    response = rag_pipeline.run(query=question)
    answers = response["answers"]
    answer = answers[0].answer if answers else "No answer found."
    st.write(answer)


# Start the Streamlit application
if __name__ == "__main__":
    run_streamlit_app()