Maxx0 committed on
Commit
bc9370d
1 Parent(s): c9fcaad

Upload 4 files

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -0
  2. app.py +85 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the Streamlit app defined in app.py.
FROM python:3.9

WORKDIR /code

# Copy the requirements first so the dependency layer is cached independently
# of application source changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Exec-form CMD must be a valid JSON array, i.e. double-quoted strings only;
# with single quotes Docker falls back to shell form and the command fails.
# Streamlit's bind options are --server.address / --server.port. The former
# --allow-websocket-origin flag is not a Streamlit CLI option and was dropped.
CMD ["streamlit", "run", "app.py", "--server.address", "0.0.0.0", "--server.port", "7860"]
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import required libraries
2
+ import PyPDF2
3
+ from getpass import getpass
4
+ from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser
5
+ from haystack.document_stores import InMemoryDocumentStore
6
+ from haystack import Document, Pipeline
7
+ from haystack.nodes import BM25Retriever
8
+ from pprint import pprint
9
+ import streamlit as st
10
+ import logging
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
+ import os
14
+ import logging
15
+ logging.basicConfig(level=logging.DEBUG)
16
+
17
+ # Function to extract text from a PDF
18
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The page texts joined in page order with no separator. A page whose
        ``extract_text()`` result is falsy contributes an empty string, so the
        result is ``""`` when no page yields any text.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction happens inside the ``with`` block because the reader
        # is built on the open file handle.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
26
+
27
+ # Extract text from the PDF file
28
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# Create a Haystack document holding the whole PDF text
doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})

# Initialize Document Store (BM25 enabled so the BM25Retriever can query it)
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# Initialize Retriever
retriever = BM25Retriever(document_store=document_store, top_k=2)

# Define QA Template: retrieved documents are joined into the context and the
# user's query is substituted into the prompt.
qa_template = PromptTemplate(
    prompt="""
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
""",
    output_parser=AnswerParser()
)

# Get Huggingface token from the environment. The previous self-assignment
# (HF_TOKEN = HF_TOKEN) raised NameError at import time. load_dotenv() is
# called at the top of the file, so a .env entry is picked up as well.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set.")

# Initialize Prompt Node
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,
    model_kwargs={"model_max_length": 5000}
)

# Build Pipeline: Query -> retriever -> prompt_node
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
72
+
73
+ # Streamlit Function for Handling Input and Displaying Output
74
def run_streamlit_app():
    """Render the question form and display the pipeline's answer.

    Shows a title and a text input. When the "Get Answer" button is pressed,
    the entered question is run through ``rag_pipeline`` and the first
    returned answer is written to the page, or "No answer found." when the
    pipeline returns no answers.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if st.button("Get Answer"):
        # Do not send a blank question to the retriever / remote model.
        if not query_text.strip():
            st.warning("Please enter a question.")
            return

        response = rag_pipeline.run(query=query_text)
        answers = response["answers"]
        answer = answers[0].answer if answers else "No answer found."
        st.write(answer)


# Start the Streamlit application
if __name__ == "__main__":
    run_streamlit_app()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
PyPDF2
farm-haystack[inference]
# NOTE(review): app.py imports `haystack` from farm-haystack; the separate
# PyPI distribution named `haystack` looks unrelated - confirm it is needed.
haystack
streamlit
# NOTE(review): the Hugging Face client library is published as
# `huggingface_hub`; confirm `huggingface` is the intended package.
huggingface
langchain
# app.py does `from dotenv import load_dotenv`; the `dotenv` module is
# distributed on PyPI as python-dotenv.
python-dotenv