initial code for front end
- DockerFile +0 -0
- app.py +1 -1
- db_utils.py → backend/db_utils.py +0 -0
- langchain_utils.py → backend/langchain_utils.py +1 -1
- backend/main.py +161 -0
- pinecone_utilis.py → backend/pinecone_utilis.py +0 -0
- pydantic_models.py → backend/pydantic_models.py +0 -0
- requirements.txt → backend/requirements.txt +0 -0
- backend/utilis.py +0 -57
- docker-compose.yml +0 -0
- frontend/app.py +160 -0
- main.py +0 -160
- ui.py → test.py +2 -2
DockerFile
ADDED
File without changes
app.py
CHANGED
@@ -1,4 +1,4 @@
-from pinecone_utilis import create_pinecone_vectorstore,load_and_split_document, index_document_to_pinecone
+from backend.pinecone_utilis import create_pinecone_vectorstore,load_and_split_document, index_document_to_pinecone
 
 file_path="InternTaskGenAI.pdf"
 
db_utils.py → backend/db_utils.py
RENAMED
File without changes
langchain_utils.py → backend/langchain_utils.py
RENAMED
@@ -7,7 +7,7 @@ from typing import List
 from typing_extensions import List, TypedDict
 from langchain_core.documents import Document
 import os
-from pinecone_utilis import vectorstore
+from backend.pinecone_utilis import vectorstore
 from dotenv import load_dotenv
 load_dotenv()
 OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
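Note that with these modules now living under backend/, the imports are package-qualified, so the API has to be launched from the repository root (for example with a standard `uvicorn backend.main:app --port 8000` invocation; the actual server command is not part of this diff) so that the backend package is importable.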
backend/main.py
CHANGED
@@ -0,0 +1,161 @@
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from backend.pydantic_models import QueryInput, QueryResponse, DocumentInfo, DeleteFileRequest, ChallengeRequest, EvaluateAnswer
+from backend.langchain_utils import generate_response, retrieve
+from backend.db_utils import insert_application_logs, get_chat_history, get_all_documents, insert_document_record, delete_document_record, get_file_content
+from backend.pinecone_utilis import index_document_to_pinecone, delete_doc_from_pinecone, load_and_split_document
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
+import os
+import uuid
+import logging
+import shutil
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+# Set up logging
+logging.basicConfig(filename='app.log', level=logging.INFO)
+
+# Initialize FastAPI app
+app = FastAPI()
+
+@app.post("/chat", response_model=QueryResponse)
+def chat(query_input: QueryInput):
+    session_id = query_input.session_id or str(uuid.uuid4())
+    logging.info(f"Session ID: {session_id}, User Query: {query_input.question}, Model: {query_input.model.value}")
+    chat_history = get_chat_history(session_id)
+    print(chat_history)
+    state={"messages":[]} # test
+    messages_state = generate_response(query=query_input.question, state=state)
+    answer=messages_state["messages"][-1].content
+
+    insert_application_logs(session_id, query_input.question, answer, query_input.model.value)
+    logging.info(f"Session ID: {session_id}, AI Response: {answer}")
+    return QueryResponse(answer=answer, session_id=session_id, model=query_input.model)
+
+@app.post('/challenge-me', response_model=list[str])
+def challenge_me(request: ChallengeRequest):
+    file_id = request.file_id
+
+    content = get_file_content(file_id)
+    if content is None:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+
+    llm = ChatOpenAI(
+        model='gpt-4.1',
+        api_key=OPENAI_API_KEY
+    )
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful AI assistant. Generate three logic-based or comprehension-focused questions about the following document. Each question should require understanding or reasoning about the document content, not just simple recall. Provide each question on a new line."),
+        ("human", "Document: {context}\n\nQuestions:")
+    ])
+    chain = prompt | llm | StrOutputParser()
+    questions_str = chain.invoke({"context": content})
+    questions = [q.strip() for q in questions_str.split('\n') if q.strip()][:3]
+
+    return questions
+
+
+
+@app.post('/evaluate-response')
+def evaluate_response(request: EvaluateAnswer):
+    # get the file related to the answers
+    file_id = request.file_id
+    question = request.question
+    user_answer=request.user_answer
+
+    # evaluate the user answer according to the research paper
+
+    llm = ChatOpenAI(
+        model='gpt-4.1',
+        api_key=OPENAI_API_KEY
+    )
+    # get the context from doc
+    retrieved_docs=retrieve(query=question)
+    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
+
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful AI assistant. Your task is to evaluate the user's answer to a question, using ONLY the information below as reference. If the answer is not correct, explain why and provide the correct answer with justification from the document. Do not make up information."),
+        ("system", "Context: {context}"),
+        ("human", "Question: {question}\nUser Answer: {user_answer}\nEvaluation:")
+    ])
+
+    chain = prompt | llm | StrOutputParser()
+    evaluation = chain.invoke({
+        "context": docs_content,
+        "question": question,
+        "user_answer": user_answer
+    })
+
+    return {
+        "feedback": evaluation,
+        "file_id": file_id
+    }
+
+
+
+@app.post("/upload-doc")
+def upload_and_index_document(file: UploadFile = File(...)):
+    allowed_extensions = ['.pdf', '.txt']
+    file_extension = os.path.splitext(file.filename)[1].lower()
+
+    if file_extension not in allowed_extensions:
+        raise HTTPException(status_code=400, detail=f"Unsupported file type. Allowed types are: {', '.join(allowed_extensions)}")
+
+    temp_file_path = f"temp_{file.filename}"
+
+    try:
+        # Save the uploaded file to a temporary file
+        with open(temp_file_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+        docs = load_and_split_document(temp_file_path)
+        docs_content = "\n\n".join(doc.page_content for doc in docs)
+        file_id = insert_document_record(file.filename, docs_content)
+        success = index_document_to_pinecone(temp_file_path, file_id)
+
+        if success:
+            # generate summary
+
+            llm = ChatOpenAI(
+                model='gpt-4.1',
+                api_key=OPENAI_API_KEY
+            )
+            prompt = ChatPromptTemplate.from_messages([
+                ("system", "You are a helpful assistant. Summarize the following document in no more than 150 words. Focus on the main points and key findings. Do not include information not present in the document."),
+                ("human", "{document}")
+            ])
+            chain = prompt | llm | StrOutputParser()
+            summary = chain.invoke({"document": docs_content})
+            return {
+                "message": f"File {file.filename} has been successfully uploaded and indexed.",
+                "file_id": file_id,
+                "summary": summary
+            }
+        else:
+            delete_document_record(file_id)
+            raise HTTPException(status_code=500, detail=f"Failed to index {file.filename}.")
+    finally:
+
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
+
+@app.get("/list-docs", response_model=list[DocumentInfo])
+def list_documents():
+    return get_all_documents()
+
+@app.post("/delete-doc")
+def delete_document(request: DeleteFileRequest):
+    pinecone_delete_success = delete_doc_from_pinecone(request.file_id)
+
+    if pinecone_delete_success:
+        db_delete_success = delete_document_record(request.file_id)
+        if db_delete_success:
+            return {"message": f"Successfully deleted document with file_id {request.file_id} from the system."}
+        else:
+            return {"error": f"Deleted from pinecone but failed to delete document with file_id {request.file_id} from the database."}
+    else:
+        return {"error": f"Failed to delete document with file_id {request.file_id} from pinecone."}
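For reference, a minimal client sketch against the endpoints defined above, assuming the API is served locally on port 8000. The paths and JSON field names follow the handlers and the frontend code in this commit; the exact set of values accepted for "model" is defined by QueryInput in backend/pydantic_models.py, which this diff does not render, so "gpt-4o-mini" here is taken from the frontend and is an assumption.

import requests

BASE_URL = "http://localhost:8000"

# Upload and index a document; the response carries file_id and a generated summary.
with open("InternTaskGenAI.pdf", "rb") as f:
    upload = requests.post(
        f"{BASE_URL}/upload-doc",
        files={"file": ("InternTaskGenAI.pdf", f, "application/pdf")},
    ).json()
file_id = upload["file_id"]
print("Summary:", upload["summary"])

# Ask a question; session_id is optional and generated server-side when omitted.
chat = requests.post(f"{BASE_URL}/chat", json={
    "question": "What is the main finding of the paper?",
    "model": "gpt-4o-mini",  # assumption: a value the QueryInput model enum accepts
}).json()
print("Answer:", chat["answer"], "| session:", chat["session_id"])

# Generate three comprehension questions, then have the first answer evaluated.
questions = requests.post(f"{BASE_URL}/challenge-me", json={"file_id": file_id}).json()
result = requests.post(f"{BASE_URL}/evaluate-response", json={
    "file_id": file_id,
    "question": questions[0],
    "user_answer": "My attempt at an answer.",
}).json()
print("Feedback:", result["feedback"])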
pinecone_utilis.py → backend/pinecone_utilis.py
RENAMED
File without changes

pydantic_models.py → backend/pydantic_models.py
RENAMED
File without changes

requirements.txt → backend/requirements.txt
RENAMED
File without changes
backend/utilis.py
DELETED
@@ -1,57 +0,0 @@
-from langchain.document_loaders import PyPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from pinecone import Pinecone, ServerlessSpec
-from langchain_core.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from operator import itemgetter
-
-class RAG:
-    def load_split_file(self, file_path):
-        loader = PyPDFLoader(file_path)
-        pages = loader.load_and_split()
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=10)
-        docs = text_splitter.split_documents(pages)
-        return docs
-
-    def create_index(self, index_name, PINECONE_API_KEY):
-        pc = Pinecone(api_key=PINECONE_API_KEY)
-        if index_name in pc.list_indexes().names():
-            pc.delete_index(index_name) # To avoid any conflicts in retrieval
-        pc.create_index(
-            name=index_name,
-            dimension=384,
-            metric='cosine',
-            spec=ServerlessSpec(
-                cloud="aws",
-                region="us-east-1"
-            )
-        )
-        return index_name
-
-    def final_response(self, index, question, model):
-        retriever = index.as_retriever()
-        parser = StrOutputParser()
-        template = """
-        You must provide an answer based strictly on the context below.
-        The answer is highly likely to be found within the given context, so analyze it thoroughly before responding.
-        Only if there is absolutely no relevant information, respond with "I don't know".
-        Do not make things up.
-
-        Context: {context}
-
-        Question: {question}
-        """
-        prompt = PromptTemplate.from_template(template)
-        prompt.format(context="Here is some context", question="Here is a question")
-
-        chain = (
-            {
-                "context": itemgetter("question") | retriever,
-                "question": itemgetter("question"),
-            }
-            | prompt
-            | model
-            | parser
-        )
-        matching_results = index.similarity_search(question, k=2)
-        return f"Answer: {chain.invoke({'question': question})}", matching_results
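The deleted RAG class bundled loading/splitting, index management, and answering into one ad-hoc helper; its responsibilities now appear to be covered by the function-based modules the new backend/main.py imports (load_and_split_document and the index helpers in backend/pinecone_utilis.py, plus generate_response and retrieve in backend/langchain_utils.py).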
docker-compose.yml
ADDED
File without changes
frontend/app.py
ADDED
@@ -0,0 +1,160 @@
+import streamlit as st
+import requests
+import uuid
+from datetime import datetime
+
+# Backend URL configuration
+BACKEND_URL = "http://localhost:8000"
+
+# Initialize session state
+if "session_id" not in st.session_state:
+    st.session_state.session_id = str(uuid.uuid4())
+if "current_file" not in st.session_state:
+    st.session_state.current_file = None
+if "challenge_questions" not in st.session_state:
+    st.session_state.challenge_questions = []
+if "user_answers" not in st.session_state:
+    st.session_state.user_answers = {}
+if "feedback" not in st.session_state:
+    st.session_state.feedback = {}
+
+# Page setup
+st.set_page_config(page_title="Research Assistant", layout="wide")
+st.title("Smart Research Assistant")
+
+# Document management sidebar
+with st.sidebar:
+    st.header("Document Management")
+
+    # Document upload
+    uploaded_file = st.file_uploader("Upload Document (PDF/TXT)", type=["pdf", "txt"])
+    if uploaded_file:
+        if st.button("Upload Document"):
+            response = requests.post(
+                f"{BACKEND_URL}/upload-doc",
+                files={"file": (uploaded_file.name, uploaded_file, "application/octet-stream")}
+            )
+            if response.status_code == 200:
+                data = response.json()
+                st.session_state.current_file = data["file_id"]
+                st.success(f"Document uploaded successfully! ID: {data['file_id']}")
+                with st.expander("Document Summary"):
+                    st.write(data["summary"])
+            else:
+                st.error("Failed to upload document")
+
+    # List documents
+    st.subheader("Uploaded Documents")
+    try:
+        documents = requests.get(f"{BACKEND_URL}/list-docs").json()
+        for doc in documents:
+            doc_id = doc["id"]
+            with st.container(border=True):
+                st.write(f"**{doc['filename']}**")
+                st.caption(f"Uploaded: {datetime.fromisoformat(doc['upload_timestamp']).strftime('%Y-%m-%d %H:%M')}")
+                st.caption(f"ID: {doc_id}")
+
+                # Document selection
+                if st.button(f"Select", key=f"select_{doc_id}"):
+                    st.session_state.current_file = doc_id
+
+                # Document deletion
+                if st.button(f"Delete", key=f"del_{doc_id}"):
+                    del_response = requests.post(
+                        f"{BACKEND_URL}/delete-doc",
+                        json={"file_id": doc_id}
+                    )
+                    if del_response.status_code == 200:
+                        st.rerun()
+                    else:
+                        st.error("Deletion failed")
+    except:
+        st.warning("No documents available")
+
+# Main interaction tabs
+ask_tab, challenge_tab = st.tabs(["Ask Anything", "Challenge Me"])
+
+with ask_tab:
+    st.subheader("Document Q&A")
+
+    if st.session_state.current_file:
+        # Chat interface
+        user_question = st.text_input("Ask a question about the document:")
+
+        if user_question:
+            response = requests.post(
+                f"{BACKEND_URL}/chat",
+                json={
+                    "question": user_question,
+                    "session_id": st.session_state.session_id,
+                    "model": "gpt-4o-mini"
+                }
+            )
+
+            if response.status_code == 200:
+                data = response.json()
+                st.divider()
+                st.subheader("Answer")
+                st.write(data["answer"])
+                st.caption(f"Session ID: {data['session_id']}")
+            else:
+                st.error("Failed to get response")
+    else:
+        st.warning("Please select a document first")
+
+with challenge_tab:
+    st.subheader("Document Comprehension Challenge")
+
+    if st.session_state.current_file:
+        # Generate questions
+        if st.button("Generate Challenge Questions"):
+            response = requests.post(
+                f"{BACKEND_URL}/challenge-me",
+                json={"file_id": st.session_state.current_file}
+            )
+            if response.status_code == 200:
+                st.session_state.challenge_questions = response.json()
+            else:
+                st.error("Failed to generate questions")
+
+        # Display questions and answer inputs
+        if st.session_state.challenge_questions:
+            for i, question in enumerate(st.session_state.challenge_questions):
+                st.subheader(f"Question {i+1}")
+                st.write(question)
+
+                user_answer = st.text_input(
+                    f"Your answer for question {i+1}:",
+                    key=f"answer_{i}"
+                )
+
+                # Store answers
+                st.session_state.user_answers[i] = user_answer
+
+                # Evaluate answer
+                if st.button(f"Evaluate Answer {i+1}", key=f"eval_{i}"):
+                    response = requests.post(
+                        f"{BACKEND_URL}/evaluate-response",
+                        json={
+                            "file_id": st.session_state.current_file,
+                            "question": question,
+                            "user_answer": user_answer
+                        }
+                    )
+                    if response.status_code == 200:
+                        feedback = response.json()
+                        st.session_state.feedback[i] = feedback
+                        st.success("Answer evaluated!")
+                    else:
+                        st.error("Evaluation failed")
+
+                # Show feedback
+                if i in st.session_state.feedback:
+                    with st.expander(f"Feedback for Question {i+1}"):
+                        st.write(st.session_state.feedback[i]["feedback"])
+    else:
+        st.warning("Please select a document first")
+
+# Session info
+st.sidebar.divider()
+st.sidebar.caption(f"Session ID: `{st.session_state.session_id}`")
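To try the UI locally, the backend must be reachable at the hard-coded BACKEND_URL (http://localhost:8000): start the API first, then run `streamlit run frontend/app.py` from the repository root. The docker-compose.yml added in this commit presumably wires the two services together, but its contents are not rendered in this diff.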
main.py
DELETED
@@ -1,160 +0,0 @@
(File removed in full. Its 160 lines match the new backend/main.py shown above, except for the old top-level import paths, which the move updated to the backend.* package, and minor whitespace.)
ui.py → test.py
RENAMED
@@ -7,8 +7,8 @@ BASE_URL = "http://localhost:8000"
 
 # with open("neural computing cwsi.pdf", "rb") as f:
 #     files = {"file": ("neural computing cwsi.pdf", f, "text/plain")}
-#     upload_response = requests.post(f"{BASE_URL}/
-#     print("Upload Response:", upload_response.json())
+#     upload_response = requests.post(f"{BASE_URL}/upload-doc", files=files)
+#     # print("Upload Response:", upload_response.json())
 
 # file_id = upload_response.json().get("summary")
 
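test.py (the renamed ui.py) is a commented-out smoke test for the API; this change completes the previously truncated requests.post call against /upload-doc so the snippet is runnable once uncommented.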
|