backend

- .gitignore +5 -1
- __pycache__/db_utils.cpython-311.pyc +0 -0
- __pycache__/langchain_utils.cpython-311.pyc +0 -0
- __pycache__/main.cpython-311.pyc +0 -0
- __pycache__/pinecone_utilis.cpython-311.pyc +0 -0
- __pycache__/pydantic_models.cpython-311.pyc +0 -0
- db_utils.py +23 -7
- langchain_utils.py +31 -4
- main.py +103 -19
- pinecone_utilis.py +13 -8
- prompt_templates.py +0 -64
- pydantic_models.py +8 -0
- ui.py +22 -58
.gitignore
CHANGED
@@ -1,3 +1,7 @@
 venv
 InternTaskGenAI.pdf
-.env
+.env
+research_assistant.db
+neural computing cwsi.pdf
+app.log
+__pycache__
__pycache__/db_utils.cpython-311.pyc
CHANGED
Binary files a/__pycache__/db_utils.cpython-311.pyc and b/__pycache__/db_utils.cpython-311.pyc differ

__pycache__/langchain_utils.cpython-311.pyc
CHANGED
Binary files a/__pycache__/langchain_utils.cpython-311.pyc and b/__pycache__/langchain_utils.cpython-311.pyc differ

__pycache__/main.cpython-311.pyc
CHANGED
Binary files a/__pycache__/main.cpython-311.pyc and b/__pycache__/main.cpython-311.pyc differ

__pycache__/pinecone_utilis.cpython-311.pyc
CHANGED
Binary files a/__pycache__/pinecone_utilis.cpython-311.pyc and b/__pycache__/pinecone_utilis.cpython-311.pyc differ

__pycache__/pydantic_models.cpython-311.pyc
CHANGED
Binary files a/__pycache__/pydantic_models.cpython-311.pyc and b/__pycache__/pydantic_models.cpython-311.pyc differ
db_utils.py
CHANGED
@@ -1,7 +1,9 @@
 import sqlite3
 from datetime import datetime
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
 
-
+
+DB_NAME = "research_assistant.db"
 
 def get_db_connection():
     conn = sqlite3.connect(DB_NAME)
@@ -24,6 +26,7 @@ def create_document_store():
     conn.execute('''CREATE TABLE IF NOT EXISTS document_store
                     (id INTEGER PRIMARY KEY AUTOINCREMENT,
                      filename TEXT,
+                     content TEXT,
                      upload_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
     conn.close()
 
@@ -40,16 +43,15 @@ def get_chat_history(session_id):
     cursor.execute('SELECT user_query, gpt_response FROM application_logs WHERE session_id = ? ORDER BY created_at', (session_id,))
     messages = []
     for row in cursor.fetchall():
-        messages.
-
-
-        ])
+        messages.append(HumanMessage(content=row['user_query']))
+        messages.append(AIMessage(content=row['gpt_response']))
+
     conn.close()
     return messages
-def insert_document_record(filename):
+def insert_document_record(filename, content):
     conn = get_db_connection()
     cursor = conn.cursor()
-    cursor.execute('INSERT INTO document_store (filename) VALUES (?)', (filename,))
+    cursor.execute('INSERT INTO document_store (filename, content) VALUES (?, ?)', (filename, content))
     file_id = cursor.lastrowid
     conn.commit()
     conn.close()
@@ -70,6 +72,20 @@ def get_all_documents():
     conn.close()
     return [dict(doc) for doc in documents]
 
+
+def get_file_content(file_id: int) -> str | None:
+    conn = get_db_connection()
+    try:
+        cursor = conn.cursor()
+        cursor.execute('SELECT content FROM document_store WHERE id = ?', (file_id,))
+        row = cursor.fetchone()
+        if row is not None:
+            return row[0]
+        return None
+    finally:
+        conn.close()
+
+
 # Initialize the database tables
 create_application_logs()
 create_document_store()
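The new get_file_content helper completes a simple round trip: store a document's full text with insert_document_record, read it back by id later. A minimal usage sketch, assuming the module is importable and the tables exist; the filename and text are illustrative, not from the commit:

from db_utils import insert_document_record, get_file_content, get_chat_history

# Store a document's extracted text alongside its filename (illustrative values).
file_id = insert_document_record("paper.pdf", "Full extracted text of the paper...")

# Read the stored text back by id; returns None for unknown ids.
content = get_file_content(file_id)
assert content is not None

# Chat history is rebuilt as alternating HumanMessage/AIMessage objects,
# ready to pass straight to an LLM.
history = get_chat_history("some-session-id")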
langchain_utils.py
CHANGED
@@ -1,8 +1,8 @@
 from langchain_openai import ChatOpenAI
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from
-from
+from langgraph.graph import START, StateGraph
+from langchain_core.messages import HumanMessage, AIMessage, BaseMessage, SystemMessage
 from typing import List
 from typing_extensions import List, TypedDict
 from langchain_core.documents import Document
@@ -11,8 +11,11 @@ from pinecone_utilis import vectorstore
 from dotenv import load_dotenv
 load_dotenv()
 OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
-retriever = vectorstore.as_retriever(search_kwargs={"k":
-
+retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
+llm = ChatOpenAI(
+    model='gpt-4.1',
+    api_key=OPENAI_API_KEY
+)
 output_parser = StrOutputParser()
 
 contextualize_q_system_prompt = (
@@ -38,6 +41,30 @@ qa_prompt = ChatPromptTemplate.from_messages([
 
 class State(TypedDict):
     messages: List[BaseMessage]
+
+
+
+# Define application steps
+def retrieve(query: str):
+    retrieved_docs = vectorstore.similarity_search(query)
+    return retrieved_docs
+
+
+def generate_response(query: str, state: State)->State:
+    retrieved_docs=retrieve(query=query)
+    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
+    system_message = SystemMessage(
+        content="You are a helpful AI assistant. Answer the user's question using ONLY the information provided below. "
+        "If the answer is not in the context, say 'I don't know.' Do not make up information. "
+        f"Context: {docs_content}"
+    )
+
+    state['messages'].append(system_message)
+    state['messages'].append(HumanMessage(content=query))
+
+    response = llm.invoke(state["messages"])
+    state['messages'].append(AIMessage(content=response.content))
+    return state
 
 
 
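Note that generate_response mutates the state it is given: each call appends a SystemMessage carrying the freshly retrieved context, the user's HumanMessage, and the model's AIMessage, so the list grows by three messages per turn. A usage sketch under the assumption that the Pinecone vectorstore and OPENAI_API_KEY are configured (the queries are made up):

from langchain_utils import generate_response, State

state: State = {"messages": []}

# First turn: retrieved context + question + answer are appended to the state.
state = generate_response(query="What is the crop water stress index?", state=state)
print(state["messages"][-1].content)

# A follow-up reuses the accumulated messages, so earlier turns ride along.
state = generate_response(query="How was it estimated?", state=state)
print(state["messages"][-1].content)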
main.py
CHANGED
@@ -1,13 +1,20 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException
-from pydantic_models import QueryInput, QueryResponse, DocumentInfo, DeleteFileRequest
-from langchain_utils import
-from db_utils import insert_application_logs, get_chat_history, get_all_documents, insert_document_record, delete_document_record
-from
+from pydantic_models import QueryInput, QueryResponse, DocumentInfo, DeleteFileRequest, ChallengeRequest, EvaluateAnswer
+from langchain_utils import generate_response, retrieve
+from db_utils import insert_application_logs, get_chat_history, get_all_documents, insert_document_record, delete_document_record, get_file_content
+from pinecone_utilis import index_document_to_pinecone, delete_doc_from_pinecone, load_and_split_document
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
 import os
 import uuid
 import logging
 import shutil
 
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
 # Set up logging
 logging.basicConfig(filename='app.log', level=logging.INFO)
 
@@ -18,22 +25,81 @@ app = FastAPI()
 def chat(query_input: QueryInput):
     session_id = query_input.session_id or str(uuid.uuid4())
     logging.info(f"Session ID: {session_id}, User Query: {query_input.question}, Model: {query_input.model.value}")
-
     chat_history = get_chat_history(session_id)
-
-
-
-        "chat_history": chat_history
-    })['answer']
+    state={"messages":[]} # test
+    messages_state = generate_response(query=query_input.question, state=state)
+    answer=messages_state["messages"][-1].content
 
     insert_application_logs(session_id, query_input.question, answer, query_input.model.value)
     logging.info(f"Session ID: {session_id}, AI Response: {answer}")
     return QueryResponse(answer=answer, session_id=session_id, model=query_input.model)
 
+@app.post('/challenge-me', response_model=list[str])
+def challenge_me(request: ChallengeRequest):
+    file_id = request.file_id
+
+    content = get_file_content(file_id)
+    if content is None:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+
+    llm = ChatOpenAI(
+        model='gpt-4.1',
+        api_key=OPENAI_API_KEY
+    )
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful AI assistant. Generate three logic-based or comprehension-focused questions about the following document. Each question should require understanding or reasoning about the document content, not just simple recall. Provide each question on a new line."),
+        ("human", "Document: {context}\n\nQuestions:")
+    ])
+    chain = prompt | llm | StrOutputParser()
+    questions_str = chain.invoke({"context": content})
+    questions = [q.strip() for q in questions_str.split('\n') if q.strip()][:3]
+
+    return questions
+
+
+
+@app.post('/evaluate-response')
+def evaluate_response(request: EvaluateAnswer):
+    # get the file related to the answers
+    file_id = request.file_id
+    question = request.question
+    user_answer=request.user_answer
+
+    # evaluate the user answer against the research paper
+
+    llm = ChatOpenAI(
+        model='gpt-4.1',
+        api_key=OPENAI_API_KEY
+    )
+    # get the context from the doc
+    retrieved_docs=retrieve(query=question)
+    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
+
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful AI assistant. Your task is to evaluate the user's answer to a question, using ONLY the information below as reference. If the answer is not correct, explain why and provide the correct answer with justification from the document. Do not make up information."),
+        ("system", "Context: {context}"),
+        ("human", "Question: {question}\nUser Answer: {user_answer}\nEvaluation:")
+    ])
+
+    chain = prompt | llm | StrOutputParser()
+    evaluation = chain.invoke({
+        "context": docs_content,
+        "question": question,
+        "user_answer": user_answer
+    })
+
+    return {
+        "feedback": evaluation,
+        "file_id": file_id
+    }
+
+
 
 @app.post("/upload-doc")
 def upload_and_index_document(file: UploadFile = File(...)):
-    allowed_extensions = ['.pdf', '.
+    allowed_extensions = ['.pdf', '.txt']
     file_extension = os.path.splitext(file.filename)[1].lower()
 
     if file_extension not in allowed_extensions:
@@ -45,16 +111,34 @@ def upload_and_index_document(file: UploadFile = File(...)):
         # Save the uploaded file to a temporary file
         with open(temp_file_path, "wb") as buffer:
             shutil.copyfileobj(file.file, buffer)
-
-
-
+        docs = load_and_split_document(temp_file_path)
+        docs_content = "\n\n".join(doc.page_content for doc in docs)
+        file_id = insert_document_record(file.filename, docs_content)
+        success = index_document_to_pinecone(temp_file_path, file_id)
 
         if success:
-
+            # generate summary
+
+            llm = ChatOpenAI(
+                model='gpt-4.1',
+                api_key=OPENAI_API_KEY
+            )
+            prompt = ChatPromptTemplate.from_messages([
+                ("system", "You are a helpful assistant. Summarize the following document in no more than 150 words. Focus on the main points and key findings. Do not include information not present in the document."),
+                ("human", "{document}")
+            ])
+            chain = prompt | llm | StrOutputParser()
+            summary = chain.invoke({"document": docs_content})
+            return {
+                "message": f"File {file.filename} has been successfully uploaded and indexed.",
+                "file_id": file_id,
+                "summary": summary
+            }
         else:
            delete_document_record(file_id)
            raise HTTPException(status_code=500, detail=f"Failed to index {file.filename}.")
     finally:
+
         if os.path.exists(temp_file_path):
             os.remove(temp_file_path)
 
@@ -64,13 +148,13 @@ def list_documents():
 
 @app.post("/delete-doc")
 def delete_document(request: DeleteFileRequest):
-
+    pinecone_delete_success = delete_doc_from_pinecone(request.file_id)
 
-    if
+    if pinecone_delete_success:
        db_delete_success = delete_document_record(request.file_id)
        if db_delete_success:
            return {"message": f"Successfully deleted document with file_id {request.file_id} from the system."}
        else:
-           return {"error": f"Deleted from
+           return {"error": f"Deleted from pinecone but failed to delete document with file_id {request.file_id} from the database."}
     else:
-        return {"error": f"Failed to delete document with file_id {request.file_id} from
+        return {"error": f"Failed to delete document with file_id {request.file_id} from pinecone."}
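The new endpoints can be smoke-tested with requests in the same style as ui.py below; the base URL and file_id are assumptions, not values from the commit:

import requests

BASE_URL = "http://localhost:8000"  # assumed local dev address
FILE_ID = 1                         # assumes a document was already uploaded

# /challenge-me returns up to three generated questions as a JSON list.
questions = requests.post(f"{BASE_URL}/challenge-me", json={"file_id": FILE_ID}).json()
for q in questions:
    print(q)

# /evaluate-response grades a user answer against the retrieved context.
payload = {"file_id": FILE_ID, "question": questions[0], "user_answer": "My attempt."}
feedback = requests.post(f"{BASE_URL}/evaluate-response", json=payload).json()
print(feedback["feedback"])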
pinecone_utilis.py
CHANGED
@@ -11,7 +11,6 @@ load_dotenv()
 
 # API keys
 PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")
-print(f"Pinecone API Key: {PINECONE_API_KEY}")
 OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
 
 
@@ -72,13 +71,19 @@ def index_document_to_pinecone(file_path: str, file_id: int) -> bool:
 
 def delete_doc_from_pinecone(file_id: int):
     try:
-
-
-
-
-
-
+        index = pc.Index(INDEX_NAME)
+        # Query for all vectors with file_id metadata
+        query_result = index.query(
+            vector=[0.0]*1024,
+            filter={"file_id": {"$eq": str(file_id)}},
+            top_k=10000,
+            include_metadata=True
+        )
+        ids = [match["id"] for match in query_result["matches"]]
+        if ids:
+            index.delete(ids=ids)
         return True
     except Exception as e:
-        print(f"Error deleting
+        print(f"Error deleting from Pinecone: {str(e)}")
         return False
+
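The rewritten delete_doc_from_pinecone works around serverless Pinecone indexes, which do not support delete-by-metadata-filter: it queries with a zero vector ([0.0]*1024 assumes a 1024-dimensional index) to collect the chunk ids for the file, then deletes them by id. Note that top_k=10000 is Pinecone's query cap, so a document with more chunks than that would leave strays. On pod-based indexes the same cleanup can be a single filtered delete; a sketch under that assumption, reusing the module's pc and INDEX_NAME globals:

def delete_doc_from_pinecone_podbased(file_id: int) -> bool:
    # Pod-based indexes accept a metadata filter on delete; serverless ones
    # reject it, which is why the committed version queries for ids first.
    try:
        index = pc.Index(INDEX_NAME)
        index.delete(filter={"file_id": {"$eq": str(file_id)}})
        return True
    except Exception as e:
        print(f"Error deleting from Pinecone: {str(e)}")
        return False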
prompt_templates.py
DELETED
@@ -1,64 +0,0 @@
-# 1. Auto-summarization prompt template
-AUTO_SUMMARY_TEMPLATE = """
-Summarize the following document in no more than 150 words. Focus on the main points and key findings. Do not include information not present in the document.
-
-DOCUMENT:
-{document}
-
-SUMMARY:
-"""
-
-# 2. Question answering prompt template
-QA_PROMPT_TEMPLATE = """
-Answer the following question based only on the provided document. Your answer must be grounded in the document and include a specific reference to the text that supports your answer.
-
-Document:
-{document}
-
-Question:
-{question}
-
-Answer:
-"""
-
-# 3. Logic-based question generation prompt template
-LOGIC_QUESTION_GENERATION_TEMPLATE = """
-Generate three logic-based or comprehension-focused questions about the following document. Each question should require understanding or reasoning about the document content, not just simple recall. Provide each question on a new line.
-
-Document:
-{document}
-
-Questions:
-"""
-
-# 4. Answer evaluation prompt template
-ANSWER_EVALUATION_TEMPLATE = """
-Evaluate the following user answer to the question, using only the provided document as the source of truth. State whether the answer is correct or not, and provide a brief justification referencing the document.
-
-Document:
-{document}
-
-Question:
-{question}
-
-User Answer:
-{user_answer}
-
-Evaluation:
-"""
-
-# 5. For memory/follow-up: Chat prompt template
-CHAT_PROMPT_TEMPLATE = """
-The following is a conversation between a user and an AI assistant about a document. The assistant answers questions and provides justifications based on the document. Use the conversation history and the document to answer the new question.
-
-Document:
-{document}
-
-Conversation History:
-{history}
-
-Question:
-{question}
-
-Answer:
-"""
pydantic_models.py
CHANGED
@@ -23,3 +23,11 @@ class DocumentInfo(BaseModel):
 
 class DeleteFileRequest(BaseModel):
     file_id: int
+
+class ChallengeRequest(BaseModel):
+    file_id: int
+
+class EvaluateAnswer(BaseModel):
+    file_id: int
+    question: str
+    user_answer: str
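The two new models define the request bodies for /challenge-me and /evaluate-response. A construction example with made-up field values:

from pydantic_models import ChallengeRequest, EvaluateAnswer

challenge = ChallengeRequest(file_id=1)

evaluation = EvaluateAnswer(
    file_id=1,
    question="What problem does the paper address?",
    user_answer="It estimates crop water stress from thermal imagery.",
)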
ui.py
CHANGED
@@ -1,61 +1,25 @@
-
+
 import requests
 
 # Set the FastAPI backend URL
-
-
-
-
-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    response = requests.get(f"{FASTAPI_URL}/list-docs")
-    if response.status_code == 200:
-        documents = response.json()
-        st.write("Available Documents:")
-        for doc in documents:
-            st.write(f"- {doc['filename']} (ID: {doc['file_id']})")
-    else:
-        st.error("Failed to list documents")
-
-# Interaction Modes
-mode = st.radio("Choose Mode", ["Ask Anything", "Challenge Me"])
-
-if mode == "Ask Anything":
-    question = st.text_input("Ask a question about the document")
-    if question and st.button("Submit"):
-        payload = {
-            "question": question,
-            "session_id": "user123", # Replace with actual session management
-            "model": "default" # Replace with your model selection
-        }
-        response = requests.post(f"{FASTAPI_URL}/chat", json=payload)
-        if response.status_code == 200:
-            result = response.json()
-            st.write("Answer:", result["answer"])
-            # If your backend returns a source snippet, display it:
-            # st.write("Source:", result.get("source", ""))
-        else:
-            st.error("Failed to get answer")
-
-# elif mode == "Challenge Me":
-#     if st.button("Generate Challenge Questions"):
-
-#         # Assume your backend has a `/generate-questions` endpoint
-#         # response = requests.post(f"{FASTAPI_URL}/generate-questions", json={"file_id": file_id})
-#         # if response.status_code == 200:
-#         #     questions
+BASE_URL = "http://localhost:8000"
+
+
+# with open("neural computing cwsi.pdf", "rb") as f:
+#     files = {"file": ("neural computing cwsi.pdf", f, "text/plain")}
+#     upload_response = requests.post(f"{BASE_URL}/chat", files=files)
+#     print("Upload Response:", upload_response.json())
+
+# file_id = upload_response.json().get("summary")
+
+# print(file_id)
+
+chat_data = {"question": "What is the main topic?", "model": "gpt-4o-mini"}
+chat_response = requests.post(f"{BASE_URL}/chat", json=chat_data)
+print("Chat Response:", chat_response.json())
+
+# delete_data={"file_id": 1}
+
+# delete_response = requests.post(f"{BASE_URL}/delete-doc", json=delete_data)
+
+# print("Delete Response:", delete_response.json())