umar-100 committed
Commit aea1c12 · 1 Parent(s): 1ee7b69

deployment config

Files changed (9):
  1. .gitignore +0 -2
  2. DockerFile +19 -0
  3. README.md +10 -0
  4. app.py +0 -5
  5. backend/db_utils.py +6 -4
  6. backend/main.py +7 -5
  7. frontend/app.py +3 -2
  8. start.sh +8 -0
  9. test.py +0 -25
.gitignore CHANGED
@@ -1,7 +1,5 @@
  venv
- InternTaskGenAI.pdf
  .env
  research_assistant.db
- neural computing cwsi.pdf
  app.log
  __pycache__
DockerFile CHANGED
@@ -0,0 +1,19 @@
+ FROM python:3.10
+
+ WORKDIR /app
+
+
+ COPY backend /app/backend
+ COPY frontend /app/frontend
+
+
+ COPY requirements.txt /app/requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+
+ COPY start.sh /app/start.sh
+ RUN chmod +x /app/start.sh
+
+ EXPOSE 7860
+
+ CMD ["/app/start.sh"]
README.md CHANGED
@@ -1,3 +1,13 @@
+ ---
+ title: Smart Research Assistant
+ emoji: 🔥
+ colorFrom: green
+ colorTo: yellow
+ sdk: docker
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
  # smart-research-assistant
 
  ## TODO:
app.py DELETED
@@ -1,5 +0,0 @@
- from backend.pinecone_utilis import create_pinecone_vectorstore,load_and_split_document, index_document_to_pinecone
-
- file_path="InternTaskGenAI.pdf"
-
- print(index_document_to_pinecone(file_path=file_path, file_id=1))
backend/db_utils.py CHANGED
@@ -25,6 +25,7 @@ def create_document_store():
  conn = get_db_connection()
  conn.execute('''CREATE TABLE IF NOT EXISTS document_store
  (id INTEGER PRIMARY KEY AUTOINCREMENT,
+ session_id TEXT,
  filename TEXT,
  content TEXT,
  upload_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
@@ -48,10 +49,11 @@ def get_chat_history(session_id):
 
  conn.close()
  return messages
- def insert_document_record(filename, content):
+ def insert_document_record(session_id, filename, content):
  conn = get_db_connection()
  cursor = conn.cursor()
- cursor.execute('INSERT INTO document_store (filename, content) VALUES (?, ?)', (filename, content))
+ cursor.execute('INSERT INTO document_store (session_id, filename, content) VALUES (?, ?, ?)',
+ (session_id, filename, content))
  file_id = cursor.lastrowid
  conn.commit()
  conn.close()
@@ -64,10 +66,10 @@ def delete_document_record(file_id):
  conn.close()
  return True
 
- def get_all_documents():
+ def get_all_documents(session_id):
  conn = get_db_connection()
  cursor = conn.cursor()
- cursor.execute('SELECT id, filename, upload_timestamp FROM document_store ORDER BY upload_timestamp DESC')
+ cursor.execute('SELECT id, filename, upload_timestamp FROM document_store WHERE session_id = ? ORDER BY upload_timestamp DESC', (session_id,))
  documents = cursor.fetchall()
  conn.close()
  return [dict(doc) for doc in documents]
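
For orientation, here is a minimal usage sketch of the session-scoped helpers introduced above. The function names and the session_id column come from this diff; the file name, content, and uuid-based session id are placeholder assumptions. Note that CREATE TABLE IF NOT EXISTS will not add the new session_id column to a pre-existing research_assistant.db, so the database file may need to be recreated after this change.

import uuid

from backend.db_utils import create_document_store, insert_document_record, get_all_documents

# Placeholder session id; in the app this comes from the client.
session_id = str(uuid.uuid4())

create_document_store()  # creates document_store with the new session_id column (fresh DB only)
file_id = insert_document_record(session_id, "example.pdf", "extracted text ...")  # hypothetical values

# Only documents stored under this session id are returned.
print(file_id, get_all_documents(session_id))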
backend/main.py CHANGED
@@ -1,4 +1,4 @@
- from fastapi import FastAPI, File, UploadFile, HTTPException
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form
  from backend.pydantic_models import QueryInput, QueryResponse, DocumentInfo, DeleteFileRequest, ChallengeRequest, EvaluateAnswer
  from backend.langchain_utils import generate_response, retrieve
  from backend.db_utils import insert_application_logs, get_chat_history, get_all_documents, insert_document_record, delete_document_record, get_file_content
@@ -99,7 +99,9 @@ def evaluate_response(request: EvaluateAnswer):
 
 
  @app.post("/upload-doc")
- def upload_and_index_document(file: UploadFile = File(...)):
+ def upload_and_index_document(file: UploadFile = File(...), session_id: str = Form(None)):
+ if not session_id:
+ session_id = str(uuid.uuid4())
  allowed_extensions = ['.pdf', '.txt']
  file_extension = os.path.splitext(file.filename)[1].lower()
 
@@ -114,7 +116,7 @@ def upload_and_index_document(file: UploadFile = File(...)):
  shutil.copyfileobj(file.file, buffer)
  docs = load_and_split_document(temp_file_path)
  docs_content = "\n\n".join(doc.page_content for doc in docs)
- file_id = insert_document_record(file.filename, docs_content)
+ file_id = insert_document_record(session_id, file.filename, docs_content)
  success = index_document_to_pinecone(temp_file_path, file_id)
 
  if success:
@@ -144,8 +146,8 @@ def upload_and_index_document(file: UploadFile = File(...)):
  os.remove(temp_file_path)
 
  @app.get("/list-docs", response_model=list[DocumentInfo])
- def list_documents():
- return get_all_documents()
+ def list_documents(session_id: str):
+ return get_all_documents(session_id)
 
  @app.post("/delete-doc")
  def delete_document(request: DeleteFileRequest):
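
A hedged client-side example of the updated endpoints, in the spirit of the deleted test.py: /upload-doc now accepts an optional session_id form field (falling back to a generated UUID), and /list-docs takes a session_id query parameter. The base URL, file name, and session id below are assumptions.

import requests

BASE_URL = "http://localhost:8000"  # assumed local backend, as in the old test.py
SESSION_ID = "demo-session"         # placeholder session id

# Upload a document under a session (session_id is sent as form data).
with open("example.pdf", "rb") as f:  # hypothetical file
    upload = requests.post(
        f"{BASE_URL}/upload-doc",
        files={"file": ("example.pdf", f, "application/octet-stream")},
        data={"session_id": SESSION_ID},
    )
print("Upload:", upload.json())

# List only the documents uploaded under that session.
docs = requests.get(f"{BASE_URL}/list-docs", params={"session_id": SESSION_ID}).json()
print("Docs:", docs)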
frontend/app.py CHANGED
@@ -32,7 +32,8 @@ with st.sidebar:
  if st.button("Upload Document"):
  response = requests.post(
  f"{BACKEND_URL}/upload-doc",
- files={"file": (uploaded_file.name, uploaded_file, "application/octet-stream")}
+ files={"file": (uploaded_file.name, uploaded_file, "application/octet-stream")},
+ data={"session_id": st.session_state.session_id}
  )
  if response.status_code == 200:
  data = response.json()
@@ -46,7 +47,7 @@ with st.sidebar:
  # List documents
  st.subheader("Uploaded Documents")
  try:
- documents = requests.get(f"{BACKEND_URL}/list-docs").json()
+ documents = requests.get(f"{BACKEND_URL}/list-docs", params={"session_id": st.session_state.session_id}).json()
  for doc in documents:
  doc_id = doc["id"]
  with st.container(border=True):
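
These frontend changes read st.session_state.session_id, which is not initialized anywhere in this commit. A minimal sketch of one way it could be set near the top of frontend/app.py (the uuid-based default is an assumption):

import uuid
import streamlit as st

# Hypothetical initialization; this commit only shows session_id being read.
if "session_id" not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())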
start.sh ADDED
@@ -0,0 +1,8 @@
+ #!/bin/sh
+
+
+ cd /app/backend && uvicorn main:app --host 0.0.0.0 --port 8000 &
+
+
+ cd /app/frontend && streamlit run ui.py --server.port=7860 --server.address=0.0.0.0
+
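
As a rough local smoke test for the container defined by DockerFile and start.sh, something like the sketch below could confirm that both processes respond. It assumes the image is run with both ports published (e.g. docker run -p 7860:7860 -p 8000:8000 ...); only 7860 is EXPOSEd, and the backend on 8000 is otherwise internal to the container.

import requests

# Assumed port mappings; adjust to how the container is actually run.
targets = {
    "streamlit frontend": "http://localhost:7860",
    "fastapi backend": "http://localhost:8000/docs",  # FastAPI's default interactive docs route
}

for name, url in targets.items():
    try:
        status = requests.get(url, timeout=5).status_code
        print(f"{name}: HTTP {status}")
    except requests.RequestException as exc:
        print(f"{name}: unreachable ({exc})")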
test.py DELETED
@@ -1,25 +0,0 @@
-
- import requests
-
- # Set the FastAPI backend URL
- BASE_URL = "http://localhost:8000"
-
-
- # with open("neural computing cwsi.pdf", "rb") as f:
- # files = {"file": ("neural computing cwsi.pdf", f, "text/plain")}
- # upload_response = requests.post(f"{BASE_URL}/upload-doc", files=files)
- # # print("Upload Response:", upload_response.json())
-
- # file_id = upload_response.json().get("summary")
-
- # print(file_id)
-
- chat_data = {"question": "What is the main topic?", "model": "gpt-4o-mini"}
- chat_response = requests.post(f"{BASE_URL}/chat", json=chat_data)
- print("Chat Response:", chat_response.json())
-
- # delete_data={"file_id": 1}
-
- # delete_response = requests.post(f"{BASE_URL}/delete-doc", json=delete_data)
-
- # print("Delete Response:", delete_response.json())