Matthew Schulz committed on
Commit
003becb
·
1 Parent(s): 91759b9

refactor: update Drive upload to use the browser's localStorage to store the ID used for uploading chats

Browse files
Files changed (3) hide show
  1. .gitignore +3 -4
  2. app.py +43 -29
  3. utils/chatLogger.py +56 -26
.gitignore CHANGED
@@ -1,9 +1,8 @@
1
  venv
2
  .env
3
- log.csv
4
- utils/__pycache__/doc_utils.cpython-312.pyc
5
- utils/__pycache__/utils.cpython-312.pyc
6
  test.py
7
  data/old_KB.py
8
  creds.json
9
- utils/__pycache__/google_drive_utils.cpython-312.pyc
 
1
  venv
2
  .env
3
+ .gradio
4
+ __pycache__
5
+ utils/__pycache__
6
  test.py
7
  data/old_KB.py
8
  creds.json
 
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import json
3
  import base64
 
4
  import logging
5
 
6
  from typing import Generator
@@ -16,7 +17,8 @@ from utils.utils import (
16
  get_messages,
17
  load_knowledge_base,
18
  )
19
- from utils.chatLogger import ChatLogger
 
20
 
21
 
22
  def initialize():
@@ -40,9 +42,6 @@ def initialize():
40
  logger.info("Loading env variables...")
41
  if not os.getenv("SPACE_ID"):
42
  load_dotenv()
43
- drive_creds_encoded = os.getenv(
44
- "GOOGLE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS_BASE64"
45
- ).strip()
46
 
47
  logger.info("Initializing OpenAI client...")
48
  openAI_client = OpenAI(
@@ -51,10 +50,13 @@ def initialize():
51
  )
52
 
53
  logger.info("Loading Drive service account details...")
 
 
 
54
  service_account_json = json.loads(base64.b64decode(drive_creds_encoded).decode())
55
 
56
- logger.info("Initializing ChatLogger instance...")
57
- chat_logger = ChatLogger(service_account_json)
58
 
59
  logger.info("Ready for user query...")
60
  return (
@@ -63,7 +65,7 @@ def initialize():
63
  knowledge_base,
64
  openAI_client,
65
  logger,
66
- chat_logger,
67
  )
68
 
69
 
@@ -73,13 +75,14 @@ def initialize():
73
  knowledge_base,
74
  openAI_client,
75
  logger,
76
- chat_logger,
77
  ) = initialize()
78
 
79
 
80
  def rag_chatbot(
81
  user_message: str,
82
- chat_history: list = None,
 
83
  ) -> Generator[list, None, None]:
84
  """
85
  Retrieves relevant documents to user query and streams LLM response catching errors along the way.
@@ -163,33 +166,44 @@ def rag_chatbot(
163
 
164
  try:
165
  logger.info("Trying to upload chat history to Drive...")
166
- chat_logger.upload_chat_history(chat_history)
167
  except Exception as e:
168
- logger.warning(f"Error during Google Drive upload: {e}")
169
 
170
  logger.info("Returning chat history...")
171
  return chat_history
172
 
173
 
174
  # Gradio app code
175
- demo = gr.ChatInterface(
176
- fn=rag_chatbot,
177
- title="Matthew Schulz's RAG Chatbot 💬🤖",
178
- type="messages",
179
- examples=[
180
- ["What is Matthew's educational background?"],
181
- ["What machine learning projects has Matthew worked on?"],
182
- ["What experience does Matthew have in software engineering?"],
183
- ["Why did Matthew choose to pursue a degree in computer science?"],
184
- ["Does Matthew have any leadership experience?"],
185
- ["Has Matthew completed any Summer internships?"],
186
- ["Tell me about some real-world projects Matthew has worked on."],
187
- ["What is Matthew's greatest strength and weakness?"],
188
- ],
189
- # save_history=True,
190
- run_examples_on_click=False,
191
- cache_examples=False,
192
- )
 
 
 
 
 
 
 
 
 
 
 
193
 
194
 
195
  if __name__ == "__main__":
 
1
  import os
2
  import json
3
  import base64
4
+ import uuid
5
  import logging
6
 
7
  from typing import Generator
 
17
  get_messages,
18
  load_knowledge_base,
19
  )
20
+
21
+ from utils.chatLogger import ChatUploader
22
 
23
 
24
  def initialize():
 
42
  logger.info("Loading env variables...")
43
  if not os.getenv("SPACE_ID"):
44
  load_dotenv()
 
 
 
45
 
46
  logger.info("Initializing OpenAI client...")
47
  openAI_client = OpenAI(
 
50
  )
51
 
52
  logger.info("Loading Drive service account details...")
53
+ drive_creds_encoded = os.getenv(
54
+ "GOOGLE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS_BASE64"
55
+ ).strip()
56
  service_account_json = json.loads(base64.b64decode(drive_creds_encoded).decode())
57
 
58
+ logger.info("Initializing ChatUploader instance...")
59
+ chat_uploader = ChatUploader(service_account_json)
60
 
61
  logger.info("Ready for user query...")
62
  return (
 
65
  knowledge_base,
66
  openAI_client,
67
  logger,
68
+ chat_uploader,
69
  )
70
 
71
 
 
75
  knowledge_base,
76
  openAI_client,
77
  logger,
78
+ chat_uploader,
79
  ) = initialize()
80
 
81
 
82
  def rag_chatbot(
83
  user_message: str,
84
+ chat_history: list,
85
+ browser_id: str,
86
  ) -> Generator[list, None, None]:
87
  """
88
  Retrieves relevant documents to user query and streams LLM response catching errors along the way.
 
166
 
167
  try:
168
  logger.info("Trying to upload chat history to Drive...")
169
+ chat_uploader.upload_chat_history(chat_history, browser_id)
170
  except Exception as e:
171
+ logger.warning(f"Warning: error during Google Drive upload: {e}")
172
 
173
  logger.info("Returning chat history...")
174
  return chat_history
175
 
176
 
177
  # Gradio app code
178
+ with gr.Blocks() as demo:
179
+ browser_id_state = gr.BrowserState(default_value=None)
180
+
181
+ @demo.load(inputs=browser_id_state, outputs=browser_id_state)
182
+ def load_browser_id(current_id):
183
+ if current_id is None or current_id == "":
184
+ new_id = str(uuid.uuid4())
185
+ return new_id
186
+ return current_id
187
+
188
+ gr.ChatInterface(
189
+ fn=rag_chatbot,
190
+ title="Matthew Schulz's RAG Chatbot 💬🤖",
191
+ additional_inputs=browser_id_state,
192
+ type="messages",
193
+ examples=[
194
+ ["What is Matthew's educational background?"],
195
+ ["What machine learning projects has Matthew worked on?"],
196
+ ["What experience does Matthew have in software engineering?"],
197
+ ["Why did Matthew choose to pursue a degree in computer science?"],
198
+ ["Does Matthew have any leadership experience?"],
199
+ ["Has Matthew completed any Summer internships?"],
200
+ ["Tell me about some real-world projects Matthew has worked on."],
201
+ ["What is Matthew's greatest strength and weakness?"],
202
+ ],
203
+ save_history=True,
204
+ run_examples_on_click=False,
205
+ cache_examples=False,
206
+ )
207
 
208
 
209
  if __name__ == "__main__":
utils/chatLogger.py CHANGED
@@ -1,64 +1,94 @@
1
  import io
2
  import json
3
- import uuid
4
  from googleapiclient.discovery import build
5
- from googleapiclient.http import MediaIoBaseUpload
6
  from google.oauth2 import service_account
7
 
8
 
9
- class ChatLogger:
10
  def __init__(
11
  self,
12
  service_account_json: dict,
13
  root_folder_id: str = "1KtfVgL1Rg1iX-ZMHH4Im__ss-pgbaDM9",
14
  ):
15
  """
16
- Initializes a new chat logger instance using a service account JSON dict.
17
  """
18
  credentials = service_account.Credentials.from_service_account_info(
19
  service_account_json, scopes=["https://www.googleapis.com/auth/drive"]
20
  )
21
  self.drive_service = build("drive", "v3", credentials=credentials)
22
  self.root_folder_id = root_folder_id
23
- self.session_id = str(uuid.uuid4())
24
- self.folder_id = self._create_session_folder()
25
 
26
- def _create_session_folder(self) -> str:
27
  """
28
- Creates a folder in Drive for this session using the UUID.
29
  """
30
- metadata = {
31
- "name": f"session_{self.session_id}",
32
- "mimeType": "application/vnd.google-apps.folder",
33
- "parents": [self.root_folder_id],
34
- }
35
- folder = self.drive_service.files().create(body=metadata, fields="id").execute()
36
- return folder["id"]
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def upload_chat_history(
39
- self, chat_history: list, filename: str = "chat_log.json"
40
  ) -> None:
41
  """
42
- Uploads or updates a single chat log file inside the session's folder.
43
  """
 
 
44
  query = (
45
- f"name = '{filename}' and '{self.folder_id}' in parents and "
46
- f"mimeType = 'application/json' and trashed = false"
47
  )
48
  results = self.drive_service.files().list(q=query, fields="files(id)").execute()
49
  files = results.get("files", [])
50
 
51
- content = json.dumps(chat_history, indent=2)
52
-
53
- media = MediaIoBaseUpload(
54
- io.BytesIO(content.encode()), mimetype="application/json"
55
- )
56
-
57
  if files:
58
  file_id = files[0]["id"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  self.drive_service.files().update(
60
  fileId=file_id, media_body=media
61
  ).execute()
62
  else:
63
- metadata = {"name": filename, "parents": [self.folder_id]}
 
 
 
 
64
  self.drive_service.files().create(body=metadata, media_body=media).execute()
 
1
  import io
2
  import json
 
3
  from googleapiclient.discovery import build
4
+ from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
5
  from google.oauth2 import service_account
6
 
7
 
8
+ class ChatUploader:
9
  def __init__(
10
  self,
11
  service_account_json: dict,
12
  root_folder_id: str = "1KtfVgL1Rg1iX-ZMHH4Im__ss-pgbaDM9",
13
  ):
14
  """
15
+ Initializes a new chat uploader instance using a service account JSON dict.
16
  """
17
  credentials = service_account.Credentials.from_service_account_info(
18
  service_account_json, scopes=["https://www.googleapis.com/auth/drive"]
19
  )
20
  self.drive_service = build("drive", "v3", credentials=credentials)
21
  self.root_folder_id = root_folder_id
 
 
22
 
23
+ def _get_or_create_browser_folder(self, browser_id: str) -> str:
24
  """
25
+ Searches for an existing folder for the given browser_id. If not found, creates a folder named 'browser_{browser_id}' and returns its ID.
26
  """
27
+ folder_name = f"browser_{browser_id}"
28
+ query = (
29
+ f"name = '{folder_name}' and '{self.root_folder_id}' in parents and "
30
+ "mimeType = 'application/vnd.google-apps.folder' and trashed = false"
31
+ )
32
+ results = self.drive_service.files().list(q=query, fields="files(id)").execute()
33
+ folders = results.get("files", [])
34
+
35
+ if folders:
36
+ return folders[0]["id"]
37
+ else:
38
+ metadata = {
39
+ "name": folder_name,
40
+ "mimeType": "application/vnd.google-apps.folder",
41
+ "parents": [self.root_folder_id],
42
+ }
43
+ folder = (
44
+ self.drive_service.files().create(body=metadata, fields="id").execute()
45
+ )
46
+ return folder["id"]
47
 
48
  def upload_chat_history(
49
+ self, chat_history: list, browser_id: str, filename: str = "chat_log.json"
50
  ) -> None:
51
  """
52
+ Uploads the chat log file inside the browser's folder. If the folder and/or file exists, it appends the new chat entries to the current log. Otherwise, it creates them.
53
  """
54
+ folder_id = self._get_or_create_browser_folder(browser_id)
55
+
56
  query = (
57
+ f"name = '{filename}' and '{folder_id}' in parents and "
58
+ "mimeType = 'application/json' and trashed = false"
59
  )
60
  results = self.drive_service.files().list(q=query, fields="files(id)").execute()
61
  files = results.get("files", [])
62
 
 
 
 
 
 
 
63
  if files:
64
  file_id = files[0]["id"]
65
+
66
+ request = self.drive_service.files().get_media(fileId=file_id)
67
+ existing_stream = io.BytesIO()
68
+ downloader = MediaIoBaseDownload(existing_stream, request)
69
+ done = False
70
+ while not done:
71
+ _, done = downloader.next_chunk()
72
+
73
+ existing_stream.seek(0)
74
+ try:
75
+ existing_chat_history = json.loads(existing_stream.read())
76
+ except json.JSONDecodeError:
77
+ existing_chat_history = []
78
+
79
+ updated_chat_history = existing_chat_history + chat_history
80
+
81
+ content = json.dumps(updated_chat_history, indent=2)
82
+ media = MediaIoBaseUpload(
83
+ io.BytesIO(content.encode()), mimetype="application/json"
84
+ )
85
  self.drive_service.files().update(
86
  fileId=file_id, media_body=media
87
  ).execute()
88
  else:
89
+ content = json.dumps(chat_history, indent=2)
90
+ media = MediaIoBaseUpload(
91
+ io.BytesIO(content.encode()), mimetype="application/json"
92
+ )
93
+ metadata = {"name": filename, "parents": [folder_id]}
94
  self.drive_service.files().create(body=metadata, media_body=media).execute()