|
"""Module for AstraDB database""" |
|
import logging |
|
import os |
|
|
|
import astrapy |
|
from dotenv import load_dotenv |
|
|
|
# Load environment variables via python-dotenv before anything below reads
# os.environ (KnowledgeBase.__init__ looks up its credentials there).
load_dotenv()

# Configure the root logger: records carry a timestamp, the level and the
# name of the calling function; only ERROR and above are emitted.
logging.basicConfig(
    level=logging.ERROR,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(funcName)s - %(message)s",
)
|
|
|
|
|
class KnowledgeBase:
    """
    AstraDB wrapper for direct operations on a single document collection.

    The bound collection handle is stored on ``self.collection`` and is the
    only instance state the query methods use.
    """

    def __init__(self):
        """
        Initialize the AstraDB connection and bind the working collection.

        Reads ``ASTRA_DB_APPLICATION_TOKEN`` and ``ASTRA_DB_API_ENDPOINT``
        from the process environment.

        Raises:
            KeyError: If either environment variable is not set.
        """
        client = astrapy.DataAPIClient(os.environ["ASTRA_DB_APPLICATION_TOKEN"])
        database = client.get_database(os.environ["ASTRA_DB_API_ENDPOINT"])
        # NOTE(review): attribute access on the database object presumably
        # resolves to the collection named "documents" -- confirm against the
        # installed astrapy version.
        self.collection = database.documents

    def get_doc_count(self, user_id: str) -> dict:
        """
        Count the distinct gmail and file documents stored for one user.

        Every document whose ``metadata.userId`` equals ``user_id`` and whose
        ``metadata.type`` is ``"gmail"`` or ``"file"`` is fetched, then
        de-duplicated on ``metadata.id`` so that several stored records
        sharing one id count once. When the same id appears with different
        types, the first type returned by the query is the one counted.
        Records without a usable ``metadata.id`` or ``metadata.type`` are
        ignored.

        Args:
            user_id (str): Value matched against ``metadata.userId``
                (presumably the user's email address -- verify against the
                code that writes these documents).

        Returns:
            dict: ``{"gmail": int, "file": int, "total": int}`` where
            ``total`` is the number of distinct ids seen.

        Raises:
            ValueError: If ``user_id`` is empty or not a string.
            Exception: If the query or the counting fails; the original
                error is attached as ``__cause__``.
        """
        if not user_id or not isinstance(user_id, str):
            raise ValueError("user_id must be a non-empty string")

        try:
            filter_criteria = {
                "metadata.userId": user_id,
                "metadata.type": {"$in": ["gmail", "file"]},
            }

            # Iterate the query result directly instead of copying it into a
            # list first; only the id -> type mapping needs to be kept.
            first_type_by_id = {}
            for doc in self.collection.find(filter=filter_criteria):
                # "or {}" also covers an explicit null metadata value, which
                # a plain .get("metadata", {}) would pass through as None.
                metadata = doc.get("metadata") or {}
                doc_id = metadata.get("id")
                doc_type = metadata.get("type")
                if doc_id and doc_type:
                    # setdefault keeps the first type seen for an id.
                    first_type_by_id.setdefault(doc_id, doc_type)

            seen_types = list(first_type_by_id.values())
            return {
                "gmail": seen_types.count("gmail"),
                "file": seen_types.count("file"),
                "total": len(first_type_by_id),
            }

        except Exception as e:
            logging.error("Failed to get document count for user %s: %s", user_id, str(e))
            # Chain the original error so callers and tracebacks can still
            # see what actually failed underneath the generic wrapper.
            raise Exception(f"Database query failed: {str(e)}") from e
|
|