"""Module for AstraDB database""" import logging import os import astrapy from dotenv import load_dotenv load_dotenv() logging.basicConfig( format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s', datefmt="%Y-%m-%d %H:%M:%S", level=logging.ERROR) class KnowledgeBase: # pylint: disable=too-few-public-methods """ AstraDB class for direct collection operations. """ def __init__(self): """Initialize AstraDB connection.""" self.collection = astrapy.DataAPIClient( os.environ["ASTRA_DB_APPLICATION_TOKEN"]).get_database( os.environ["ASTRA_DB_API_ENDPOINT"]).documents def get_doc_count(self, user_id: str) -> dict: """ Count unique emails and files for a specific user. Args: user_id (str): The user's email address Returns: dict: {"emails": count, "files": count, "total_documents": count} Raises: ValueError: If user_id is invalid Exception: If database query fails """ if not user_id or not isinstance(user_id, str): raise ValueError("user_id must be a non-empty string") try: # Get all documents for the user with type gmail or file filter_criteria = { "metadata.userId": user_id, "metadata.type": {"$in": ["gmail", "file"]} } # Use direct collection access results = list(self.collection.find(filter=filter_criteria)) # Group by metadata.id to get unique documents unique_docs = {} for doc in results: doc_id = doc.get("metadata", {}).get("id") doc_type = doc.get("metadata", {}).get("type") if doc_id and doc_type: if doc_id not in unique_docs: unique_docs[doc_id] = doc_type # Count by type email_count = sum(1 for doc_type in unique_docs.values() if doc_type == "gmail") file_count = sum(1 for doc_type in unique_docs.values() if doc_type == "file") total_count = len(unique_docs) return { "gmail": email_count, "file": file_count, "total": total_count } except Exception as e: # pylint: disable=broad-exception-caught logging.error("Failed to get document count for user %s: %s", user_id, str(e)) # pylint: disable=raise-missing-from raise Exception(f"Database query failed: {str(e)}") # pylint: disable=broad-exception-raised