Merge pull request #43 from oxbridge-econ/dev
Browse files- app/app.py +2 -2
 - app/collectors/finfast/keyword_analysis.py +0 -1
 - app/controllers/entity.py +29 -0
 - app/controllers/keyword.py +1 -1
 - app/models/database/astra.py +3 -3
 - app/models/llm/azure.py +0 -1
 - app/routes/entity.py +22 -0
 - app/routes/knowledge_base.py +0 -59
 - app/routes/summary.py +108 -3
 
    	
        app/app.py
    CHANGED
    
    | 
         @@ -10,7 +10,7 @@ from fastapi.responses import JSONResponse 
     | 
|
| 10 | 
         
             
            from apscheduler.schedulers.asyncio import AsyncIOScheduler
         
     | 
| 11 | 
         
             
            from apscheduler.triggers.cron import CronTrigger
         
     | 
| 12 | 
         | 
| 13 | 
         
            -
            from routes import category, summary, keyword, lda,  
     | 
| 14 | 
         | 
| 15 | 
         | 
| 16 | 
         
             
            class Config: # pylint: disable=too-few-public-methods
         
     | 
| 
         @@ -172,7 +172,7 @@ app.include_router(category.router) 
     | 
|
| 172 | 
         
             
            app.include_router(summary.router)
         
     | 
| 173 | 
         
             
            app.include_router(keyword.router)
         
     | 
| 174 | 
         
             
            app.include_router(lda.router)
         
     | 
| 175 | 
         
            -
            app.include_router( 
     | 
| 176 | 
         | 
| 177 | 
         
             
            @app.get("/_health")
         
     | 
| 178 | 
         
             
            def health():
         
     | 
| 
         | 
|
| 10 | 
         
             
            from apscheduler.schedulers.asyncio import AsyncIOScheduler
         
     | 
| 11 | 
         
             
            from apscheduler.triggers.cron import CronTrigger
         
     | 
| 12 | 
         | 
| 13 | 
         
            +
            from routes import category, summary, keyword, lda, entity # pylint: disable=import-error
         
     | 
| 14 | 
         | 
| 15 | 
         | 
| 16 | 
         
             
            class Config: # pylint: disable=too-few-public-methods
         
     | 
| 
         | 
|
| 172 | 
         
             
            app.include_router(summary.router)
         
     | 
| 173 | 
         
             
            app.include_router(keyword.router)
         
     | 
| 174 | 
         
             
            app.include_router(lda.router)
         
     | 
| 175 | 
         
            +
            app.include_router(entity.router)
         
     | 
| 176 | 
         | 
| 177 | 
         
             
            @app.get("/_health")
         
     | 
| 178 | 
         
             
            def health():
         
     | 
    	
        app/collectors/finfast/keyword_analysis.py
    CHANGED
    
    | 
         @@ -140,4 +140,3 @@ def _perform_analysis_for_all_periods() -> None: 
     | 
|
| 140 | 
         
             
                    except Exception as e: # pylint: disable=broad-exception-caught
         
     | 
| 141 | 
         
             
                        logger.error("Error analyzing %s period: %s", period, str(e))
         
     | 
| 142 | 
         
             
                        # Continue with other periods even if one fails
         
     | 
| 143 | 
         
            -
             
         
     | 
| 
         | 
|
| 140 | 
         
             
                    except Exception as e: # pylint: disable=broad-exception-caught
         
     | 
| 141 | 
         
             
                        logger.error("Error analyzing %s period: %s", period, str(e))
         
     | 
| 142 | 
         
             
                        # Continue with other periods even if one fails
         
     | 
| 
         | 
    	
        app/controllers/entity.py
    ADDED
    
    | 
         @@ -0,0 +1,29 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """Controller for entity-related operations."""
         
     | 
| 2 | 
         
            +
            from models.database import entity_collection
         
     | 
| 3 | 
         
            +
             
     | 
| 4 | 
         
            +
            def retrieve_hot_entity():
                """Return the entities with the highest occurrence counts.

                Queries ``entity_collection`` with an empty filter, orders the
                matches by the ``occurrence`` field in descending order and keeps
                at most 200 of them.

                Returns:
                    dict: A mapping with two keys:

                    Count (int): The number of documents retrieved (up to 200).
                    Items (list): One dict per document, each containing:
                        entity: The value of the document's ``entity`` field.
                        entityType: The value of the document's ``entityType`` field.
                        total_occurrence: The value of the document's ``occurrence``
                            field, exposed under this name by the projection.
                """
                # Keep only the fields the response needs; '_id' is excluded from
                # every returned document.
                projection = {'entity': 1, 'entityType': 1, 'total_occurrence': '$occurrence', '_id': 0}
                cursor = entity_collection.find({}, projection)
                hottest = cursor.sort('occurrence', -1).limit(200)
                items = list(hottest)
                return {"Count": len(items), "Items": items}
         
     | 
    	
        app/controllers/keyword.py
    CHANGED
    
    | 
         @@ -661,7 +661,7 @@ def invoke_llm_for_batch(articles_in_batch: list, historical_keywords: set) -> L 
     | 
|
| 661 | 
         | 
| 662 | 
         
             
                try:
         
     | 
| 663 | 
         
             
                    categories_text = "\n".join([
         
     | 
| 664 | 
         
            -
                        f"- {cat}: {', '.join(keywords[:10])}" + ("..." if len(keywords) > 10 else "") 
     | 
| 665 | 
         
             
                        for cat, keywords in FIN_KEYWORDS.items()
         
     | 
| 666 | 
         
             
                    ])
         
     | 
| 667 | 
         | 
| 
         | 
|
| 661 | 
         | 
| 662 | 
         
             
                try:
         
     | 
| 663 | 
         
             
                    categories_text = "\n".join([
         
     | 
| 664 | 
         
            +
                        f"- {cat}: {', '.join(keywords[:10])}" + ("..." if len(keywords) > 10 else "")
         
     | 
| 665 | 
         
             
                        for cat, keywords in FIN_KEYWORDS.items()
         
     | 
| 666 | 
         
             
                    ])
         
     | 
| 667 | 
         | 
    	
        app/models/database/astra.py
    CHANGED
    
    | 
         @@ -67,9 +67,9 @@ class KnowledgeBase: # pylint: disable=too-few-public-methods 
     | 
|
| 67 | 
         
             
                        total_count = len(unique_docs)
         
     | 
| 68 | 
         | 
| 69 | 
         
             
                        return {
         
     | 
| 70 | 
         
            -
                            " 
     | 
| 71 | 
         
            -
                            " 
     | 
| 72 | 
         
            -
                            " 
     | 
| 73 | 
         
             
                        }
         
     | 
| 74 | 
         | 
| 75 | 
         
             
                    except Exception as e:  # pylint: disable=broad-exception-caught
         
     | 
| 
         | 
|
| 67 | 
         
             
                        total_count = len(unique_docs)
         
     | 
| 68 | 
         | 
| 69 | 
         
             
                        return {
         
     | 
| 70 | 
         
            +
                            "gmail": email_count,
         
     | 
| 71 | 
         
            +
                            "file": file_count,
         
     | 
| 72 | 
         
            +
                            "total": total_count
         
     | 
| 73 | 
         
             
                        }
         
     | 
| 74 | 
         | 
| 75 | 
         
             
                    except Exception as e:  # pylint: disable=broad-exception-caught
         
     | 
    	
        app/models/llm/azure.py
    CHANGED
    
    | 
         @@ -32,4 +32,3 @@ class GPTModel(AzureChatOpenAI): 
     | 
|
| 32 | 
         
             
                        streaming=True,
         
     | 
| 33 | 
         
             
                        temperature=0
         
     | 
| 34 | 
         
             
                    )
         
     | 
| 35 | 
         
            -
                   
         
     | 
| 
         | 
|
| 32 | 
         
             
                        streaming=True,
         
     | 
| 33 | 
         
             
                        temperature=0
         
     | 
| 34 | 
         
             
                    )
         
     | 
| 
         | 
    	
        app/routes/entity.py
    ADDED
    
    | 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """This module defines the /lda route for the FastAPI application."""
         
     | 
| 2 | 
         
            +
             
     | 
| 3 | 
         
            +
            import logging
         
     | 
| 4 | 
         
            +
            from fastapi import APIRouter
         
     | 
| 5 | 
         
            +
            from fastapi.responses import JSONResponse
         
     | 
| 6 | 
         
            +
            from controllers.entity import retrieve_hot_entity # pylint: disable=import-error
         
     | 
| 7 | 
         
            +
             
     | 
| 8 | 
         
            +
            # Configure logger
         
     | 
| 9 | 
         
            +
            logger = logging.getLogger(__name__)
         
     | 
| 10 | 
         
            +
             
     | 
| 11 | 
         
            +
            # Create FastAPI Router
         
     | 
| 12 | 
         
            +
            router = APIRouter(prefix="/entity", tags=["entity"])
         
     | 
| 13 | 
         
            +
             
     | 
| 14 | 
         
            +
            @router.get('/hot')
            async def get_hot_entity():
                """
                Handles GET requests to retrieve the hot entities.

                ``retrieve_hot_entity`` is a plain synchronous function, so it is
                executed in a worker thread; calling it directly from this coroutine
                would stall the event loop until the query finishes.

                Returns:
                    JSONResponse: JSON body containing the count and the list of
                    hot entities, as produced by ``retrieve_hot_entity``.
                """
                # Imported locally so this handler does not depend on edits to the
                # module's import block.
                from fastapi.concurrency import run_in_threadpool  # pylint: disable=import-outside-toplevel

                hot_entities = await run_in_threadpool(retrieve_hot_entity)
                return JSONResponse(content=hot_entities)
         
     | 
    	
        app/routes/knowledge_base.py
    DELETED
    
    | 
         @@ -1,59 +0,0 @@ 
     | 
|
| 1 | 
         
            -
            """This module defines the /knowledge-base route for the FastAPI application."""
         
     | 
| 2 | 
         
            -
            import logging
         
     | 
| 3 | 
         
            -
            from fastapi import APIRouter, HTTPException, Path
         
     | 
| 4 | 
         
            -
            from fastapi.responses import JSONResponse
         
     | 
| 5 | 
         
            -
            from models.database import knowledge_base  # pylint: disable=import-error
         
     | 
| 6 | 
         
            -
             
     | 
| 7 | 
         
            -
            router = APIRouter(prefix="/knowledge-base", tags=["knowledge-base"])
         
     | 
| 8 | 
         
            -
             
     | 
| 9 | 
         
            -
            @router.get("/{user_id}")
         
     | 
| 10 | 
         
            -
            async def get_user_document_stats(
         
     | 
| 11 | 
         
            -
                user_id: str = Path(..., description="User's email address")
         
     | 
| 12 | 
         
            -
                ) -> JSONResponse:
         
     | 
| 13 | 
         
            -
                """
         
     | 
| 14 | 
         
            -
                Get document statistics for a specific user.
         
     | 
| 15 | 
         
            -
                
         
     | 
| 16 | 
         
            -
                This endpoint counts the number of unique emails and files uploaded by the user.
         
     | 
| 17 | 
         
            -
                It groups documents by metadata.id to ensure unique document counting (not chunks).
         
     | 
| 18 | 
         
            -
                
         
     | 
| 19 | 
         
            -
                Args:
         
     | 
| 20 | 
         
            -
                    user_id (str): The user's email address
         
     | 
| 21 | 
         
            -
                    
         
     | 
| 22 | 
         
            -
                Returns:
         
     | 
| 23 | 
         
            -
                    JSONResponse: A JSON response containing document counts:
         
     | 
| 24 | 
         
            -
                    {
         
     | 
| 25 | 
         
            -
                        "user_id": "[email protected]",
         
     | 
| 26 | 
         
            -
                        "emails": 5,
         
     | 
| 27 | 
         
            -
                        "files": 12,
         
     | 
| 28 | 
         
            -
                        "total_documents": 17
         
     | 
| 29 | 
         
            -
                    }
         
     | 
| 30 | 
         
            -
                    
         
     | 
| 31 | 
         
            -
                Raises:
         
     | 
| 32 | 
         
            -
                    HTTPException: 400 for invalid user_id, 500 for database errors
         
     | 
| 33 | 
         
            -
                """
         
     | 
| 34 | 
         
            -
                try:
         
     | 
| 35 | 
         
            -
                    # Validate user_id format (basic email validation)
         
     | 
| 36 | 
         
            -
                    if not user_id or "@" not in user_id or "." not in user_id:
         
     | 
| 37 | 
         
            -
                        raise HTTPException(
         
     | 
| 38 | 
         
            -
                            status_code=400,
         
     | 
| 39 | 
         
            -
                            detail="Invalid user_id format. Must be a valid email address."
         
     | 
| 40 | 
         
            -
                        )
         
     | 
| 41 | 
         
            -
             
     | 
| 42 | 
         
            -
                    # Get document counts from database
         
     | 
| 43 | 
         
            -
                    result = knowledge_base.get_doc_count(user_id)
         
     | 
| 44 | 
         
            -
             
     | 
| 45 | 
         
            -
                    # Add user_id to response
         
     | 
| 46 | 
         
            -
                    result["user_id"] = user_id
         
     | 
| 47 | 
         
            -
             
     | 
| 48 | 
         
            -
                    return JSONResponse(content=result, status_code=200)
         
     | 
| 49 | 
         
            -
             
     | 
| 50 | 
         
            -
                except ValueError as e:
         
     | 
| 51 | 
         
            -
                    logging.error("Validation error for user %s: %s", user_id, str(e))
         
     | 
| 52 | 
         
            -
                    raise HTTPException(status_code=400, detail=str(e)) # pylint: disable=raise-missing-from
         
     | 
| 53 | 
         
            -
             
     | 
| 54 | 
         
            -
                except Exception as e:  # pylint: disable=broad-exception-caught
         
     | 
| 55 | 
         
            -
                    logging.error("Database error for user %s: %s", user_id, str(e))
         
     | 
| 56 | 
         
            -
                    raise HTTPException( # pylint: disable=raise-missing-from
         
     | 
| 57 | 
         
            -
                        status_code=500,
         
     | 
| 58 | 
         
            -
                        detail="Internal server error while retrieving document statistics."
         
     | 
| 59 | 
         
            -
                    )
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
    	
        app/routes/summary.py
    CHANGED
    
    | 
         @@ -1,12 +1,67 @@ 
     | 
|
| 1 | 
         
             
            """This module defines the /summary route for the Flask application."""
         
     | 
| 2 | 
         | 
| 3 | 
         
             
            import importlib
         
     | 
| 4 | 
         
            -
            from  
     | 
| 
         | 
|
| 
         | 
|
| 5 | 
         
             
            from fastapi.responses import JSONResponse
         
     | 
| 6 | 
         
             
            from controllers.summary import get_summary_data # pylint: disable=import-error
         
     | 
| 
         | 
|
| 7 | 
         | 
| 8 | 
         
             
            router = APIRouter(prefix="/summary", tags=["summary"])
         
     | 
| 9 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 10 | 
         
             
            @router.get('')
         
     | 
| 11 | 
         
             
            async def get_summary() -> JSONResponse:
         
     | 
| 12 | 
         
             
                """
         
     | 
| 
         @@ -31,7 +86,6 @@ async def get_summary() -> JSONResponse: 
     | 
|
| 31 | 
         
             
                except Exception as e: #pylint: disable=broad-except
         
     | 
| 32 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=500)
         
     | 
| 33 | 
         | 
| 34 | 
         
            -
             
     | 
| 35 | 
         
             
            @router.get("/{module}/{chart_id}")
         
     | 
| 36 | 
         
             
            def get_summary_chart(module: str, chart_id: str) -> JSONResponse:
         
     | 
| 37 | 
         
             
                """
         
     | 
| 
         @@ -59,7 +113,6 @@ def get_summary_chart(module: str, chart_id: str) -> JSONResponse: 
     | 
|
| 59 | 
         
             
                except AttributeError as e:
         
     | 
| 60 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=500)
         
     | 
| 61 | 
         | 
| 62 | 
         
            -
             
     | 
| 63 | 
         
             
            @router.get("/{module}")
         
     | 
| 64 | 
         
             
            async def get_summary_module(module: str) -> JSONResponse:
         
     | 
| 65 | 
         
             
                """
         
     | 
| 
         @@ -79,3 +132,55 @@ async def get_summary_module(module: str) -> JSONResponse: 
     | 
|
| 79 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=404)
         
     | 
| 80 | 
         
             
                except Exception as e: #pylint: disable=broad-except
         
     | 
| 81 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=500)
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
             
            """This module defines the /summary route for the Flask application."""
         
     | 
| 2 | 
         | 
| 3 | 
         
             
            import importlib
            import logging
            import re
            from fastapi import APIRouter, HTTPException, Path
            from fastapi.responses import JSONResponse
            from controllers.summary import get_summary_data # pylint: disable=import-error
            from models.database import knowledge_base  # pylint: disable=import-error

            # Module-scoped logger. The previous `from venv import logger` reused the
            # stdlib venv module's logger, so records were emitted under the name "venv".
            logger = logging.getLogger(__name__)

            # Every route declared in this module is mounted under /summary.
            router = APIRouter(prefix="/summary", tags=["summary"])
         
     | 
| 12 | 
         | 
| 13 | 
         
            +
            @router.get("/{email}")
            async def get_user_document_stats(
                email: str = Path(..., description="User's email address")
                ) -> JSONResponse:
                """
                Get document statistics for a specific user.

                Validates the address, delegates the counting to
                ``knowledge_base.get_doc_count`` and adds the requested address to
                the returned counts.

                Args:
                    email (str): The user's email address

                Returns:
                    JSONResponse: A 200 response whose body is the dict returned by
                    ``knowledge_base.get_doc_count`` plus an "email" key, e.g.
                    {
                        "email": "user@example.com",
                        "gmail": 5,
                        "file": 12,
                        "total": 17
                    }
                    (count key names assumed from KnowledgeBase's return value;
                    confirm against ``get_doc_count``)

                Raises:
                    HTTPException: 400 for invalid email, 500 for database errors
                """
                # NOTE(review): this single-segment route is declared before
                # "/{module}" on the same router, so it appears to capture
                # /summary/<module> requests as well -- confirm the intended ordering.
                email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'

                # Validate outside the try block: raised inside it, the 400 was caught
                # by the generic handler below and turned into a 500.
                # fullmatch is used because `$` with re.match also accepts an address
                # followed by a trailing newline.
                if not email or not re.fullmatch(email_pattern, email):
                    raise HTTPException(
                        status_code=400,
                        detail="Invalid email format. Must be a valid email address."
                    )

                try:
                    # Get document counts from database
                    result = knowledge_base.get_doc_count(email)

                    # Add email to response
                    result["email"] = email

                    return JSONResponse(content=result, status_code=200)

                except HTTPException:
                    # Deliberate HTTP errors pass through unchanged.
                    raise

                except ValueError as e:
                    logger.error("Validation error for user %s: %s", email, str(e))
                    raise HTTPException(status_code=400, detail=str(e)) from e

                except Exception as e:  # pylint: disable=broad-exception-caught
                    logger.error("Database error for user %s: %s", email, str(e))
                    raise HTTPException(
                        status_code=500,
                        detail="Internal server error while retrieving document statistics."
                    ) from e
         
     | 
| 64 | 
         
            +
             
     | 
| 65 | 
         
             
            @router.get('')
         
     | 
| 66 | 
         
             
            async def get_summary() -> JSONResponse:
         
     | 
| 67 | 
         
             
                """
         
     | 
| 
         | 
|
| 86 | 
         
             
                except Exception as e: #pylint: disable=broad-except
         
     | 
| 87 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=500)
         
     | 
| 88 | 
         | 
| 
         | 
|
| 89 | 
         
             
            @router.get("/{module}/{chart_id}")
         
     | 
| 90 | 
         
             
            def get_summary_chart(module: str, chart_id: str) -> JSONResponse:
         
     | 
| 91 | 
         
             
                """
         
     | 
| 
         | 
|
| 113 | 
         
             
                except AttributeError as e:
         
     | 
| 114 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=500)
         
     | 
| 115 | 
         | 
| 
         | 
|
| 116 | 
         
             
            @router.get("/{module}")
         
     | 
| 117 | 
         
             
            async def get_summary_module(module: str) -> JSONResponse:
         
     | 
| 118 | 
         
             
                """
         
     | 
| 
         | 
|
| 132 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=404)
         
     | 
| 133 | 
         
             
                except Exception as e: #pylint: disable=broad-except
         
     | 
| 134 | 
         
             
                    return JSONResponse(content={"error": str(e)}, status_code=500)
         
     | 
| 135 | 
         
            +
             
     | 
| 136 | 
         
            +
            # @router.get("/{email}")
         
     | 
| 137 | 
         
            +
            # async def get_user_document_stats(
         
     | 
| 138 | 
         
            +
            #     email: str = Path(..., description="User's email address")
         
     | 
| 139 | 
         
            +
            #     ) -> JSONResponse:
         
     | 
| 140 | 
         
            +
            #     """
         
     | 
| 141 | 
         
            +
            #     Get document statistics for a specific user.
         
     | 
| 142 | 
         
            +
                
         
     | 
| 143 | 
         
            +
            #     This endpoint counts the number of unique emails and files uploaded by the user.
         
     | 
| 144 | 
         
            +
            #     It groups documents by metadata.id to ensure unique document counting (not chunks).
         
     | 
| 145 | 
         
            +
                
         
     | 
| 146 | 
         
            +
            #     Args:
         
     | 
| 147 | 
         
            +
            #         email (str): The user's email address
         
     | 
| 148 | 
         
            +
                    
         
     | 
| 149 | 
         
            +
            #     Returns:
         
     | 
| 150 | 
         
            +
            #         JSONResponse: A JSON response containing document counts:
         
     | 
| 151 | 
         
            +
            #         {
         
     | 
| 152 | 
         
            +
            #             "email": "user@example.com",
         
     | 
| 153 | 
         
            +
            #             "emails": 5,
         
     | 
| 154 | 
         
            +
            #             "files": 12,
         
     | 
| 155 | 
         
            +
            #             "total_documents": 17
         
     | 
| 156 | 
         
            +
            #         }
         
     | 
| 157 | 
         
            +
                    
         
     | 
| 158 | 
         
            +
            #     Raises:
         
     | 
| 159 | 
         
            +
            #         HTTPException: 400 for invalid email, 500 for database errors
         
     | 
| 160 | 
         
            +
            #     """
         
     | 
| 161 | 
         
            +
            #     try:
         
     | 
| 162 | 
         
            +
            #         email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
         
     | 
| 163 | 
         
            +
            #         if not email or not re.match(email_pattern, email):
         
     | 
| 164 | 
         
            +
            #             raise HTTPException(
         
     | 
| 165 | 
         
            +
            #                 status_code=400,
         
     | 
| 166 | 
         
            +
            #                 detail="Invalid email format. Must be a valid email address."
         
     | 
| 167 | 
         
            +
            #             )
         
     | 
| 168 | 
         
            +
             
     | 
| 169 | 
         
            +
            #         # Get document counts from database
         
     | 
| 170 | 
         
            +
            #         result = knowledge_base.get_doc_count(email)
         
     | 
| 171 | 
         
            +
             
     | 
| 172 | 
         
            +
            #         # Add email to response
         
     | 
| 173 | 
         
            +
            #         result["email"] = email
         
     | 
| 174 | 
         
            +
             
     | 
| 175 | 
         
            +
            #         return JSONResponse(content=result, status_code=200)
         
     | 
| 176 | 
         
            +
             
     | 
| 177 | 
         
            +
            #     except ValueError as e:
         
     | 
| 178 | 
         
            +
            #         logger.error("Validation error for user %s: %s", email, str(e))
         
     | 
| 179 | 
         
            +
            #         raise HTTPException(status_code=400, detail=str(e)) # pylint: disable=raise-missing-from
         
     | 
| 180 | 
         
            +
             
     | 
| 181 | 
         
            +
            #     except Exception as e:  # pylint: disable=broad-exception-caught
         
     | 
| 182 | 
         
            +
            #         logger.error("Database error for user %s: %s", email, str(e))
         
     | 
| 183 | 
         
            +
            #         raise HTTPException( # pylint: disable=raise-missing-from
         
     | 
| 184 | 
         
            +
            #             status_code=500,
         
     | 
| 185 | 
         
            +
            #             detail="Internal server error while retrieving document statistics."
         
     | 
| 186 | 
         
            +
            #         )
         
     |