Spaces:

Oxbridge-Economics
/

finfast-summary

Running

Muhammad Abdur Rahman Saad committed on Sep 17

Commit

66aefd0

unverified ·

2 Parent(s): 3e686b1 0e34a58

Merge pull request #47 from oxbridge-econ/abdur/fix/word-cloud

Files changed (4)

app/collectors/finfast/lda.py CHANGED Viewed

@@ -106,13 +106,15 @@ def calculate_keyword_sentiments(documents, term_frequencies, analyzer):
     return sentiment_scores
-def update_lda_result(filter_type, lda_results):
     """
     Update LDA results in MongoDB collection.
     Args:
         filter_type (str): Time filter type ('today', 'week', 'month')
         lda_results (list): List of topic data from get_lda_results()
     Returns:
         bool: True if successful, False otherwise
@@ -122,6 +124,7 @@ def update_lda_result(filter_type, lda_results):
         document = {
             '_id': filter_type,
             'result': lda_results,
         }
         # Upsert document (insert if not exists, update if exists)
@@ -164,6 +167,10 @@ def collect():
         logger.info("=" * 60)
         logger.info("RUNNING ANALYSIS - %s", filter_type.upper())
         logger.info("=" * 60)
         analysis_results = analyze_heated_keywords(filter_type, analyzer)
         if analysis_results:
@@ -171,7 +178,7 @@ def collect():
             # display_heated_keywords(filter_type, analysis_results)
             # Generate interactive HTML visualization
             lda_results = analyzer.get_lda_results(analysis_results['lda_results'])
-            update_lda_result(filter_type, lda_results)
     logger.info("Collection completed successfully.")
     return results

     return sentiment_scores
+def update_lda_result(filter_type, lda_results, start_date, end_date):
     """
     Update LDA results in MongoDB collection.
     Args:
         filter_type (str): Time filter type ('today', 'week', 'month')
         lda_results (list): List of topic data from get_lda_results()
+        start_date (datetime): Start date of the analysis period
+        end_date (datetime): End date of the analysis period
     Returns:
         bool: True if successful, False otherwise
         document = {
             '_id': filter_type,
             'result': lda_results,
+            'dateRange': f"{start_date.strftime('%Y-%m-%d')} - {end_date.strftime('%Y-%m-%d')}"
         }
         # Upsert document (insert if not exists, update if exists)
         logger.info("=" * 60)
         logger.info("RUNNING ANALYSIS - %s", filter_type.upper())
         logger.info("=" * 60)
+        # Get time range for this filter type
+        current_start, current_end = get_time_range(filter_type)
         analysis_results = analyze_heated_keywords(filter_type, analyzer)
         if analysis_results:
             # display_heated_keywords(filter_type, analysis_results)
             # Generate interactive HTML visualization
             lda_results = analyzer.get_lda_results(analysis_results['lda_results'])
+            update_lda_result(filter_type, lda_results, current_start, current_end)
     logger.info("Collection completed successfully.")
     return results

app/jobs.json CHANGED Viewed

@@ -3,15 +3,15 @@
         "id": "finfast.article",
         "func": "collectors.finfast.article:collect",
         "trigger": "cron",
-        "hour": 23,
-        "minute": 30
     },
     {
         "id": "finfast.entity",
         "func": "collectors.finfast.entity:collect",
         "trigger": "cron",
-        "hour": 23,
-        "minute": 45
     },
     {
         "id": "daily_category_update",
@@ -24,15 +24,15 @@
         "id": "daily_keyword_analysis_pipeline",
         "func": "collectors.finfast.keyword_analysis:collect",
         "trigger": "cron",
-        "hour": 22,
-        "minute": 45
     },
     {
         "id": "finfast.lda",
         "func": "collectors.finfast.lda:collect",
         "trigger": "cron",
         "hour": 23,
-        "minute": 45
     }
 ]

         "id": "finfast.article",
         "func": "collectors.finfast.article:collect",
         "trigger": "cron",
+        "hour": 20,
+        "minute": 0
     },
     {
         "id": "finfast.entity",
         "func": "collectors.finfast.entity:collect",
         "trigger": "cron",
+        "hour": 20,
+        "minute": 30
     },
     {
         "id": "daily_category_update",
         "id": "daily_keyword_analysis_pipeline",
         "func": "collectors.finfast.keyword_analysis:collect",
         "trigger": "cron",
+        "hour": 21,
+        "minute": 0
     },
     {
         "id": "finfast.lda",
         "func": "collectors.finfast.lda:collect",
         "trigger": "cron",
         "hour": 23,
+        "minute": 0
     }
 ]

app/routes/lda.py CHANGED Viewed

@@ -30,7 +30,7 @@ async def get_lda_results(filter_type: str):
                 'error': 'Invalid filter_type',
                 'message': f'filter_type must be one of: {valid_filters}'
             }, status_code=400)
         if filter_type == 'today':
             filter_type = 'daily'
@@ -50,7 +50,8 @@ async def get_lda_results(filter_type: str):
         return JSONResponse(content={
             'success': True,
             'filter_type': filter_type,
-            'results': document['result']
         }, status_code=200)
     except Exception as e: # pylint: disable=broad-exception-caught
@@ -73,7 +74,7 @@ async def get_all_lda_results():
         # Retrieve all from MongoDB
         documents = list(lda_collection.find())
-        # Remove MongoDB ObjectIds for JSON serialization
         for document in documents:
             document.pop('_id', None)

                 'error': 'Invalid filter_type',
                 'message': f'filter_type must be one of: {valid_filters}'
             }, status_code=400)
         if filter_type == 'today':
             filter_type = 'daily'
         return JSONResponse(content={
             'success': True,
             'filter_type': filter_type,
+            'results': document['result'],
+            'dateRange': document.get('dateRange', 'N/A')
         }, status_code=200)
     except Exception as e: # pylint: disable=broad-exception-caught
         # Retrieve all from MongoDB
         documents = list(lda_collection.find())
+        # Remove MongoDB ObjectIds for JSON serialization and preserve dateRange
         for document in documents:
             document.pop('_id', None)

app/routes/summary.py CHANGED Viewed

@@ -139,13 +139,13 @@ async def get_summary_module(module: str) -> JSONResponse:
 #     ) -> JSONResponse:
 #     """
 #     Get document statistics for a specific user.
 #     This endpoint counts the number of unique emails and files uploaded by the user.
 #     It groups documents by metadata.id to ensure unique document counting (not chunks).
 #     Args:
 #         email (str): The user's email address
 #     Returns:
 #         JSONResponse: A JSON response containing document counts:
 #         {
@@ -154,7 +154,7 @@ async def get_summary_module(module: str) -> JSONResponse:
 #             "files": 12,
 #             "total_documents": 17
 #         }
 #     Raises:
 #         HTTPException: 400 for invalid email, 500 for database errors
 #     """

 #     ) -> JSONResponse:
 #     """
 #     Get document statistics for a specific user.
 #     This endpoint counts the number of unique emails and files uploaded by the user.
 #     It groups documents by metadata.id to ensure unique document counting (not chunks).
 #     Args:
 #         email (str): The user's email address
 #     Returns:
 #         JSONResponse: A JSON response containing document counts:
 #         {
 #             "files": 12,
 #             "total_documents": 17
 #         }
 #     Raises:
 #         HTTPException: 400 for invalid email, 500 for database errors
 #     """