Muhammad Abdur Rahman Saad committed on
Commit
66aefd0
·
unverified ·
2 Parent(s): 3e686b1 0e34a58

Merge pull request #47 from oxbridge-econ/abdur/fix/word-cloud

Browse files
app/collectors/finfast/lda.py CHANGED
@@ -106,13 +106,15 @@ def calculate_keyword_sentiments(documents, term_frequencies, analyzer):
106
 
107
  return sentiment_scores
108
 
109
- def update_lda_result(filter_type, lda_results):
110
  """
111
  Update LDA results in MongoDB collection.
112
 
113
  Args:
114
  filter_type (str): Time filter type ('today', 'week', 'month')
115
  lda_results (list): List of topic data from get_lda_results()
 
 
116
 
117
  Returns:
118
  bool: True if successful, False otherwise
@@ -122,6 +124,7 @@ def update_lda_result(filter_type, lda_results):
122
  document = {
123
  '_id': filter_type,
124
  'result': lda_results,
 
125
  }
126
 
127
  # Upsert document (insert if not exists, update if exists)
@@ -164,6 +167,10 @@ def collect():
164
  logger.info("=" * 60)
165
  logger.info("RUNNING ANALYSIS - %s", filter_type.upper())
166
  logger.info("=" * 60)
 
 
 
 
167
  analysis_results = analyze_heated_keywords(filter_type, analyzer)
168
 
169
  if analysis_results:
@@ -171,7 +178,7 @@ def collect():
171
  # display_heated_keywords(filter_type, analysis_results)
172
  # Generate interactive HTML visualization
173
  lda_results = analyzer.get_lda_results(analysis_results['lda_results'])
174
- update_lda_result(filter_type, lda_results)
175
 
176
  logger.info("Collection completed successfully.")
177
  return results
 
106
 
107
  return sentiment_scores
108
 
109
+ def update_lda_result(filter_type, lda_results, start_date, end_date):
110
  """
111
  Update LDA results in MongoDB collection.
112
 
113
  Args:
114
  filter_type (str): Time filter type ('today', 'week', 'month')
115
  lda_results (list): List of topic data from get_lda_results()
116
+ start_date (datetime): Start date of the analysis period
117
+ end_date (datetime): End date of the analysis period
118
 
119
  Returns:
120
  bool: True if successful, False otherwise
 
124
  document = {
125
  '_id': filter_type,
126
  'result': lda_results,
127
+ 'dateRange': f"{start_date.strftime('%Y-%m-%d')} - {end_date.strftime('%Y-%m-%d')}"
128
  }
129
 
130
  # Upsert document (insert if not exists, update if exists)
 
167
  logger.info("=" * 60)
168
  logger.info("RUNNING ANALYSIS - %s", filter_type.upper())
169
  logger.info("=" * 60)
170
+
171
+ # Get time range for this filter type
172
+ current_start, current_end = get_time_range(filter_type)
173
+
174
  analysis_results = analyze_heated_keywords(filter_type, analyzer)
175
 
176
  if analysis_results:
 
178
  # display_heated_keywords(filter_type, analysis_results)
179
  # Generate interactive HTML visualization
180
  lda_results = analyzer.get_lda_results(analysis_results['lda_results'])
181
+ update_lda_result(filter_type, lda_results, current_start, current_end)
182
 
183
  logger.info("Collection completed successfully.")
184
  return results
app/jobs.json CHANGED
@@ -3,15 +3,15 @@
3
  "id": "finfast.article",
4
  "func": "collectors.finfast.article:collect",
5
  "trigger": "cron",
6
- "hour": 23,
7
- "minute": 30
8
  },
9
  {
10
  "id": "finfast.entity",
11
  "func": "collectors.finfast.entity:collect",
12
  "trigger": "cron",
13
- "hour": 23,
14
- "minute": 45
15
  },
16
  {
17
  "id": "daily_category_update",
@@ -24,15 +24,15 @@
24
  "id": "daily_keyword_analysis_pipeline",
25
  "func": "collectors.finfast.keyword_analysis:collect",
26
  "trigger": "cron",
27
- "hour": 22,
28
- "minute": 45
29
  },
30
  {
31
  "id": "finfast.lda",
32
  "func": "collectors.finfast.lda:collect",
33
  "trigger": "cron",
34
  "hour": 23,
35
- "minute": 45
36
  }
37
 
38
  ]
 
3
  "id": "finfast.article",
4
  "func": "collectors.finfast.article:collect",
5
  "trigger": "cron",
6
+ "hour": 20,
7
+ "minute": 0
8
  },
9
  {
10
  "id": "finfast.entity",
11
  "func": "collectors.finfast.entity:collect",
12
  "trigger": "cron",
13
+ "hour": 20,
14
+ "minute": 30
15
  },
16
  {
17
  "id": "daily_category_update",
 
24
  "id": "daily_keyword_analysis_pipeline",
25
  "func": "collectors.finfast.keyword_analysis:collect",
26
  "trigger": "cron",
27
+ "hour": 21,
28
+ "minute": 0
29
  },
30
  {
31
  "id": "finfast.lda",
32
  "func": "collectors.finfast.lda:collect",
33
  "trigger": "cron",
34
  "hour": 23,
35
+ "minute": 0
36
  }
37
 
38
  ]
app/routes/lda.py CHANGED
@@ -30,7 +30,7 @@ async def get_lda_results(filter_type: str):
30
  'error': 'Invalid filter_type',
31
  'message': f'filter_type must be one of: {valid_filters}'
32
  }, status_code=400)
33
-
34
  if filter_type == 'today':
35
  filter_type = 'daily'
36
 
@@ -50,7 +50,8 @@ async def get_lda_results(filter_type: str):
50
  return JSONResponse(content={
51
  'success': True,
52
  'filter_type': filter_type,
53
- 'results': document['result']
 
54
  }, status_code=200)
55
 
56
  except Exception as e: # pylint: disable=broad-exception-caught
@@ -73,7 +74,7 @@ async def get_all_lda_results():
73
  # Retrieve all from MongoDB
74
  documents = list(lda_collection.find())
75
 
76
- # Remove MongoDB ObjectIds for JSON serialization
77
  for document in documents:
78
  document.pop('_id', None)
79
 
 
30
  'error': 'Invalid filter_type',
31
  'message': f'filter_type must be one of: {valid_filters}'
32
  }, status_code=400)
33
+
34
  if filter_type == 'today':
35
  filter_type = 'daily'
36
 
 
50
  return JSONResponse(content={
51
  'success': True,
52
  'filter_type': filter_type,
53
+ 'results': document['result'],
54
+ 'dateRange': document.get('dateRange', 'N/A')
55
  }, status_code=200)
56
 
57
  except Exception as e: # pylint: disable=broad-exception-caught
 
74
  # Retrieve all from MongoDB
75
  documents = list(lda_collection.find())
76
 
77
+ # Remove MongoDB ObjectIds for JSON serialization and preserve dateRange
78
  for document in documents:
79
  document.pop('_id', None)
80
 
app/routes/summary.py CHANGED
@@ -139,13 +139,13 @@ async def get_summary_module(module: str) -> JSONResponse:
139
  # ) -> JSONResponse:
140
  # """
141
  # Get document statistics for a specific user.
142
-
143
  # This endpoint counts the number of unique emails and files uploaded by the user.
144
  # It groups documents by metadata.id to ensure unique document counting (not chunks).
145
-
146
  # Args:
147
  # email (str): The user's email address
148
-
149
  # Returns:
150
  # JSONResponse: A JSON response containing document counts:
151
  # {
@@ -154,7 +154,7 @@ async def get_summary_module(module: str) -> JSONResponse:
154
  # "files": 12,
155
  # "total_documents": 17
156
  # }
157
-
158
  # Raises:
159
  # HTTPException: 400 for invalid email, 500 for database errors
160
  # """
 
139
  # ) -> JSONResponse:
140
  # """
141
  # Get document statistics for a specific user.
142
+
143
  # This endpoint counts the number of unique emails and files uploaded by the user.
144
  # It groups documents by metadata.id to ensure unique document counting (not chunks).
145
+
146
  # Args:
147
  # email (str): The user's email address
148
+
149
  # Returns:
150
  # JSONResponse: A JSON response containing document counts:
151
  # {
 
154
  # "files": 12,
155
  # "total_documents": 17
156
  # }
157
+
158
  # Raises:
159
  # HTTPException: 400 for invalid email, 500 for database errors
160
  # """