Muhammad Abdur Rahman Saad committed on
Commit
b0e3d06
·
1 Parent(s): 680b2ea

modification to LDA filter and doc counting

Browse files
Files changed (2) hide show
  1. app/controllers/lda.py +5 -11
  2. app/routes/lda.py +1 -1
app/controllers/lda.py CHANGED
@@ -645,18 +645,12 @@ class HeatedKeywordsAnalyzer: # pylint: disable=too-many-instance-attributes
645
  """
646
  Dynamically determines the optimal number of topics for a gensim model.
647
  """
648
- if documents_count < 20:
649
- # For very few documents, keep the range small and conservative.
650
- topic_range = range(2, min(5, documents_count))
651
- elif documents_count < 50:
652
- # With a moderate number of documents, we can explore a slightly larger range.
653
- # The lower limit is now increased to 4.
654
- topic_range = range(4, min(8, documents_count // 3))
655
  else:
656
- # For 50 or more documents, start with at least 8 topics and have a higher upper bound.
657
- # The upper limit is now the smaller of 25 or a fifth of the document count,
658
- # allowing it to scale for hundreds of documents.
659
- topic_range = range(8, min(25, documents_count // 5))
660
 
661
  if len(topic_range) < 2:
662
  return max(2, min(3, documents_count))
 
645
  """
646
  Dynamically determines the optimal number of topics for a gensim model.
647
  """
648
+ if documents_count < 100:
649
+ topic_range = range(6, min(12, documents_count // 8))
650
+ elif documents_count< 300:
651
+ topic_range = range(12, min(20, documents_count // 10))
 
 
 
652
  else:
653
+ topic_range = range(20, min(35, documents_count // 25))
 
 
 
654
 
655
  if len(topic_range) < 2:
656
  return max(2, min(3, documents_count))
app/routes/lda.py CHANGED
@@ -24,7 +24,7 @@ async def get_lda_results(filter_type: str):
24
  """
25
  try:
26
  # Validate filter_type
27
- valid_filters = ['today', 'week', 'month']
28
  if filter_type not in valid_filters:
29
  return JSONResponse(content={
30
  'error': 'Invalid filter_type',
 
24
  """
25
  try:
26
  # Validate filter_type
27
+ valid_filters = ['daily', 'week', 'month']
28
  if filter_type not in valid_filters:
29
  return JSONResponse(content={
30
  'error': 'Invalid filter_type',