Muhammad Abdur Rahman Saad
committed on
Commit
·
b0e3d06
1
Parent(s):
680b2ea
modification to LDA filter and doc counting
Browse files
- app/controllers/lda.py +5 -11
- app/routes/lda.py +1 -1
app/controllers/lda.py
CHANGED
@@ -645,18 +645,12 @@ class HeatedKeywordsAnalyzer: # pylint: disable=too-many-instance-attributes
|
|
645 |
"""
|
646 |
Dynamically determines the optimal number of topics for a gensim model.
|
647 |
"""
|
648 |
-
if documents_count <
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
# With a moderate number of documents, we can explore a slightly larger range.
|
653 |
-
# The lower limit is now increased to 4.
|
654 |
-
topic_range = range(4, min(8, documents_count // 3))
|
655 |
else:
|
656 |
-
|
657 |
-
# The upper limit is now the smaller of 25 or a fifth of the document count,
|
658 |
-
# allowing it to scale for hundreds of documents.
|
659 |
-
topic_range = range(8, min(25, documents_count // 5))
|
660 |
|
661 |
if len(topic_range) < 2:
|
662 |
return max(2, min(3, documents_count))
|
|
|
645 |
"""
|
646 |
Dynamically determines the optimal number of topics for a gensim model.
|
647 |
"""
|
648 |
+
if documents_count < 100:
|
649 |
+
topic_range = range(6, min(12, documents_count // 8))
|
650 |
+
elif documents_count< 300:
|
651 |
+
topic_range = range(12, min(20, documents_count // 10))
|
|
|
|
|
|
|
652 |
else:
|
653 |
+
topic_range = range(20, min(35, documents_count // 25))
|
|
|
|
|
|
|
654 |
|
655 |
if len(topic_range) < 2:
|
656 |
return max(2, min(3, documents_count))
|
app/routes/lda.py
CHANGED
@@ -24,7 +24,7 @@ async def get_lda_results(filter_type: str):
|
|
24 |
"""
|
25 |
try:
|
26 |
# Validate filter_type
|
27 |
-
valid_filters = ['
|
28 |
if filter_type not in valid_filters:
|
29 |
return JSONResponse(content={
|
30 |
'error': 'Invalid filter_type',
|
|
|
24 |
"""
|
25 |
try:
|
26 |
# Validate filter_type
|
27 |
+
valid_filters = ['daily', 'week', 'month']
|
28 |
if filter_type not in valid_filters:
|
29 |
return JSONResponse(content={
|
30 |
'error': 'Invalid filter_type',
|