Muhammad Abdur Rahman Saad
committed on
Commit
·
72f4cb5
1
Parent(s):
91abca2
fix pylint errors
Browse files
- .github/workflows/pylint.yml +1 -1
- app/app.py +2 -1
- app/collectors/finfast/article.py +1 -1
- app/collectors/finfast/category.py +1 -1
- app/collectors/finfast/entity.py +1 -1
- app/collectors/finfast/keyword.py +2 -2
- app/collectors/finfast/keyword_analysis.py +3 -3
- app/collectors/finfast/lda.py +4 -4
- app/controllers/category.py +1 -1
- app/controllers/keyword.py +2 -2
- app/controllers/keyword_analysis.py +2 -2
- app/controllers/lda.py +8 -8
- app/controllers/summary/utils.py +1 -1
- app/routes/category.py +1 -1
- app/routes/keyword.py +7 -8
- app/routes/lda.py +1 -1
- app/routes/summary.py +1 -1
.github/workflows/pylint.yml
CHANGED
|
@@ -23,4 +23,4 @@ jobs:
|
|
| 23 |
pip install -r app/requirements.txt
|
| 24 |
- name: Analysing the code with pylint
|
| 25 |
run: |
|
| 26 |
-
pylint $(git ls-files '*.py')
|
|
|
|
| 23 |
pip install -r app/requirements.txt
|
| 24 |
- name: Analysing the code with pylint
|
| 25 |
run: |
|
| 26 |
+
pylint --disable=R0801 $(git ls-files '*.py')
|
app/app.py
CHANGED
|
@@ -9,7 +9,8 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
| 9 |
from fastapi.responses import JSONResponse
|
| 10 |
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
| 11 |
from apscheduler.triggers.cron import CronTrigger
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
class Config: # pylint: disable=too-few-public-methods
|
|
|
|
| 9 |
from fastapi.responses import JSONResponse
|
| 10 |
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
| 11 |
from apscheduler.triggers.cron import CronTrigger
|
| 12 |
+
|
| 13 |
+
from routes import category, summary, keyword, lda # pylint: disable=import-error
|
| 14 |
|
| 15 |
|
| 16 |
class Config: # pylint: disable=too-few-public-methods
|
app/collectors/finfast/article.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
from venv import logger
|
| 3 |
from datetime import datetime, timedelta
|
| 4 |
from pymongo.errors import PyMongoError
|
| 5 |
-
from models.database import article_collection
|
| 6 |
|
| 7 |
from .utils import scan_dynamodb_table, delete_old_documents, upsert_item
|
| 8 |
|
|
|
|
| 2 |
from venv import logger
|
| 3 |
from datetime import datetime, timedelta
|
| 4 |
from pymongo.errors import PyMongoError
|
| 5 |
+
from models.database import article_collection # pylint: disable=import-error
|
| 6 |
|
| 7 |
from .utils import scan_dynamodb_table, delete_old_documents, upsert_item
|
| 8 |
|
app/collectors/finfast/category.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing import Dict, List, Tuple
|
|
| 11 |
from collections import defaultdict
|
| 12 |
from dataclasses import dataclass
|
| 13 |
from botocore.exceptions import ClientError
|
| 14 |
-
from models.database import category_collection
|
| 15 |
|
| 16 |
from ..utils import get_client_connection
|
| 17 |
|
|
|
|
| 11 |
from collections import defaultdict
|
| 12 |
from dataclasses import dataclass
|
| 13 |
from botocore.exceptions import ClientError
|
| 14 |
+
from models.database import category_collection # pylint: disable=import-error
|
| 15 |
|
| 16 |
from ..utils import get_client_connection
|
| 17 |
|
app/collectors/finfast/entity.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""Module for collecting and managing entity data from DynamoDB to MongoDB."""
|
| 2 |
from datetime import datetime, timedelta
|
| 3 |
from pymongo.errors import PyMongoError
|
| 4 |
-
from models.database import entity_collection
|
| 5 |
|
| 6 |
from .utils import scan_dynamodb_table, delete_old_documents, upsert_item
|
| 7 |
|
|
|
|
| 1 |
"""Module for collecting and managing entity data from DynamoDB to MongoDB."""
|
| 2 |
from datetime import datetime, timedelta
|
| 3 |
from pymongo.errors import PyMongoError
|
| 4 |
+
from models.database import entity_collection # pylint: disable=import-error
|
| 5 |
|
| 6 |
from .utils import scan_dynamodb_table, delete_old_documents, upsert_item
|
| 7 |
|
app/collectors/finfast/keyword.py
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
from datetime import datetime, timedelta
|
| 3 |
import logging
|
| 4 |
|
| 5 |
-
from models.database import article_collection, keywords_collection
|
| 6 |
-
from controllers.keyword import (
|
| 7 |
fetch_articles_for_period,
|
| 8 |
fetch_historical_keywords,
|
| 9 |
run_llm_extraction,
|
|
|
|
| 2 |
from datetime import datetime, timedelta
|
| 3 |
import logging
|
| 4 |
|
| 5 |
+
from models.database import article_collection, keywords_collection # pylint: disable=import-error
|
| 6 |
+
from controllers.keyword import ( # pylint: disable=import-error
|
| 7 |
fetch_articles_for_period,
|
| 8 |
fetch_historical_keywords,
|
| 9 |
run_llm_extraction,
|
app/collectors/finfast/keyword_analysis.py
CHANGED
|
@@ -4,14 +4,14 @@ Daily keyword pipeline collector for combined keyword detection, analysis, and c
|
|
| 4 |
from datetime import datetime
|
| 5 |
import logging
|
| 6 |
|
| 7 |
-
from models.database import article_collection, keywords_collection, summary_collection
|
| 8 |
-
from controllers.keyword import (
|
| 9 |
fetch_articles_for_period,
|
| 10 |
fetch_historical_keywords,
|
| 11 |
run_llm_extraction,
|
| 12 |
calculate_metrics_and_save_for_date
|
| 13 |
)
|
| 14 |
-
from controllers.keyword_analysis import (
|
| 15 |
analyze_keywords_from_database,
|
| 16 |
save_summary_to_database,
|
| 17 |
cleanup_old_keywords
|
|
|
|
| 4 |
from datetime import datetime
|
| 5 |
import logging
|
| 6 |
|
| 7 |
+
from models.database import article_collection, keywords_collection, summary_collection # pylint: disable=import-error
|
| 8 |
+
from controllers.keyword import ( # pylint: disable=import-error
|
| 9 |
fetch_articles_for_period,
|
| 10 |
fetch_historical_keywords,
|
| 11 |
run_llm_extraction,
|
| 12 |
calculate_metrics_and_save_for_date
|
| 13 |
)
|
| 14 |
+
from controllers.keyword_analysis import ( # pylint: disable=import-error
|
| 15 |
analyze_keywords_from_database,
|
| 16 |
save_summary_to_database,
|
| 17 |
cleanup_old_keywords
|
app/collectors/finfast/lda.py
CHANGED
|
@@ -3,15 +3,15 @@ LDA collector for topic modeling and visualization.
|
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
|
| 6 |
-
from controllers.lda import HeatedKeywordsAnalyzer
|
| 7 |
-
from controllers.keyword_analysis import get_time_range, get_previous_time_range
|
| 8 |
-
from models.database import article_collection, keywords_collection, lda_collection
|
| 9 |
|
| 10 |
|
| 11 |
# Configure logger
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
-
def analyze_heated_keywords(filter_type, analyzer=None):
|
| 15 |
"""
|
| 16 |
Analyzes heated keywords for a specific time period.
|
| 17 |
|
|
|
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
|
| 6 |
+
from controllers.lda import HeatedKeywordsAnalyzer # pylint: disable=import-error
|
| 7 |
+
from controllers.keyword_analysis import get_time_range, get_previous_time_range # pylint: disable=import-error
|
| 8 |
+
from models.database import article_collection, keywords_collection, lda_collection # pylint: disable=import-error
|
| 9 |
|
| 10 |
|
| 11 |
# Configure logger
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
+
def analyze_heated_keywords(filter_type, analyzer=None): # pylint: disable=too-many-locals
|
| 15 |
"""
|
| 16 |
Analyzes heated keywords for a specific time period.
|
| 17 |
|
app/controllers/category.py
CHANGED
|
@@ -4,7 +4,7 @@ Category Controller - Business logic for handling category data.
|
|
| 4 |
This module contains functions that interact with the database
|
| 5 |
to fetch and process data sorted by category
|
| 6 |
"""
|
| 7 |
-
from models.database import category_collection
|
| 8 |
|
| 9 |
def get_categories():
|
| 10 |
|
|
|
|
| 4 |
This module contains functions that interact with the database
|
| 5 |
to fetch and process data sorted by category
|
| 6 |
"""
|
| 7 |
+
from models.database import category_collection # pylint: disable=import-error
|
| 8 |
|
| 9 |
def get_categories():
|
| 10 |
|
app/controllers/keyword.py
CHANGED
|
@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
|
|
| 10 |
from langchain.output_parsers import PydanticOutputParser
|
| 11 |
from langchain.prompts import ChatPromptTemplate
|
| 12 |
|
| 13 |
-
from models.llm import gpt
|
| 14 |
|
| 15 |
# Configure logger
|
| 16 |
logger = logging.getLogger(__name__)
|
|
@@ -687,7 +687,7 @@ def invoke_llm_for_batch(articles_in_batch: list, historical_keywords: set) -> L
|
|
| 687 |
logger.error("LLM batch invocation failed: %s", e)
|
| 688 |
return []
|
| 689 |
|
| 690 |
-
def calculate_metrics_and_save_for_date(
|
| 691 |
collection,
|
| 692 |
today_keywords: List[Keyword],
|
| 693 |
target_date: datetime
|
|
|
|
| 10 |
from langchain.output_parsers import PydanticOutputParser
|
| 11 |
from langchain.prompts import ChatPromptTemplate
|
| 12 |
|
| 13 |
+
from models.llm import gpt # pylint: disable=import-error
|
| 14 |
|
| 15 |
# Configure logger
|
| 16 |
logger = logging.getLogger(__name__)
|
|
|
|
| 687 |
logger.error("LLM batch invocation failed: %s", e)
|
| 688 |
return []
|
| 689 |
|
| 690 |
+
def calculate_metrics_and_save_for_date( # pylint: disable=too-many-locals
|
| 691 |
collection,
|
| 692 |
today_keywords: List[Keyword],
|
| 693 |
target_date: datetime
|
app/controllers/keyword_analysis.py
CHANGED
|
@@ -68,7 +68,7 @@ def get_previous_time_range(
|
|
| 68 |
|
| 69 |
return previous_start, previous_end
|
| 70 |
|
| 71 |
-
def calculate_heating_scores_from_database(
|
| 72 |
filter_type: str,
|
| 73 |
keywords_collection
|
| 74 |
) -> tuple[dict, dict, dict, dict]: # Add dict for categories
|
|
@@ -122,7 +122,7 @@ def calculate_heating_scores_from_database(
|
|
| 122 |
keyword_categories[keyword] = category
|
| 123 |
|
| 124 |
# For daily analysis, if no current keywords found, try to find most recent data
|
| 125 |
-
if filter_type == "today" and current_docs_found == 0:
|
| 126 |
logger.warning("No keywords found for today. Looking for most recent available data...")
|
| 127 |
|
| 128 |
# Find the most recent date with keywords
|
|
|
|
| 68 |
|
| 69 |
return previous_start, previous_end
|
| 70 |
|
| 71 |
+
def calculate_heating_scores_from_database( # pylint: disable=too-many-locals, too-many-branches, too-many-statements
|
| 72 |
filter_type: str,
|
| 73 |
keywords_collection
|
| 74 |
) -> tuple[dict, dict, dict, dict]: # Add dict for categories
|
|
|
|
| 122 |
keyword_categories[keyword] = category
|
| 123 |
|
| 124 |
# For daily analysis, if no current keywords found, try to find most recent data
|
| 125 |
+
if filter_type == "today" and current_docs_found == 0: # pylint: disable=too-many-nested-blocks
|
| 126 |
logger.warning("No keywords found for today. Looking for most recent available data...")
|
| 127 |
|
| 128 |
# Find the most recent date with keywords
|
app/controllers/lda.py
CHANGED
|
@@ -19,7 +19,7 @@ from gensim.models.coherencemodel import CoherenceModel
|
|
| 19 |
import requests
|
| 20 |
from langchain.schema import SystemMessage, HumanMessage
|
| 21 |
|
| 22 |
-
from models.llm import gpt
|
| 23 |
from .keyword import FIN_KEYWORDS
|
| 24 |
from .keyword_analysis import get_time_range, get_previous_time_range
|
| 25 |
|
|
@@ -108,7 +108,7 @@ class FinancialKeywordManager:
|
|
| 108 |
# If all checks pass, the keyword is considered valid
|
| 109 |
return True
|
| 110 |
|
| 111 |
-
def fetch_from_eodhd(self, days_back=30):
|
| 112 |
"""
|
| 113 |
Fetch trending financial keywords from EODHD API.
|
| 114 |
|
|
@@ -137,7 +137,7 @@ class FinancialKeywordManager:
|
|
| 137 |
'central-bank,monetary-policy,fiscal-policy')
|
| 138 |
}
|
| 139 |
|
| 140 |
-
try:
|
| 141 |
response = requests.get(base_url, params=params, timeout=60)
|
| 142 |
if response.status_code == 200:
|
| 143 |
articles = response.json()
|
|
@@ -208,7 +208,7 @@ class FinancialKeywordManager:
|
|
| 208 |
logger.error("EODHD API error: %s", e)
|
| 209 |
return set()
|
| 210 |
|
| 211 |
-
class HeatedKeywordsAnalyzer:
|
| 212 |
"""
|
| 213 |
This class handles the extraction of financial terms from articles, calculates heating scores,
|
| 214 |
performs LDA topic clustering, and analyzes sentiment of financial terms.
|
|
@@ -329,7 +329,7 @@ class HeatedKeywordsAnalyzer:
|
|
| 329 |
})
|
| 330 |
return documents
|
| 331 |
|
| 332 |
-
def extract_financial_terms(self, text):
|
| 333 |
"""
|
| 334 |
Extract financial terms from text with advanced matching techniques.
|
| 335 |
|
|
@@ -472,7 +472,7 @@ class HeatedKeywordsAnalyzer:
|
|
| 472 |
logger.error("Sentiment analysis error: %s", e)
|
| 473 |
return 'neutral', 0.5
|
| 474 |
|
| 475 |
-
def calculate_heating_scores(self, current_docs, previous_docs):
|
| 476 |
"""
|
| 477 |
Calculate heating scores and perform LDA clustering.
|
| 478 |
|
|
@@ -641,7 +641,7 @@ class HeatedKeywordsAnalyzer:
|
|
| 641 |
dense_matrix[i, topic_id] = prob
|
| 642 |
return dense_matrix
|
| 643 |
|
| 644 |
-
def find_optimal_topics_gensim(self, corpus, id2word, tokenized_texts, documents_count):
|
| 645 |
"""
|
| 646 |
Dynamically determines the optimal number of topics for a gensim model.
|
| 647 |
"""
|
|
@@ -970,7 +970,7 @@ class HeatedKeywordsAnalyzer:
|
|
| 970 |
})
|
| 971 |
return topic_data
|
| 972 |
|
| 973 |
-
def analyze_heated_keywords(self, filter_type, analyzer=None):
|
| 974 |
"""
|
| 975 |
Analyzes heated keywords for a specific time period.
|
| 976 |
|
|
|
|
| 19 |
import requests
|
| 20 |
from langchain.schema import SystemMessage, HumanMessage
|
| 21 |
|
| 22 |
+
from models.llm import gpt # pylint: disable=import-error
|
| 23 |
from .keyword import FIN_KEYWORDS
|
| 24 |
from .keyword_analysis import get_time_range, get_previous_time_range
|
| 25 |
|
|
|
|
| 108 |
# If all checks pass, the keyword is considered valid
|
| 109 |
return True
|
| 110 |
|
| 111 |
+
def fetch_from_eodhd(self, days_back=30): # pylint: disable=too-many-locals, too-many-branches
|
| 112 |
"""
|
| 113 |
Fetch trending financial keywords from EODHD API.
|
| 114 |
|
|
|
|
| 137 |
'central-bank,monetary-policy,fiscal-policy')
|
| 138 |
}
|
| 139 |
|
| 140 |
+
try: # pylint: disable=too-many-nested-blocks
|
| 141 |
response = requests.get(base_url, params=params, timeout=60)
|
| 142 |
if response.status_code == 200:
|
| 143 |
articles = response.json()
|
|
|
|
| 208 |
logger.error("EODHD API error: %s", e)
|
| 209 |
return set()
|
| 210 |
|
| 211 |
+
class HeatedKeywordsAnalyzer: # pylint: disable=too-many-instance-attributes
|
| 212 |
"""
|
| 213 |
This class handles the extraction of financial terms from articles, calculates heating scores,
|
| 214 |
performs LDA topic clustering, and analyzes sentiment of financial terms.
|
|
|
|
| 329 |
})
|
| 330 |
return documents
|
| 331 |
|
| 332 |
+
def extract_financial_terms(self, text): # pylint: disable=too-many-locals, too-many-branches, too-many-statements
|
| 333 |
"""
|
| 334 |
Extract financial terms from text with advanced matching techniques.
|
| 335 |
|
|
|
|
| 472 |
logger.error("Sentiment analysis error: %s", e)
|
| 473 |
return 'neutral', 0.5
|
| 474 |
|
| 475 |
+
def calculate_heating_scores(self, current_docs, previous_docs): # pylint: disable=too-many-locals
|
| 476 |
"""
|
| 477 |
Calculate heating scores and perform LDA clustering.
|
| 478 |
|
|
|
|
| 641 |
dense_matrix[i, topic_id] = prob
|
| 642 |
return dense_matrix
|
| 643 |
|
| 644 |
+
def find_optimal_topics_gensim(self, corpus, id2word, tokenized_texts, documents_count): # pylint: disable=too-many-locals
|
| 645 |
"""
|
| 646 |
Dynamically determines the optimal number of topics for a gensim model.
|
| 647 |
"""
|
|
|
|
| 970 |
})
|
| 971 |
return topic_data
|
| 972 |
|
| 973 |
+
def analyze_heated_keywords(self, filter_type, analyzer=None): # pylint: disable=too-many-locals
|
| 974 |
"""
|
| 975 |
Analyzes heated keywords for a specific time period.
|
| 976 |
|
app/controllers/summary/utils.py
CHANGED
|
@@ -6,7 +6,7 @@ from datetime import datetime, timedelta
|
|
| 6 |
from typing import Dict, Any
|
| 7 |
from collections import defaultdict
|
| 8 |
|
| 9 |
-
from models.database import article_collection, entity_collection
|
| 10 |
|
| 11 |
# Entity type full names mapping
|
| 12 |
ENTITY_TYPE_FULL_NAMES = {
|
|
|
|
| 6 |
from typing import Dict, Any
|
| 7 |
from collections import defaultdict
|
| 8 |
|
| 9 |
+
from models.database import article_collection, entity_collection # pylint: disable=import-error
|
| 10 |
|
| 11 |
# Entity type full names mapping
|
| 12 |
ENTITY_TYPE_FULL_NAMES = {
|
app/routes/category.py
CHANGED
|
@@ -9,7 +9,7 @@ Routes:
|
|
| 9 |
"""
|
| 10 |
from fastapi import APIRouter
|
| 11 |
from fastapi.responses import JSONResponse
|
| 12 |
-
from controllers.category import get_categories
|
| 13 |
|
| 14 |
router = APIRouter(prefix="/category", tags=["category"])
|
| 15 |
|
|
|
|
| 9 |
"""
|
| 10 |
from fastapi import APIRouter
|
| 11 |
from fastapi.responses import JSONResponse
|
| 12 |
+
from controllers.category import get_categories # pylint: disable=import-error
|
| 13 |
|
| 14 |
router = APIRouter(prefix="/category", tags=["category"])
|
| 15 |
|
app/routes/keyword.py
CHANGED
|
@@ -3,8 +3,8 @@ import logging
|
|
| 3 |
from fastapi import APIRouter
|
| 4 |
from fastapi.responses import JSONResponse
|
| 5 |
|
| 6 |
-
from collectors.finfast.keyword import collect as run_historical_backfill
|
| 7 |
-
from models.database import keywords_collection, summary_collection
|
| 8 |
|
| 9 |
# Configure logger
|
| 10 |
logger = logging.getLogger(__name__)
|
|
@@ -155,12 +155,11 @@ async def get_summary_by_period(period: str):
|
|
| 155 |
"categories": summary_doc.get("categories", [])
|
| 156 |
}
|
| 157 |
}, status_code=200)
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
}, status_code=404)
|
| 164 |
|
| 165 |
except Exception as e: # pylint: disable=broad-exception-caught
|
| 166 |
logger.error("Error getting %s summary: %s", period, e)
|
|
|
|
| 3 |
from fastapi import APIRouter
|
| 4 |
from fastapi.responses import JSONResponse
|
| 5 |
|
| 6 |
+
from collectors.finfast.keyword import collect as run_historical_backfill # pylint: disable=import-error
|
| 7 |
+
from models.database import keywords_collection, summary_collection # pylint: disable=import-error
|
| 8 |
|
| 9 |
# Configure logger
|
| 10 |
logger = logging.getLogger(__name__)
|
|
|
|
| 155 |
"categories": summary_doc.get("categories", [])
|
| 156 |
}
|
| 157 |
}, status_code=200)
|
| 158 |
+
return JSONResponse(content={
|
| 159 |
+
"status": "not_found",
|
| 160 |
+
"message": f"No summary available for {period}",
|
| 161 |
+
"period": period
|
| 162 |
+
}, status_code=404)
|
|
|
|
| 163 |
|
| 164 |
except Exception as e: # pylint: disable=broad-exception-caught
|
| 165 |
logger.error("Error getting %s summary: %s", period, e)
|
app/routes/lda.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
import logging
|
| 4 |
from fastapi import APIRouter
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
-
from models.database import lda_collection
|
| 7 |
|
| 8 |
# Configure logger
|
| 9 |
logger = logging.getLogger(__name__)
|
|
|
|
| 3 |
import logging
|
| 4 |
from fastapi import APIRouter
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
+
from models.database import lda_collection # pylint: disable=import-error
|
| 7 |
|
| 8 |
# Configure logger
|
| 9 |
logger = logging.getLogger(__name__)
|
app/routes/summary.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
import importlib
|
| 4 |
from fastapi import APIRouter
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
-
from controllers.summary import get_summary_data
|
| 7 |
|
| 8 |
router = APIRouter(prefix="/summary", tags=["summary"])
|
| 9 |
|
|
|
|
| 3 |
import importlib
|
| 4 |
from fastapi import APIRouter
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
+
from controllers.summary import get_summary_data # pylint: disable=import-error
|
| 7 |
|
| 8 |
router = APIRouter(prefix="/summary", tags=["summary"])
|
| 9 |
|