feat(content): update RSS feed processing to include user country and language preferences
backend/services/content_service.py
@@ -43,7 +43,7 @@ class ContentService:
 
         try:
             # Check if the Hugging Face Space exists and is accessible before creating the client
-            self.client = Client("Zelyanoth/Linkedin_poster_dev",
+            self.client = Client("Zelyanoth/Linkedin_poster_dev", token=self.hugging_key)
 
             # Log success using safe approach
            try:
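Note on the change above: if `Client` here is `gradio_client.Client`, the releases I know of accept the token as `hf_token` rather than `token`, so the keyword in this diff may come from a newer version or a project-specific wrapper. A minimal sketch of authenticating against a private Space, with the parameter name treated as an assumption:

    # Sketch only: authenticate a gradio_client.Client against a private Space.
    # The diff passes token=self.hugging_key; gradio_client versions I know of
    # call this parameter hf_token, so treat the exact name as an assumption.
    from gradio_client import Client

    client = Client("Zelyanoth/Linkedin_poster_dev", hf_token="hf_xxx")  # placeholder token value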
@@ -328,6 +328,10 @@ class ContentService:
 
             user_rss_sources = rss_response.data if rss_response.data else []
 
+            # Get user preferences for country and language
+            user_prefs = self._get_user_preferences(user_id)
+            user_country = user_prefs["country"]
+
             # Analyze each RSS source for frequency of new articles/links
             keyword_data = []
 
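The `_get_user_preferences` helper called above is referenced but not defined in this diff. A hypothetical sketch of its shape, assuming the service holds a supabase-py client (the surrounding code reads `rss_response.data`, which matches that API) and that preferences live in a `user_preferences` table; the table name, column names, and defaults are all assumptions:

    # Hypothetical sketch of _get_user_preferences; schema names are assumed.
    def _get_user_preferences(self, user_id):
        response = (
            self.supabase.table("user_preferences")  # assumed table name
            .select("country, language")
            .eq("user_id", user_id)
            .execute()
        )
        if response.data:
            row = response.data[0]
            return {
                "country": row.get("country", "US"),
                "language": row.get("language", "en"),
            }
        # Assumed defaults when the user has no stored preferences
        return {"country": "US", "language": "en"}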
@@ -338,37 +342,82 @@ class ContentService:
                 rss_link = rss_source["source"]
 
                 # Check if the source is a keyword rather than an RSS URL
-                # If it's a keyword, generate a Google News RSS URL
-                if self._is_url(rss_link):
-                    # It's a …
-                    …
+                # If it's a keyword, generate a Google News RSS URL with user's preferences
+                if not self._is_url(rss_link):
+                    # It's a keyword, generate Google News RSS URLs for both English and French for user's country
+                    english_feed_url = self._generate_google_news_rss_from_string(rss_link, language="en", country=user_country)
+                    french_feed_url = self._generate_google_news_rss_from_string(rss_link, language="fr", country=user_country)
+
+                    # Process both English and French feeds
+                    english_feed = feedparser.parse(english_feed_url)
+                    french_feed = feedparser.parse(french_feed_url)
+
+                    # Extract articles from both feeds
+                    english_articles = []
+                    for entry in english_feed.entries:
+                        article_data = {
+                            'title': entry.title,
+                            'link': entry.link,
+                            'summary': entry.summary,
+                            'date': entry.get('published', entry.get('updated', None)),
+                            'content': entry.get('summary', '') + ' ' + entry.get('title', ''),
+                            'language': 'en'  # Add language indicator
+                        }
+                        english_articles.append(article_data)
+
+                    french_articles = []
+                    for entry in french_feed.entries:
+                        article_data = {
+                            'title': entry.title,
+                            'link': entry.link,
+                            'summary': entry.summary,
+                            'date': entry.get('published', entry.get('updated', None)),
+                            'content': entry.get('summary', '') + ' ' + entry.get('title', ''),
+                            'language': 'fr'  # Add language indicator
+                        }
+                        french_articles.append(article_data)
+
+                    # Convert to DataFrames
+                    english_df = pd.DataFrame(english_articles)
+                    french_df = pd.DataFrame(french_articles)
+
+                    # Merge dataframes to remove duplicates based on article URL
+                    if not english_df.empty and not french_df.empty:
+                        all_articles_df = self._merge_dataframes(english_df, french_df)
+                    elif not english_df.empty:
+                        all_articles_df = english_df
+                    elif not french_df.empty:
+                        all_articles_df = french_df
+                    else:
+                        all_articles_df = pd.DataFrame()
+
+                    # Extract the unique articles back to the list
+                    all_articles.extend(all_articles_df.to_dict('records'))
                 else:
-                    # …
-                    feed_url = …
-                    …
-                    all_articles.append(article_data)
+                    # If it's a URL, use it directly (for backward compatibility or external RSS)
+                    feed_url = rss_link
+                    feed = feedparser.parse(feed_url)
+
+                    # Log some debug information
+                    current_app.logger.info(f"Processing RSS feed: {feed_url}")
+                    current_app.logger.info(f"Number of entries in feed: {len(feed.entries)}")
+
+                    # Extract articles from the feed
+                    for entry in feed.entries:
+                        # Use the same date handling as in the original ai_agent.py
+                        article_data = {
+                            'title': entry.title,
+                            'link': entry.link,
+                            'summary': entry.summary,
+                            'date': entry.get('published', entry.get('updated', None)),
+                            'content': entry.get('summary', '') + ' ' + entry.get('title', '')
+                        }
+
+                        # Log individual article data for debugging
+                        current_app.logger.info(f"Article title: {entry.title}")
+                        current_app.logger.info(f"Article date: {article_data['date']}")
+
+                        all_articles.append(article_data)
 
                 # Create a DataFrame from the articles
                 df_articles = pd.DataFrame(all_articles)
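`_generate_google_news_rss_from_string` is also called but not defined in this diff. Google News publishes search feeds under a stable URL pattern (`q`, `hl`, `gl`, `ceid`), so a minimal sketch consistent with the call sites could look like this; the body is illustrative, not the project's actual implementation:

    # Sketch: build a localized Google News search feed URL for a keyword.
    from urllib.parse import quote_plus

    def _generate_google_news_rss_from_string(self, keyword, language="en", country="US"):
        query = quote_plus(keyword)
        return (
            f"https://news.google.com/rss/search?q={query}"
            f"&hl={language}&gl={country}&ceid={country}:{language}"
        )

For the keyword "machine learning" with language "fr" and country "FR" this yields https://news.google.com/rss/search?q=machine+learning&hl=fr&gl=FR&ceid=FR:fr, which feedparser.parse can consume directly.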
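`_merge_dataframes` is likewise out of frame. Given the comment in the diff ("remove duplicates based on article URL"), a minimal version could concatenate the two language frames and deduplicate on the `link` column; a sketch of that intent, not the real method:

    # Sketch: merge two article DataFrames, keeping one row per article link.
    import pandas as pd

    def _merge_dataframes(self, df_a, df_b):
        merged = pd.concat([df_a, df_b], ignore_index=True)
        return merged.drop_duplicates(subset="link", keep="first")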
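One robustness note on the entry loops: `entry.title`, `entry.link`, and `entry.summary` raise AttributeError when a feed omits a field, whereas the date lookup already uses the safe `entry.get(...)` form. Extending that pattern to the other fields would make the loops tolerant of sparse feeds:

    # feedparser entries are dict-like; .get() avoids AttributeError on
    # feeds that omit a field, mirroring the date handling in the diff.
    title = entry.get('title', '')
    link = entry.get('link', '')
    summary = entry.get('summary', '')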