Zelyanoth committed
Commit 6101ff2 · 1 Parent(s): 0608375

feat(content): update RSS feed processing to include user country and language preferences

Files changed (1):
  1. backend/services/content_service.py (+80 -31)
backend/services/content_service.py CHANGED

@@ -43,7 +43,7 @@ class ContentService:
 
         try:
             # Check if the Hugging Face Space exists and is accessible before creating the client
-            self.client = Client("Zelyanoth/Linkedin_poster_dev", hf_token=self.hugging_key)
+            self.client = Client("Zelyanoth/Linkedin_poster_dev", token=self.hugging_key)
 
             # Log success using safe approach
             try:
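The only functional change in this hunk is the access-token keyword passed to the gradio_client Client: hf_token before the commit, token after it. Which spelling is accepted depends on the installed gradio_client release, so a version-tolerant construction can be useful. A minimal sketch (an illustration, not part of the commit):

    from gradio_client import Client

    def make_client(space_id: str, access_token: str) -> Client:
        # Hypothetical helper: try the newer keyword first (as used after
        # this commit), then fall back to the older spelling.
        try:
            return Client(space_id, token=access_token)
        except TypeError:
            return Client(space_id, hf_token=access_token)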
@@ -328,6 +328,10 @@ class ContentService:
 
         user_rss_sources = rss_response.data if rss_response.data else []
 
+        # Get user preferences for country and language
+        user_prefs = self._get_user_preferences(user_id)
+        user_country = user_prefs["country"]
+
         # Analyze each RSS source for frequency of new articles/links
         keyword_data = []
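This hunk calls a _get_user_preferences helper whose body is not part of the diff. A hypothetical sketch of what it might look like, assuming the Supabase-style client implied by rss_response.data above; the table and column names are guesses:

    def _get_user_preferences(self, user_id: str) -> dict:
        # Hypothetical: "user_preferences", "country" and "language" are
        # assumed names, not taken from the commit.
        response = (
            self.supabase.table("user_preferences")
            .select("country, language")
            .eq("user_id", user_id)
            .execute()
        )
        if response.data:
            return response.data[0]
        # Fall back to defaults when the user has no stored preferences.
        return {"country": "US", "language": "en"}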
 
@@ -338,37 +342,82 @@ class ContentService:
             rss_link = rss_source["source"]
 
             # Check if the source is a keyword rather than an RSS URL
-            # If it's a keyword, generate a Google News RSS URL
-            if self._is_url(rss_link):
-                # It's a URL, use it directly
-                feed_url = rss_link
+            # If it's a keyword, generate a Google News RSS URL with user's preferences
+            if not self._is_url(rss_link):
+                # It's a keyword, generate Google News RSS URLs for both English and French for user's country
+                english_feed_url = self._generate_google_news_rss_from_string(rss_link, language="en", country=user_country)
+                french_feed_url = self._generate_google_news_rss_from_string(rss_link, language="fr", country=user_country)
+
+                # Process both English and French feeds
+                english_feed = feedparser.parse(english_feed_url)
+                french_feed = feedparser.parse(french_feed_url)
+
+                # Extract articles from both feeds
+                english_articles = []
+                for entry in english_feed.entries:
+                    article_data = {
+                        'title': entry.title,
+                        'link': entry.link,
+                        'summary': entry.summary,
+                        'date': entry.get('published', entry.get('updated', None)),
+                        'content': entry.get('summary', '') + ' ' + entry.get('title', ''),
+                        'language': 'en'  # Add language indicator
+                    }
+                    english_articles.append(article_data)
+
+                french_articles = []
+                for entry in french_feed.entries:
+                    article_data = {
+                        'title': entry.title,
+                        'link': entry.link,
+                        'summary': entry.summary,
+                        'date': entry.get('published', entry.get('updated', None)),
+                        'content': entry.get('summary', '') + ' ' + entry.get('title', ''),
+                        'language': 'fr'  # Add language indicator
+                    }
+                    french_articles.append(article_data)
+
+                # Convert to DataFrames
+                english_df = pd.DataFrame(english_articles)
+                french_df = pd.DataFrame(french_articles)
+
+                # Merge dataframes to remove duplicates based on article URL
+                if not english_df.empty and not french_df.empty:
+                    all_articles_df = self._merge_dataframes(english_df, french_df)
+                elif not english_df.empty:
+                    all_articles_df = english_df
+                elif not french_df.empty:
+                    all_articles_df = french_df
+                else:
+                    all_articles_df = pd.DataFrame()
+
+                # Extract the unique articles back to the list
+                all_articles.extend(all_articles_df.to_dict('records'))
             else:
-                # It's a keyword, generate Google News RSS URL
-                feed_url = self._generate_google_news_rss_from_string(rss_link)
-
-            # Parse the RSS feed
-            feed = feedparser.parse(feed_url)
-
-            # Log some debug information
-            current_app.logger.info(f"Processing RSS feed: {feed_url}")
-            current_app.logger.info(f"Number of entries in feed: {len(feed.entries)}")
-
-            # Extract articles from the feed
-            for entry in feed.entries:
-                # Use the same date handling as in the original ai_agent.py
-                article_data = {
-                    'title': entry.title,
-                    'link': entry.link,
-                    'summary': entry.summary,
-                    'date': entry.get('published', entry.get('updated', None)),
-                    'content': entry.get('summary', '') + ' ' + entry.get('title', '')
-                }
-
-                # Log individual article data for debugging
-                current_app.logger.info(f"Article title: {entry.title}")
-                current_app.logger.info(f"Article date: {article_data['date']}")
-
-                all_articles.append(article_data)
+                # If it's a URL, use it directly (for backward compatibility or external RSS)
+                feed_url = rss_link
+                feed = feedparser.parse(feed_url)
+
+                # Log some debug information
+                current_app.logger.info(f"Processing RSS feed: {feed_url}")
+                current_app.logger.info(f"Number of entries in feed: {len(feed.entries)}")
+
+                # Extract articles from the feed
+                for entry in feed.entries:
+                    # Use the same date handling as in the original ai_agent.py
+                    article_data = {
+                        'title': entry.title,
+                        'link': entry.link,
+                        'summary': entry.summary,
+                        'date': entry.get('published', entry.get('updated', None)),
+                        'content': entry.get('summary', '') + ' ' + entry.get('title', '')
+                    }
+
+                    # Log individual article data for debugging
+                    current_app.logger.info(f"Article title: {entry.title}")
+                    current_app.logger.info(f"Article date: {article_data['date']}")
+
+                    all_articles.append(article_data)
 
         # Create a DataFrame from the articles
         df_articles = pd.DataFrame(all_articles)
 
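The new branch condition hinges on _is_url, which is also not shown in this diff. A minimal sketch of such a check using only the standard library (an assumption about the real implementation):

    from urllib.parse import urlparse

    def _is_url(self, value: str) -> bool:
        # Treat the source as a URL only if it has an http(s) scheme and a host.
        parsed = urlparse(value)
        return parsed.scheme in ("http", "https") and bool(parsed.netloc)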
 
 
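_generate_google_news_rss_from_string now accepts language and country. Google News serves keyword searches as RSS from news.google.com/rss/search, with the locale carried by the hl, gl, and ceid query parameters; a sketch of how the helper might build such a URL (its real body is not in this diff):

    from urllib.parse import quote_plus

    def _generate_google_news_rss_from_string(self, keyword: str,
                                              language: str = "en",
                                              country: str = "US") -> str:
        # Example output for ("ai", "fr", "FR"):
        #   https://news.google.com/rss/search?q=ai&hl=fr&gl=FR&ceid=FR:fr
        return (
            "https://news.google.com/rss/search"
            f"?q={quote_plus(keyword)}"
            f"&hl={language}&gl={country}&ceid={country}:{language}"
        )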
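In the new keyword branch, the comment says _merge_dataframes removes duplicates based on the article URL. In pandas that is typically a concat followed by drop_duplicates; a sketch consistent with that comment (the helper's body is likewise not shown):

    import pandas as pd

    def _merge_dataframes(self, df_a: pd.DataFrame, df_b: pd.DataFrame) -> pd.DataFrame:
        # Stack both frames, then keep the first occurrence of each link.
        merged = pd.concat([df_a, df_b], ignore_index=True)
        return merged.drop_duplicates(subset="link", keep="first")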
 
 
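One caveat that applies to both branches: entry.title, entry.link, and entry.summary raise AttributeError when a feed omits those fields, whereas the entry.get(...) calls used for date and content do not. A more defensive extraction sketch (a suggestion, not part of the commit):

    def _entry_to_article(entry, language=None):
        # feedparser entries behave like dicts, so .get() is safe everywhere.
        title = entry.get('title', '')
        summary = entry.get('summary', '')
        article = {
            'title': title,
            'link': entry.get('link', ''),
            'summary': summary,
            'date': entry.get('published', entry.get('updated', None)),
            'content': summary + ' ' + title,
        }
        if language is not None:
            article['language'] = language  # e.g. 'en' or 'fr'
        return article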