MtotoWaJemo committed on
Commit
b24af4e
·
verified ·
1 Parent(s): c4fbf73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -61
app.py CHANGED
@@ -182,11 +182,10 @@ with st.sidebar:
182
  button = st.button("Analyze News", key="analyze_button")
183
 
184
  # Function to calculate time range
185
- @st.cache_data
186
  def get_date_range(period):
187
  end_date = datetime.now()
188
  if period == "1D":
189
- start_date = end_date - timedelta(hours=36) # Broaden to 36 hours for more results
190
  elif period == "5D":
191
  start_date = end_date - timedelta(days=5)
192
  elif period == "1M":
@@ -199,54 +198,53 @@ def get_date_range(period):
199
  start_date = end_date - timedelta(days=365)
200
  else: # 5Y
201
  start_date = end_date - timedelta(days=365 * 5)
202
- return start_date.strftime('%Y-%m-%dT%H:%M:%SZ'), end_date.strftime('%Y-%m-%dT%H:%M:%SZ')
203
 
204
  # Async news fetching with retry logic
205
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5))
206
  async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
207
  try:
208
  newsapi = NewsApiClient(api_key=api_key)
209
- # Relax filtering for 1D
210
- min_relevance_weight = 0.5 if period == "1D" else 1.0
211
  articles = newsapi.get_everything(
212
  q=company_name,
213
  from_param=from_date,
214
- to=to_date if period != "1D" else None, # Fetch up to now for 1D
215
  language="en",
216
  sort_by="publishedAt",
217
  page_size=page_size
218
  )["articles"]
219
- relevant_articles = []
220
- for article in articles:
221
- title = (article.get("title", "") or "").lower()
222
- desc = (article.get("description", "") or "").lower()
223
- relevance_weight = sum(keyword_weights.get(keyword, 0) for keyword in keyword_weights if keyword in title or keyword in desc)
224
- if relevance_weight >= min_relevance_weight:
225
- article["relevance_weight"] = relevance_weight
226
- relevant_articles.append(article)
227
- logger.info(f"Fetched {len(relevant_articles)} articles for {company_name} in {period}")
228
- return company_name, sorted(relevant_articles, key=lambda x: x["relevance_weight"], reverse=True)[:5]
 
 
229
  except Exception as e:
230
  logger.error(f"Error fetching news for {company_name}: {str(e)}")
 
231
  return company_name, []
232
 
233
  # Batch summarize and classify articles
234
- @st.cache_data(ttl=1800) # Cache for 30 minutes
235
  def summarize_and_classify_batch(news_articles):
236
  try:
237
  sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
238
  summaries = []
239
  key_themes = {}
240
  contents = [article.get("content", "") or article.get("description", "") or article.get("title", "") for article in news_articles]
241
- contents = [c[:1024] for c in contents if c] # Truncate for speed
242
  if not contents:
243
  return [], sentiment_counts, []
244
 
245
- # Batch summarization
246
  summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
247
  summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]
248
 
249
- # Batch sentiment analysis
250
  sentiment_results = classifier(summaries_texts, batch_size=4)
251
 
252
  for idx, article in enumerate(news_articles):
@@ -258,10 +256,10 @@ def summarize_and_classify_batch(news_articles):
258
  sentiment_result = sentiment_results[idx]
259
  sentiment_label = sentiment_result["label"]
260
  sentiment_score = sentiment_result["score"]
261
- if sentiment_label == "POSITIVE" and sentiment_score > 0.6: # Lowered threshold
262
  sentiment_counts["Positive"] += 1
263
  sentiment_display = "Positive"
264
- elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6: # Lowered threshold
265
  sentiment_counts["Negative"] += 1
266
  sentiment_display = "Negative"
267
  else:
@@ -283,7 +281,7 @@ def summarize_and_classify_batch(news_articles):
283
  })
284
  top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
285
  logger.info(f"Sentiment counts: {sentiment_counts}")
286
- return summaries[:3], sentiment_counts, top_themes # Include all sentiments
287
  except Exception as e:
288
  logger.error(f"Error in summarize_and_classify: {str(e)}")
289
  return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []
@@ -328,16 +326,13 @@ if button:
328
  st.stop()
329
 
330
  with st.spinner("Fetching and analyzing news..."):
331
- # Get date range
332
  from_date, to_date = get_date_range(selected_period)
333
 
334
- # Filter companies by sector
335
  companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
336
  if not companies_in_sector:
337
  st.warning(f"No companies found for {selected_sector} sector.")
338
  st.stop()
339
 
340
- # Async news fetching
341
  sentiment_data = []
342
  all_news = {}
343
  sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
@@ -352,7 +347,6 @@ if button:
352
  ]
353
  return await asyncio.gather(*tasks, return_exceptions=True)
354
 
355
- # Progress bar
356
  progress_bar = st.progress(0)
357
  progress_text = st.empty()
358
  results = asyncio.run(fetch_all_news())
@@ -383,12 +377,11 @@ if button:
383
  for theme, count in top_themes:
384
  sector_themes[theme] = sector_themes.get(theme, 0) + count
385
  else:
386
- st.warning(f"No relevant news found for {company_name}.{' Try a longer time frame like 5D or 1M.' if selected_period == '1D' else ''}")
387
 
388
  progress_bar.empty()
389
  progress_text.empty()
390
 
391
- # Display results
392
  if sentiment_data:
393
  colored_header(
394
  f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
@@ -396,13 +389,11 @@ if button:
396
  color_name="blue-70"
397
  )
398
 
399
- # Sentiment table
400
  sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
401
  sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
402
  st.subheader("Company Sentiment Overview")
403
  st.table(sentiment_df)
404
 
405
- # Sentiment visualization
406
  st.subheader("Sentiment Score Distribution")
407
  fig = px.bar(
408
  sentiment_df,
@@ -416,9 +407,8 @@ if button:
416
  )
417
  st.plotly_chart(fig, use_container_width=True)
418
 
419
- # Enhanced Decision Guidance
420
- colored_header("📊 Decision Guidance", description="Investment Insights Based on News Sentiment", color_name="violet-70")
421
- st.markdown("**Disclaimer**: These insights are derived from news sentiment and are not financial advice. Consult a certified financial advisor before making investment decisions.", unsafe_allow_html=True)
422
 
423
  sector_total = sum(sector_sentiment_counts.values())
424
  sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
@@ -426,51 +416,34 @@ if button:
426
  sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
427
  sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"
428
 
429
- # Sector-level insights
430
  st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
431
  outlook = "Favorable 📈" if sector_positive_pct > 50 else "Cautious 📉" if sector_negative_pct > 50 else "Neutral ⚖️"
432
- trend = "Improving" if sector_positive_pct > sector_neutral_pct else "Declining" if sector_negative_pct > sector_positive_pct else "Stable"
433
- st.markdown(f"- **Investment Outlook**: {outlook} for selective investments in the {selected_sector} sector. Sentiment trend: **{trend}**.")
434
 
435
- # Risk factors
436
  negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
437
  if negative_themes:
438
- st.markdown(f"- **Risk Factors**: Potential concerns include {', '.join(negative_themes[:3])}. Monitor these closely.")
439
-
440
- # Top companies
441
- st.markdown("**Top Companies to Watch**:")
442
- top_companies = sentiment_df.head(3).to_dict("records")
443
- for company in top_companies:
444
- themes_str = ", ".join(sentiment_data[sentiment_data.index(company["Company"] == sentiment_data["Company"])]["Top Themes"][0]) if sentiment_data[sentiment_data.index(company["Company"] == sentiment_data["Company"])]["Top Themes"] else "none"
445
- st.markdown(f"- **{company['Company']}**: Sentiment Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Negative']} Negative, {company['Neutral']} Neutral). Driven by {themes_str}.")
446
 
447
- # Actionable tips
448
- st.markdown("**Actionable Tips**:")
449
- st.markdown("- Focus on companies with **high article volume** (e.g., >5 articles) for more reliable sentiment signals.")
450
- st.markdown("- Cross-check **Neutral** sentiments with company fundamentals, as they may indicate mixed or unclear news.")
451
- st.markdown("- Monitor **negative themes** (e.g., regulation, loss) for potential long-term impacts.")
452
- st.markdown("- Use longer time frames (e.g., 1M) for sectors with sparse news to capture broader trends.")
453
 
454
- # Confidence context
455
- st.markdown("**Confidence Levels**:")
456
- st.markdown("- **High**: Based on robust news coverage (>70% of max articles).")
457
- st.markdown("- **Medium**: Moderate coverage (30-70% of max articles).")
458
- st.markdown("- **Low**: Limited coverage (<30% of max articles). Consider additional research.")
459
 
460
- # Company insights
461
  st.markdown("**Company Insights**:")
462
  for company in sentiment_data:
463
  confidence = "High" if company["Total"] / max_articles > 0.7 else "Medium" if company["Total"] / max_articles > 0.3 else "Low"
464
  recommendation = "Consider buying 📈" if company["Sentiment Score"] > 0.3 else "Avoid 📉" if company["Sentiment Score"] < -0.3 else "Monitor ⚖️"
465
  themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
466
- st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({company['Dominant Sentiment']}, driven by {themes_str}), {company['Total']} articles (Confidence: {confidence}). **Recommendation**: {recommendation}.")
467
 
468
- # Detailed news for each company
469
  for company_name in sentiment_df["Company"]:
470
  if company_name in all_news and all_news[company_name]:
471
  display_news_articles(all_news[company_name], company_name, selected_period)
472
  else:
473
- st.warning(f"No relevant news found for {selected_sector} sector in the selected period. Try a longer time frame like 5D or 1M.")
474
 
475
  # Footer
476
  st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)
 
182
  button = st.button("Analyze News", key="analyze_button")
183
 
184
  # Function to calculate time range
 
185
  def get_date_range(period):
186
  end_date = datetime.now()
187
  if period == "1D":
188
+ start_date = end_date - timedelta(hours=36) # Broaden to 36 hours
189
  elif period == "5D":
190
  start_date = end_date - timedelta(days=5)
191
  elif period == "1M":
 
198
  start_date = end_date - timedelta(days=365)
199
  else: # 5Y
200
  start_date = end_date - timedelta(days=365 * 5)
201
+ return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')
202
 
203
  # Async news fetching with retry logic
204
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5))
205
  async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
206
  try:
207
  newsapi = NewsApiClient(api_key=api_key)
208
+ page_size = 50 if period == "1D" else page_size # Increase for 1D
 
209
  articles = newsapi.get_everything(
210
  q=company_name,
211
  from_param=from_date,
212
+ to=to_date if period != "1D" else None,
213
  language="en",
214
  sort_by="publishedAt",
215
  page_size=page_size
216
  )["articles"]
217
+ if period == "1D":
218
+ relevant_articles = articles # No filtering for 1D
219
+ else:
220
+ relevant_articles = []
221
+ for article in articles:
222
+ title = (article.get("title", "") or "").lower()
223
+ desc = (article.get("description", "") or "").lower()
224
+ if any(keyword in title or keyword in desc for keyword in keyword_weights.keys()):
225
+ article["relevance_weight"] = sum(keyword_weights.get(keyword, 0) for keyword in keyword_weights if keyword in title or keyword in desc)
226
+ relevant_articles.append(article)
227
+ logger.info(f"Fetched {len(articles)} articles, {len(relevant_articles)} relevant for {company_name} in {period}")
228
+ return company_name, relevant_articles[:5]
229
  except Exception as e:
230
  logger.error(f"Error fetching news for {company_name}: {str(e)}")
231
+ st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.")
232
  return company_name, []
233
 
234
  # Batch summarize and classify articles
 
235
  def summarize_and_classify_batch(news_articles):
236
  try:
237
  sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
238
  summaries = []
239
  key_themes = {}
240
  contents = [article.get("content", "") or article.get("description", "") or article.get("title", "") for article in news_articles]
241
+ contents = [c[:1024] for c in contents if c]
242
  if not contents:
243
  return [], sentiment_counts, []
244
 
 
245
  summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
246
  summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]
247
 
 
248
  sentiment_results = classifier(summaries_texts, batch_size=4)
249
 
250
  for idx, article in enumerate(news_articles):
 
256
  sentiment_result = sentiment_results[idx]
257
  sentiment_label = sentiment_result["label"]
258
  sentiment_score = sentiment_result["score"]
259
+ if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
260
  sentiment_counts["Positive"] += 1
261
  sentiment_display = "Positive"
262
+ elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
263
  sentiment_counts["Negative"] += 1
264
  sentiment_display = "Negative"
265
  else:
 
281
  })
282
  top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
283
  logger.info(f"Sentiment counts: {sentiment_counts}")
284
+ return summaries[:3], sentiment_counts, top_themes
285
  except Exception as e:
286
  logger.error(f"Error in summarize_and_classify: {str(e)}")
287
  return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []
 
326
  st.stop()
327
 
328
  with st.spinner("Fetching and analyzing news..."):
 
329
  from_date, to_date = get_date_range(selected_period)
330
 
 
331
  companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
332
  if not companies_in_sector:
333
  st.warning(f"No companies found for {selected_sector} sector.")
334
  st.stop()
335
 
 
336
  sentiment_data = []
337
  all_news = {}
338
  sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
 
347
  ]
348
  return await asyncio.gather(*tasks, return_exceptions=True)
349
 
 
350
  progress_bar = st.progress(0)
351
  progress_text = st.empty()
352
  results = asyncio.run(fetch_all_news())
 
377
  for theme, count in top_themes:
378
  sector_themes[theme] = sector_themes.get(theme, 0) + count
379
  else:
380
+ st.warning(f"No news found for {company_name}.{' Try a longer time frame like 5D.' if selected_period == '1D' else ''}")
381
 
382
  progress_bar.empty()
383
  progress_text.empty()
384
 
 
385
  if sentiment_data:
386
  colored_header(
387
  f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
 
389
  color_name="blue-70"
390
  )
391
 
 
392
  sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
393
  sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
394
  st.subheader("Company Sentiment Overview")
395
  st.table(sentiment_df)
396
 
 
397
  st.subheader("Sentiment Score Distribution")
398
  fig = px.bar(
399
  sentiment_df,
 
407
  )
408
  st.plotly_chart(fig, use_container_width=True)
409
 
410
+ colored_header("📊 Decision Guidance", description="Investment Insights from News Sentiment", color_name="violet-70")
411
+ st.markdown("**Disclaimer**: These are news-based insights, not financial advice. Consult a financial advisor.", unsafe_allow_html=True)
 
412
 
413
  sector_total = sum(sector_sentiment_counts.values())
414
  sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
 
416
  sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
417
  sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"
418
 
 
419
  st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
420
  outlook = "Favorable 📈" if sector_positive_pct > 50 else "Cautious 📉" if sector_negative_pct > 50 else "Neutral ⚖️"
421
+ st.markdown(f"- **Investment Outlook**: {outlook} for {selected_sector} sector.")
 
422
 
 
423
  negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
424
  if negative_themes:
425
+ st.markdown(f"- **Risks**: Watch for issues related to {', '.join(negative_themes[:2])}.")
 
 
 
 
 
 
 
426
 
427
+ st.markdown("**Top Picks**:")
428
+ for company in sentiment_df.head(2).to_dict("records"):
429
+ st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Neutral']} Neutral).")
 
 
 
430
 
431
+ st.markdown("**Tips**:")
432
+ st.markdown("- Prioritize companies with high article counts for stronger signals.")
433
+ st.markdown("- Check Neutral news for hidden opportunities or risks.")
 
 
434
 
 
435
  st.markdown("**Company Insights**:")
436
  for company in sentiment_data:
437
  confidence = "High" if company["Total"] / max_articles > 0.7 else "Medium" if company["Total"] / max_articles > 0.3 else "Low"
438
  recommendation = "Consider buying 📈" if company["Sentiment Score"] > 0.3 else "Avoid 📉" if company["Sentiment Score"] < -0.3 else "Monitor ⚖️"
439
  themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
440
+ st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({themes_str}, {company['Total']} articles, Confidence: {confidence}). {recommendation}.")
441
 
 
442
  for company_name in sentiment_df["Company"]:
443
  if company_name in all_news and all_news[company_name]:
444
  display_news_articles(all_news[company_name], company_name, selected_period)
445
  else:
446
+ st.warning(f"No news found for {selected_sector} sector. Try a longer time frame like 5D or 1M, or check your NEWSAPI_KEY.")
447
 
448
  # Footer
449
  st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)