Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -182,11 +182,10 @@ with st.sidebar:
|
|
182 |
button = st.button("Analyze News", key="analyze_button")
|
183 |
|
184 |
# Function to calculate time range
|
185 |
-
@st.cache_data
|
186 |
def get_date_range(period):
|
187 |
end_date = datetime.now()
|
188 |
if period == "1D":
|
189 |
-
start_date = end_date - timedelta(hours=36) # Broaden to 36 hours
|
190 |
elif period == "5D":
|
191 |
start_date = end_date - timedelta(days=5)
|
192 |
elif period == "1M":
|
@@ -199,54 +198,53 @@ def get_date_range(period):
|
|
199 |
start_date = end_date - timedelta(days=365)
|
200 |
else: # 5Y
|
201 |
start_date = end_date - timedelta(days=365 * 5)
|
202 |
-
return start_date.strftime('%Y-%m-%
|
203 |
|
204 |
# Async news fetching with retry logic
|
205 |
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5))
|
206 |
async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
|
207 |
try:
|
208 |
newsapi = NewsApiClient(api_key=api_key)
|
209 |
-
#
|
210 |
-
min_relevance_weight = 0.5 if period == "1D" else 1.0
|
211 |
articles = newsapi.get_everything(
|
212 |
q=company_name,
|
213 |
from_param=from_date,
|
214 |
-
to=to_date if period != "1D" else None,
|
215 |
language="en",
|
216 |
sort_by="publishedAt",
|
217 |
page_size=page_size
|
218 |
)["articles"]
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
article
|
226 |
-
|
227 |
-
|
228 |
-
|
|
|
|
|
229 |
except Exception as e:
|
230 |
logger.error(f"Error fetching news for {company_name}: {str(e)}")
|
|
|
231 |
return company_name, []
|
232 |
|
233 |
# Batch summarize and classify articles
|
234 |
-
@st.cache_data(ttl=1800) # Cache for 30 minutes
|
235 |
def summarize_and_classify_batch(news_articles):
|
236 |
try:
|
237 |
sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
|
238 |
summaries = []
|
239 |
key_themes = {}
|
240 |
contents = [article.get("content", "") or article.get("description", "") or article.get("title", "") for article in news_articles]
|
241 |
-
contents = [c[:1024] for c in contents if c]
|
242 |
if not contents:
|
243 |
return [], sentiment_counts, []
|
244 |
|
245 |
-
# Batch summarization
|
246 |
summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
|
247 |
summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]
|
248 |
|
249 |
-
# Batch sentiment analysis
|
250 |
sentiment_results = classifier(summaries_texts, batch_size=4)
|
251 |
|
252 |
for idx, article in enumerate(news_articles):
|
@@ -258,10 +256,10 @@ def summarize_and_classify_batch(news_articles):
|
|
258 |
sentiment_result = sentiment_results[idx]
|
259 |
sentiment_label = sentiment_result["label"]
|
260 |
sentiment_score = sentiment_result["score"]
|
261 |
-
if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
|
262 |
sentiment_counts["Positive"] += 1
|
263 |
sentiment_display = "Positive"
|
264 |
-
elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
|
265 |
sentiment_counts["Negative"] += 1
|
266 |
sentiment_display = "Negative"
|
267 |
else:
|
@@ -283,7 +281,7 @@ def summarize_and_classify_batch(news_articles):
|
|
283 |
})
|
284 |
top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
|
285 |
logger.info(f"Sentiment counts: {sentiment_counts}")
|
286 |
-
return summaries[:3], sentiment_counts, top_themes
|
287 |
except Exception as e:
|
288 |
logger.error(f"Error in summarize_and_classify: {str(e)}")
|
289 |
return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []
|
@@ -328,16 +326,13 @@ if button:
|
|
328 |
st.stop()
|
329 |
|
330 |
with st.spinner("Fetching and analyzing news..."):
|
331 |
-
# Get date range
|
332 |
from_date, to_date = get_date_range(selected_period)
|
333 |
|
334 |
-
# Filter companies by sector
|
335 |
companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
|
336 |
if not companies_in_sector:
|
337 |
st.warning(f"No companies found for {selected_sector} sector.")
|
338 |
st.stop()
|
339 |
|
340 |
-
# Async news fetching
|
341 |
sentiment_data = []
|
342 |
all_news = {}
|
343 |
sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
|
@@ -352,7 +347,6 @@ if button:
|
|
352 |
]
|
353 |
return await asyncio.gather(*tasks, return_exceptions=True)
|
354 |
|
355 |
-
# Progress bar
|
356 |
progress_bar = st.progress(0)
|
357 |
progress_text = st.empty()
|
358 |
results = asyncio.run(fetch_all_news())
|
@@ -383,12 +377,11 @@ if button:
|
|
383 |
for theme, count in top_themes:
|
384 |
sector_themes[theme] = sector_themes.get(theme, 0) + count
|
385 |
else:
|
386 |
-
st.warning(f"No
|
387 |
|
388 |
progress_bar.empty()
|
389 |
progress_text.empty()
|
390 |
|
391 |
-
# Display results
|
392 |
if sentiment_data:
|
393 |
colored_header(
|
394 |
f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
|
@@ -396,13 +389,11 @@ if button:
|
|
396 |
color_name="blue-70"
|
397 |
)
|
398 |
|
399 |
-
# Sentiment table
|
400 |
sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
|
401 |
sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
|
402 |
st.subheader("Company Sentiment Overview")
|
403 |
st.table(sentiment_df)
|
404 |
|
405 |
-
# Sentiment visualization
|
406 |
st.subheader("Sentiment Score Distribution")
|
407 |
fig = px.bar(
|
408 |
sentiment_df,
|
@@ -416,9 +407,8 @@ if button:
|
|
416 |
)
|
417 |
st.plotly_chart(fig, use_container_width=True)
|
418 |
|
419 |
-
|
420 |
-
|
421 |
-
st.markdown("**Disclaimer**: These insights are derived from news sentiment and are not financial advice. Consult a certified financial advisor before making investment decisions.", unsafe_allow_html=True)
|
422 |
|
423 |
sector_total = sum(sector_sentiment_counts.values())
|
424 |
sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
|
@@ -426,51 +416,34 @@ if button:
|
|
426 |
sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
|
427 |
sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"
|
428 |
|
429 |
-
# Sector-level insights
|
430 |
st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
|
431 |
outlook = "Favorable π" if sector_positive_pct > 50 else "Cautious π" if sector_negative_pct > 50 else "Neutral βοΈ"
|
432 |
-
|
433 |
-
st.markdown(f"- **Investment Outlook**: {outlook} for selective investments in the {selected_sector} sector. Sentiment trend: **{trend}**.")
|
434 |
|
435 |
-
# Risk factors
|
436 |
negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
|
437 |
if negative_themes:
|
438 |
-
st.markdown(f"- **
|
439 |
-
|
440 |
-
# Top companies
|
441 |
-
st.markdown("**Top Companies to Watch**:")
|
442 |
-
top_companies = sentiment_df.head(3).to_dict("records")
|
443 |
-
for company in top_companies:
|
444 |
-
themes_str = ", ".join(sentiment_data[sentiment_data.index(company["Company"] == sentiment_data["Company"])]["Top Themes"][0]) if sentiment_data[sentiment_data.index(company["Company"] == sentiment_data["Company"])]["Top Themes"] else "none"
|
445 |
-
st.markdown(f"- **{company['Company']}**: Sentiment Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Negative']} Negative, {company['Neutral']} Neutral). Driven by {themes_str}.")
|
446 |
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
st.markdown("- Cross-check **Neutral** sentiments with company fundamentals, as they may indicate mixed or unclear news.")
|
451 |
-
st.markdown("- Monitor **negative themes** (e.g., regulation, loss) for potential long-term impacts.")
|
452 |
-
st.markdown("- Use longer time frames (e.g., 1M) for sectors with sparse news to capture broader trends.")
|
453 |
|
454 |
-
|
455 |
-
st.markdown("
|
456 |
-
st.markdown("-
|
457 |
-
st.markdown("- **Medium**: Moderate coverage (30-70% of max articles).")
|
458 |
-
st.markdown("- **Low**: Limited coverage (<30% of max articles). Consider additional research.")
|
459 |
|
460 |
-
# Company insights
|
461 |
st.markdown("**Company Insights**:")
|
462 |
for company in sentiment_data:
|
463 |
confidence = "High" if company["Total"] / max_articles > 0.7 else "Medium" if company["Total"] / max_articles > 0.3 else "Low"
|
464 |
recommendation = "Consider buying π" if company["Sentiment Score"] > 0.3 else "Avoid π" if company["Sentiment Score"] < -0.3 else "Monitor βοΈ"
|
465 |
themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
|
466 |
-
st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({
|
467 |
|
468 |
-
# Detailed news for each company
|
469 |
for company_name in sentiment_df["Company"]:
|
470 |
if company_name in all_news and all_news[company_name]:
|
471 |
display_news_articles(all_news[company_name], company_name, selected_period)
|
472 |
else:
|
473 |
-
st.warning(f"No
|
474 |
|
475 |
# Footer
|
476 |
st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)
|
|
|
182 |
button = st.button("Analyze News", key="analyze_button")
|
183 |
|
184 |
# Function to calculate time range
|
|
|
185 |
def get_date_range(period):
|
186 |
end_date = datetime.now()
|
187 |
if period == "1D":
|
188 |
+
start_date = end_date - timedelta(hours=36) # Broaden to 36 hours
|
189 |
elif period == "5D":
|
190 |
start_date = end_date - timedelta(days=5)
|
191 |
elif period == "1M":
|
|
|
198 |
start_date = end_date - timedelta(days=365)
|
199 |
else: # 5Y
|
200 |
start_date = end_date - timedelta(days=365 * 5)
|
201 |
+
return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')
|
202 |
|
203 |
# Async news fetching with retry logic
|
204 |
async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
    """Fetch recent news for one company and keep the most relevant articles.

    The NewsAPI client call is synchronous (its result is subscripted
    directly, never awaited), so it is run in the event loop's default
    executor. That lets several of these coroutines overlap when they are
    gathered, instead of executing back to back on the loop thread.

    Transient failures are retried inside the worker (3 attempts,
    exponential back-off between 2 and 5 seconds). The retry lives on the
    inner helper because this coroutine deliberately swallows every error
    and returns an empty result; a retry decorator on the coroutine itself
    would never observe a failure.

    Args:
        session: Unused; kept so existing callers keep working.
        company_name: Search query passed to NewsAPI as ``q``.
        from_date: Start of the search window, passed as ``from_param``.
        to_date: End of the search window; omitted for the "1D" period.
        api_key: NewsAPI key used to build the client.
        page_size: Articles requested per query; forced to 50 for "1D".
        period: Selected period code. "1D" disables keyword filtering.

    Returns:
        ``(company_name, articles)`` where ``articles`` holds at most five
        items. For periods other than "1D" each kept article carries a
        ``relevance_weight`` entry. On any error the list is empty.
    """
    max_returned = 5
    is_one_day = period == "1D"
    # 1D windows are sparse, so ask for more candidates up front.
    effective_page_size = 50 if is_one_day else page_size

    # reraise=True surfaces the real exception (not tenacity's RetryError)
    # so the logged / displayed message stays meaningful.
    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5), reraise=True)
    def _query_newsapi():
        newsapi = NewsApiClient(api_key=api_key)
        response = newsapi.get_everything(
            q=company_name,
            from_param=from_date,
            to=None if is_one_day else to_date,
            language="en",
            sort_by="publishedAt",
            page_size=effective_page_size,
        )
        # Tolerate a payload without (or with a null) "articles" entry.
        return response.get("articles") or []

    try:
        loop = asyncio.get_running_loop()
        articles = await loop.run_in_executor(None, _query_newsapi)

        if is_one_day:
            relevant_articles = articles  # No filtering for 1D
        else:
            relevant_articles = []
            for article in articles:
                title = (article.get("title", "") or "").lower()
                desc = (article.get("description", "") or "").lower()
                # One pass over the keyword table: collect the weights of
                # every keyword that appears in the title or description.
                matched_weights = [
                    weight
                    for keyword, weight in keyword_weights.items()
                    if keyword in title or keyword in desc
                ]
                if matched_weights:
                    article["relevance_weight"] = sum(matched_weights)
                    relevant_articles.append(article)

        logger.info(f"Fetched {len(articles)} articles, {len(relevant_articles)} relevant for {company_name} in {period}")
        return company_name, relevant_articles[:max_returned]
    except Exception as e:
        # Best-effort boundary: one company's failure must not abort the
        # whole sector analysis, so report it and return an empty result.
        logger.error(f"Error fetching news for {company_name}: {str(e)}")
        st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.")
        return company_name, []
|
233 |
|
234 |
# Batch summarize and classify articles
|
|
|
235 |
def summarize_and_classify_batch(news_articles):
|
236 |
try:
|
237 |
sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
|
238 |
summaries = []
|
239 |
key_themes = {}
|
240 |
contents = [article.get("content", "") or article.get("description", "") or article.get("title", "") for article in news_articles]
|
241 |
+
contents = [c[:1024] for c in contents if c]
|
242 |
if not contents:
|
243 |
return [], sentiment_counts, []
|
244 |
|
|
|
245 |
summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
|
246 |
summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]
|
247 |
|
|
|
248 |
sentiment_results = classifier(summaries_texts, batch_size=4)
|
249 |
|
250 |
for idx, article in enumerate(news_articles):
|
|
|
256 |
sentiment_result = sentiment_results[idx]
|
257 |
sentiment_label = sentiment_result["label"]
|
258 |
sentiment_score = sentiment_result["score"]
|
259 |
+
if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
|
260 |
sentiment_counts["Positive"] += 1
|
261 |
sentiment_display = "Positive"
|
262 |
+
elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
|
263 |
sentiment_counts["Negative"] += 1
|
264 |
sentiment_display = "Negative"
|
265 |
else:
|
|
|
281 |
})
|
282 |
top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
|
283 |
logger.info(f"Sentiment counts: {sentiment_counts}")
|
284 |
+
return summaries[:3], sentiment_counts, top_themes
|
285 |
except Exception as e:
|
286 |
logger.error(f"Error in summarize_and_classify: {str(e)}")
|
287 |
return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []
|
|
|
326 |
st.stop()
|
327 |
|
328 |
with st.spinner("Fetching and analyzing news..."):
|
|
|
329 |
from_date, to_date = get_date_range(selected_period)
|
330 |
|
|
|
331 |
companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
|
332 |
if not companies_in_sector:
|
333 |
st.warning(f"No companies found for {selected_sector} sector.")
|
334 |
st.stop()
|
335 |
|
|
|
336 |
sentiment_data = []
|
337 |
all_news = {}
|
338 |
sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
|
|
|
347 |
]
|
348 |
return await asyncio.gather(*tasks, return_exceptions=True)
|
349 |
|
|
|
350 |
progress_bar = st.progress(0)
|
351 |
progress_text = st.empty()
|
352 |
results = asyncio.run(fetch_all_news())
|
|
|
377 |
for theme, count in top_themes:
|
378 |
sector_themes[theme] = sector_themes.get(theme, 0) + count
|
379 |
else:
|
380 |
+
st.warning(f"No news found for {company_name}.{' Try a longer time frame like 5D.' if selected_period == '1D' else ''}")
|
381 |
|
382 |
progress_bar.empty()
|
383 |
progress_text.empty()
|
384 |
|
|
|
385 |
if sentiment_data:
|
386 |
colored_header(
|
387 |
f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
|
|
|
389 |
color_name="blue-70"
|
390 |
)
|
391 |
|
|
|
392 |
sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
|
393 |
sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
|
394 |
st.subheader("Company Sentiment Overview")
|
395 |
st.table(sentiment_df)
|
396 |
|
|
|
397 |
st.subheader("Sentiment Score Distribution")
|
398 |
fig = px.bar(
|
399 |
sentiment_df,
|
|
|
407 |
)
|
408 |
st.plotly_chart(fig, use_container_width=True)
|
409 |
|
410 |
+
colored_header("π Decision Guidance", description="Investment Insights from News Sentiment", color_name="violet-70")
|
411 |
+
st.markdown("**Disclaimer**: These are news-based insights, not financial advice. Consult a financial advisor.", unsafe_allow_html=True)
|
|
|
412 |
|
413 |
sector_total = sum(sector_sentiment_counts.values())
|
414 |
sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
|
|
|
416 |
sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
|
417 |
sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"
|
418 |
|
|
|
419 |
st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
|
420 |
outlook = "Favorable π" if sector_positive_pct > 50 else "Cautious π" if sector_negative_pct > 50 else "Neutral βοΈ"
|
421 |
+
st.markdown(f"- **Investment Outlook**: {outlook} for {selected_sector} sector.")
|
|
|
422 |
|
|
|
423 |
negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
|
424 |
if negative_themes:
|
425 |
+
st.markdown(f"- **Risks**: Watch for issues related to {', '.join(negative_themes[:2])}.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
426 |
|
427 |
+
st.markdown("**Top Picks**:")
|
428 |
+
for company in sentiment_df.head(2).to_dict("records"):
|
429 |
+
st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Neutral']} Neutral).")
|
|
|
|
|
|
|
430 |
|
431 |
+
st.markdown("**Tips**:")
|
432 |
+
st.markdown("- Prioritize companies with high article counts for stronger signals.")
|
433 |
+
st.markdown("- Check Neutral news for hidden opportunities or risks.")
|
|
|
|
|
434 |
|
|
|
435 |
st.markdown("**Company Insights**:")
|
436 |
for company in sentiment_data:
|
437 |
confidence = "High" if company["Total"] / max_articles > 0.7 else "Medium" if company["Total"] / max_articles > 0.3 else "Low"
|
438 |
recommendation = "Consider buying π" if company["Sentiment Score"] > 0.3 else "Avoid π" if company["Sentiment Score"] < -0.3 else "Monitor βοΈ"
|
439 |
themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
|
440 |
+
st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({themes_str}, {company['Total']} articles, Confidence: {confidence}). {recommendation}.")
|
441 |
|
|
|
442 |
for company_name in sentiment_df["Company"]:
|
443 |
if company_name in all_news and all_news[company_name]:
|
444 |
display_news_articles(all_news[company_name], company_name, selected_period)
|
445 |
else:
|
446 |
+
st.warning(f"No news found for {selected_sector} sector. Try a longer time frame like 5D or 1M, or check your NEWSAPI_KEY.")
|
447 |
|
448 |
# Footer
|
449 |
st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)
|