# Page-header residue from the hosting site (not part of the program):
#   MtotoWaJemo's picture
#   Update app.py
#   b24af4e verified
import asyncio
import json
import logging
import os
import re
from datetime import datetime, timedelta

import aiohttp
import pandas as pd
import plotly.express as px
import streamlit as st
from newsapi import NewsApiClient
from streamlit_extras.colored_header import colored_header
from tenacity import retry, stop_after_attempt, wait_exponential
from transformers import pipeline
# Set up logging
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by the functions below.
logger = logging.getLogger(__name__)
# NIFTY 50 companies with tickers and sectors.
# Each key is the company's display name; the same string is also sent
# verbatim as the NewsAPI search query, so it must name the company that the
# ticker actually belongs to.  "sector" drives the sidebar sector picker.
nifty_50_data = {
    "Adani Enterprises": {"ticker": "ADANIENT.NS", "sector": "Industrials"},
    "Adani Ports": {"ticker": "ADANIPORTS.NS", "sector": "Industrials"},
    "Apollo Hospitals": {"ticker": "APOLLOHOSP.NS", "sector": "Healthcare"},
    "Asian Paints": {"ticker": "ASIANPAINT.NS", "sector": "Consumer Discretionary"},
    "Axis Bank": {"ticker": "AXISBANK.NS", "sector": "Financials"},
    "Bajaj Auto": {"ticker": "BAJAJ-AUTO.NS", "sector": "Consumer Discretionary"},
    "Bajaj Finserv": {"ticker": "BAJAJFINSV.NS", "sector": "Financials"},
    "Bajaj Finance": {"ticker": "BAJFINANCE.NS", "sector": "Financials"},
    "Bharti Airtel": {"ticker": "BHARTIARTL.NS", "sector": "Communication Services"},
    "BPCL": {"ticker": "BPCL.NS", "sector": "Energy"},
    "Britannia": {"ticker": "BRITANNIA.NS", "sector": "Consumer Staples"},
    "Cipla": {"ticker": "CIPLA.NS", "sector": "Healthcare"},
    "Coal India": {"ticker": "COALINDIA.NS", "sector": "Energy"},
    "Divis Labs": {"ticker": "DIVISLAB.NS", "sector": "Healthcare"},
    "Dr. Reddy's Labs": {"ticker": "DRREDDY.NS", "sector": "Healthcare"},
    "Eicher Motors": {"ticker": "EICHERMOT.NS", "sector": "Consumer Discretionary"},
    "Grasim Industries": {"ticker": "GRASIM.NS", "sector": "Materials"},
    "HCL Technologies": {"ticker": "HCLTECH.NS", "sector": "Information Technology"},
    "HDFC Bank": {"ticker": "HDFCBANK.NS", "sector": "Financials"},
    "HDFC Life": {"ticker": "HDFCLIFE.NS", "sector": "Financials"},
    "Hero MotoCorp": {"ticker": "HEROMOTOCO.NS", "sector": "Consumer Discretionary"},
    "Hindalco": {"ticker": "HINDALCO.NS", "sector": "Materials"},
    "HUL": {"ticker": "HINDUNILVR.NS", "sector": "Consumer Staples"},
    "ICICI Bank": {"ticker": "ICICIBANK.NS", "sector": "Financials"},
    "IndusInd Bank": {"ticker": "INDUSINDBK.NS", "sector": "Financials"},
    "Infosys": {"ticker": "INFY.NS", "sector": "Information Technology"},
    "ITC": {"ticker": "ITC.NS", "sector": "Consumer Staples"},
    "JSW Steel": {"ticker": "JSWSTEEL.NS", "sector": "Materials"},
    "Kotak Mahindra Bank": {"ticker": "KOTAKBANK.NS", "sector": "Financials"},
    "L&T": {"ticker": "LT.NS", "sector": "Industrials"},
    # LTIM.NS is LTIMindtree's symbol; the entry was previously labelled
    # "L&T Technology Services", a different company, so the news query
    # fetched articles for the wrong firm.
    "LTIMindtree": {"ticker": "LTIM.NS", "sector": "Information Technology"},
    "M&M": {"ticker": "M&M.NS", "sector": "Consumer Discretionary"},
    "Maruti Suzuki": {"ticker": "MARUTI.NS", "sector": "Consumer Discretionary"},
    "Nestle India": {"ticker": "NESTLEIND.NS", "sector": "Consumer Staples"},
    "NTPC": {"ticker": "NTPC.NS", "sector": "Utilities"},
    "ONGC": {"ticker": "ONGC.NS", "sector": "Energy"},
    "Power Grid": {"ticker": "POWERGRID.NS", "sector": "Utilities"},
    "Reliance": {"ticker": "RELIANCE.NS", "sector": "Energy"},
    "SBI Life": {"ticker": "SBILIFE.NS", "sector": "Financials"},
    "SBI": {"ticker": "SBIN.NS", "sector": "Financials"},
    "Shriram Finance": {"ticker": "SHRIRAMFIN.NS", "sector": "Financials"},
    "Sun Pharma": {"ticker": "SUNPHARMA.NS", "sector": "Healthcare"},
    "Tata Consumer Products": {"ticker": "TATACONSUM.NS", "sector": "Consumer Staples"},
    "Tata Motors": {"ticker": "TATAMOTORS.NS", "sector": "Consumer Discretionary"},
    "Tata Steel": {"ticker": "TATASTEEL.NS", "sector": "Materials"},
    "TCS": {"ticker": "TCS.NS", "sector": "Information Technology"},
    "Tech Mahindra": {"ticker": "TECHM.NS", "sector": "Information Technology"},
    "Titan": {"ticker": "TITAN.NS", "sector": "Consumer Discretionary"},
    "UltraTech Cement": {"ticker": "ULTRACEMCO.NS", "sector": "Materials"},
    "Wipro": {"ticker": "WIPRO.NS", "sector": "Information Technology"},
}
# Streamlit app setup
st.set_page_config(layout="wide", page_title="NIFTY 50 News Analysis")

# Custom CSS with improved accessibility.
# Kept in a named constant so the stylesheet can be read separately from the
# call that injects it into the page.
_CUSTOM_CSS = """
<style>
.stApp {
background: linear-gradient(to bottom right, #f7fafc, #edf2f7);
}
.sidebar .sidebar-content {
background: linear-gradient(to bottom, #2d3748, #4a5568);
color: white;
border-radius: 8px;
padding: 15px;
}
.stButton>button {
background-color: #3182ce;
color: white;
border-radius: 6px;
padding: 8px 16px;
font-weight: bold;
transition: background-color 0.3s;
}
.stButton>button:hover {
background-color: #2b6cb0;
}
.stTable {
border: 1px solid #e2e8f0;
border-radius: 6px;
background: #ffffff;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.news-container {
border: 1px solid #e2e8f0;
border-radius: 6px;
padding: 12px;
margin-bottom: 12px;
background: #f7fafc;
}
.footer {
text-align: center;
padding: 15px;
color: #4a5568;
font-size: 14px;
}
[role="alert"] {
outline: 2px solid #3182ce;
}
.sentiment-positive {
color: #38a169;
font-weight: bold;
}
.sentiment-negative {
color: #e53e3e;
font-weight: bold;
}
.sentiment-neutral {
color: #718096;
font-weight: bold;
}
</style>
"""
# unsafe_allow_html is required for the <style> element to be emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Load keyword weights from JSON
@st.cache_resource
def load_keyword_weights():
    """Return the keyword -> relevance-weight mapping used to score articles.

    The mapping is read from ``keyword_weights.json`` in the current working
    directory.  Behaviour by case:

    * File missing: the built-in defaults are written to that path (best
      effort; a write failure is logged, not raised) and returned.
    * File unreadable, not valid JSON, or not a JSON object: a warning is
      logged, the file is left untouched, and the defaults are returned.
    * Otherwise: the parsed object is returned as-is.

    Returns:
        dict: keyword string -> numeric weight.
    """
    weights_path = "keyword_weights.json"
    default_weights = {
        "revenue": 3, "profit": 3, "loss": 3, "earnings": 3, "EBITDA": 3, "quarterly results": 3, "annual report": 3,
        "share price": 3, "market cap": 3, "dividend": 3, "buyback": 3, "stock split": 3, "bonus issue": 3,
        "downgrade": 3, "upgrade": 3, "bullish": 3, "bearish": 3, "rating change": 3,
        "acquisition": 2, "merger": 2, "takeover": 2, "buyout": 2, "new plant": 2, "factory": 2, "expansion": 2,
        "investment": 2, "launch": 2, "R&D": 2, "deal": 2, "agreement": 2, "MoU": 2, "partnership": 2, "collaboration": 2,
        "SEBI": 1.5, "fine": 1.5, "violation": 1.5, "compliance": 1.5, "FIR": 1.5, "probe": 1.5, "subsidy": 1.5,
        "tax": 1.5, "regulation": 1.5, "policy change": 1.5, "license": 1.5, "CEO": 1.5, "CFO": 1.5, "resigns": 1.5,
        "appointed": 1.5, "stepping down": 1.5, "fraud": 1.5, "scandal": 1.5, "mismanagement": 1.5, "whistleblower": 1.5,
        "inflation": 1, "GDP": 1, "interest rate": 1, "RBI policy": 1, "sanctions": 1, "trade war": 1, "conflict": 1,
        "export/import": 1, "recall": 1, "defect": 1, "complaint": 1, "customer issue": 1, "hack": 1, "breach": 1,
        "cyberattack": 1, "data leak": 1
    }
    try:
        with open(weights_path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # First run: persist the defaults so they can be edited on disk.
        # The working directory may be read-only, so a failed write must not
        # take the whole app down.
        try:
            with open(weights_path, "w", encoding="utf-8") as f:
                json.dump(default_weights, f, indent=4)
        except OSError as exc:
            logger.warning(f"Could not write {weights_path}: {exc}")
        return default_weights
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Do not overwrite a file the user may be in the middle of editing.
        logger.warning(f"Could not read {weights_path}, using default keyword weights: {exc}")
        return default_weights
    if not isinstance(loaded, dict):
        logger.warning(f"{weights_path} does not contain a JSON object, using default keyword weights")
        return default_weights
    return loaded


keyword_weights = load_keyword_weights()
# Lazy-load models
@st.cache_resource
def init_models():
    """Construct the two transformers pipelines used by the analysis.

    Returns:
        tuple: ``(summarizer, classifier)`` — a "summarization" pipeline and a
        "sentiment-analysis" pipeline, both created with ``device=-1``.

    If constructing either pipeline raises, the message is shown with
    ``st.error`` and ``st.stop()`` is called.
    """
    try:
        loaded = (
            pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1),
            pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=-1),
        )
    except Exception as e:
        st.error(f"Failed to initialize models: {str(e)}")
        st.stop()
    else:
        return loaded


# Placeholders; the pipelines are only built once the user requests an analysis.
summarizer, classifier = None, None
# Sidebar controls
with st.sidebar:
    st.title("NIFTY 50 News Analysis")
    st.info("Analyze news sentiment for companies by sector over different time frames.", icon="ℹ️")
    # Distinct sector names, alphabetically ordered, feed the sector picker.
    sectors = sorted({entry['sector'] for entry in nifty_50_data.values()})
    selected_sector = st.selectbox("Select a Sector", sectors, help="Choose a sector to analyze")
    period_choices = ["1D", "5D", "1M", "6M", "YTD", "1Y", "5Y"]
    selected_period = st.selectbox(
        "Select Time Frame",
        period_choices,
        index=2,  # pre-selects "1M"
        help="Select the time range for news",
    )
    button = st.button("Analyze News", key="analyze_button")
# Function to calculate time range
def get_date_range(period):
    """Translate a time-frame code into ``(start, end)`` date strings.

    Args:
        period: One of "1D", "5D", "1M", "6M", "YTD", "1Y", "5Y".  Any other
            value is treated like "5Y".

    Returns:
        tuple[str, str]: start and end dates formatted as ``YYYY-MM-DD``; the
        end date is always the current local date.
    """
    now = datetime.now()
    # Fixed look-back windows.  "1D" is deliberately broadened to 36 hours.
    lookbacks = {
        "1D": timedelta(hours=36),
        "5D": timedelta(days=5),
        "1M": timedelta(days=30),
        "6M": timedelta(days=180),
        "1Y": timedelta(days=365),
    }
    if period == "YTD":
        # Year-to-date starts on 1 January of the current year.
        begin = datetime(now.year, 1, 1)
    else:
        # Anything not listed above falls back to the five-year window.
        begin = now - lookbacks.get(period, timedelta(days=365 * 5))
    date_format = '%Y-%m-%d'
    return begin.strftime(date_format), now.strftime(date_format)
# Keyword matching shared by the fetch filter and the summarizer
def _keyword_in_text(keyword, lowered_text):
    """Return True when *keyword* occurs in the already-lowercased text.

    All-lowercase keywords use a plain substring test.  Keywords that contain
    upper-case letters (acronyms such as "FIR", "MoU", "CEO") are matched
    case-insensitively but only as whole words: compared verbatim they can
    never match lowercased text, and a lowered substring test would make
    "fir" hit "firm" and "mou" hit "amount".
    """
    needle = keyword.lower()
    if needle == keyword:
        return needle in lowered_text
    whole_word = r"(?<!\w)" + re.escape(needle) + r"(?!\w)"
    return re.search(whole_word, lowered_text) is not None


def _matched_keywords(*texts):
    """Return the keys of ``keyword_weights`` found in any of *texts* (None-safe)."""
    lowered = [(text or "").lower() for text in texts]
    return [
        keyword for keyword in keyword_weights
        if any(_keyword_in_text(keyword, text) for text in lowered)
    ]


# reraise=True makes the final failure surface as the original exception
# rather than tenacity's RetryError, so the user-facing message stays useful.
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5), reraise=True)
def _query_newsapi(api_key, company_name, from_date, to_date, page_size, period):
    """Run one blocking NewsAPI "everything" query and return its article list.

    Exceptions propagate so the retry decorator can see them.
    """
    newsapi = NewsApiClient(api_key=api_key)
    return newsapi.get_everything(
        q=company_name,
        from_param=from_date,
        to=to_date if period != "1D" else None,
        language="en",
        sort_by="publishedAt",
        page_size=page_size
    )["articles"]


# Async news fetching with retry logic
async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
    """Fetch and filter recent articles for one company.

    The NewsAPI client is synchronous, so the query runs in the event loop's
    default executor; this lets several companies be fetched concurrently
    under ``asyncio.gather``.  Failed queries are retried before giving up.

    Args:
        session: Accepted for interface compatibility; not used here.
        company_name: Search query sent to NewsAPI.
        from_date: Start date string for the query.
        to_date: End date string; omitted from the query when period is "1D".
        api_key: NewsAPI key.
        page_size: Articles to request; forced to 50 when period is "1D".
        period: Time-frame code.  For "1D" no keyword filtering is applied.

    Returns:
        tuple: ``(company_name, articles)`` with at most five articles.  For
        periods other than "1D" only articles whose title or description
        mentions a weighted keyword are kept, and each gets a
        ``"relevance_weight"`` entry.  On any error the list is empty and the
        error is logged and shown with ``st.error``.
    """
    try:
        page_size = 50 if period == "1D" else page_size  # Increase for 1D
        loop = asyncio.get_running_loop()
        articles = await loop.run_in_executor(
            None, _query_newsapi, api_key, company_name, from_date, to_date, page_size, period
        )
        if period == "1D":
            relevant_articles = articles  # No filtering for 1D
        else:
            relevant_articles = []
            for article in articles:
                matched = _matched_keywords(article.get("title"), article.get("description"))
                if matched:
                    article["relevance_weight"] = sum(keyword_weights.get(keyword, 0) for keyword in matched)
                    relevant_articles.append(article)
        logger.info(f"Fetched {len(articles)} articles, {len(relevant_articles)} relevant for {company_name} in {period}")
        return company_name, relevant_articles[:5]
    except Exception as e:
        logger.error(f"Error fetching news for {company_name}: {str(e)}")
        st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.")
        return company_name, []


# Batch summarize and classify articles
def summarize_and_classify_batch(news_articles):
    """Summarize articles, classify their sentiment and tally keyword themes.

    Uses the module-level ``summarizer`` and ``classifier`` pipelines.
    Articles with no content, description or title are skipped; the remaining
    articles stay paired with their own text so every summary and sentiment
    belongs to the article it is reported for.

    Args:
        news_articles: Iterable of NewsAPI-style article dicts.

    Returns:
        tuple: ``(summaries, sentiment_counts, top_themes)`` where
        ``summaries`` holds at most three dicts (title, summary, key_insight,
        sentiment, confidence, url, published_at), ``sentiment_counts`` maps
        "Positive"/"Negative"/"Neutral" to counts over all processed
        articles, and ``top_themes`` is up to three ``(keyword, count)``
        pairs.  On any error, empty results with zero counts are returned.
    """
    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
    try:
        # Keep each article together with its text so dropping empty ones
        # cannot shift the pairing between articles and model outputs.
        usable = []
        for article in news_articles:
            text = article.get("content", "") or article.get("description", "") or article.get("title", "")
            if text:
                usable.append((article, text))
        if not usable:
            return [], sentiment_counts, []
        contents = [text[:1024] for _, text in usable]
        summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
        summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]
        sentiment_results = classifier(summaries_texts, batch_size=4)
        summaries = []
        key_themes = {}
        for (article, text), summary_text, sentiment_result in zip(usable, summaries_texts, sentiment_results):
            # Texts of 100 characters or fewer are shown verbatim.
            summary = summary_text if len(text) > 100 else text
            sentences = summary.split(". ")
            # The key insight is the sentence carrying the most keyword weight.
            key_insight = max(
                sentences,
                key=lambda s: sum(keyword_weights.get(k, 0) for k in _matched_keywords(s)),
                default=summary,
            )
            sentiment_label = sentiment_result["label"]
            sentiment_score = sentiment_result["score"]
            # Low-confidence predictions (score <= 0.6) count as Neutral.
            if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
                sentiment_display = "Positive"
            elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
                sentiment_display = "Negative"
            else:
                sentiment_display = "Neutral"
            sentiment_counts[sentiment_display] += 1
            for keyword in _matched_keywords(article.get("title"), article.get("description")):
                key_themes[keyword] = key_themes.get(keyword, 0) + 1
            summaries.append({
                "title": article.get("title", "No title"),
                "summary": summary,
                "key_insight": key_insight,
                "sentiment": sentiment_display,
                "confidence": sentiment_score,
                "url": article.get("url", ""),
                "published_at": article.get("publishedAt", "")
            })
        top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
        logger.info(f"Sentiment counts: {sentiment_counts}")
        return summaries[:3], sentiment_counts, top_themes
    except Exception as e:
        logger.exception(f"Error in summarize_and_classify: {str(e)}")
        return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []
# Display news articles
def display_news_articles(news_articles, company_name, selected_period):
    """Render the summarized articles for one company.

    Args:
        news_articles: Iterable of dicts with the keys "title", "summary",
            "key_insight", "sentiment", "confidence", "url" and
            "published_at".
        company_name: Company shown in the section header.
        selected_period: Time-frame code shown in the section header.
    """
    colored_header(
        f"Summarized News for {company_name} ({selected_period})",
        description="Key Updates from the Selected Period",
        color_name="blue-70"
    )
    # sentiment label -> (CSS class, indicator emoji); anything else is Neutral.
    badges = {
        "Positive": ("sentiment-positive", "🟢"),
        "Negative": ("sentiment-negative", "🔴"),
    }
    for news in news_articles:
        with st.container():
            st.markdown('<div class="news-container" role="article">', unsafe_allow_html=True)
            col1, col2 = st.columns([3, 1])
            with col1:
                # The title comes from an external feed and may be missing.
                st.subheader(news['title'] or "No title", help="News article title")
                st.write(f"**Summary**: {news['summary']}")
                st.write(f"**Key Insight**: {news['key_insight']}")
                # The URL is external data: render it as plain markdown (no
                # raw HTML) and skip the link when there is nothing to link to.
                if news['url']:
                    st.markdown(f"[Read More]({news['url']})")
            with col2:
                label = news['sentiment'] if news['sentiment'] in badges else "Neutral"
                css_class, icon = badges.get(label, ("sentiment-neutral", "⚪"))
                st.markdown(
                    f'<span class="{css_class}" role="status">{icon} {label} ({news["confidence"]*100:.1f}%)</span>',
                    unsafe_allow_html=True,
                )
                st.write(f"**Published**: {news['published_at']}")
            st.markdown('</div>', unsafe_allow_html=True)
# Main app logic
# Runs top to bottom on every Streamlit rerun; the analysis itself only runs
# when the sidebar button was pressed on this rerun.
st.title("📰 NIFTY 50 Sector News Analysis")
st.markdown("Analyze news sentiment for companies in a selected sector to guide investment decisions.", unsafe_allow_html=True)
if button:
    # Build the models on first use only.
    if not summarizer or not classifier:
        summarizer, classifier = init_models()
    api_key = os.getenv("NEWSAPI_KEY")
    if not api_key:
        st.error("NEWSAPI_KEY environment variable not set. Please configure it.")
        st.stop()
    with st.spinner("Fetching and analyzing news..."):
        from_date, to_date = get_date_range(selected_period)
        companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
        if not companies_in_sector:
            st.warning(f"No companies found for {selected_sector} sector.")
            st.stop()
        sentiment_data = []
        all_news = {}
        sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
        max_articles = 0
        sector_themes = {}

        async def fetch_all_news():
            """Fetch news for every company in the sector; results keep task order."""
            async with aiohttp.ClientSession() as session:
                tasks = [
                    fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period=selected_period)
                    for company_name in companies_in_sector
                ]
                return await asyncio.gather(*tasks, return_exceptions=True)

        progress_bar = st.progress(0)
        progress_text = st.empty()
        results = asyncio.run(fetch_all_news())
        company_count = len(companies_in_sector)
        # gather() preserves task order, so results line up with the company
        # names even when a slot holds an exception instead of a tuple.
        for idx, (company_name, result) in enumerate(zip(companies_in_sector, results), start=1):
            progress_bar.progress(idx / company_count)
            progress_text.text(f"Processing {company_name} ({idx}/{company_count})")
            if isinstance(result, BaseException):
                # return_exceptions=True hands failures back as objects;
                # unpacking one as a tuple would crash the whole run.
                logger.error(f"Error fetching news for {company_name}: {result}")
                st.warning(f"Failed to fetch news for {company_name}: {result}")
                continue
            _, news_articles = result
            if news_articles:
                summarized_news, sentiment_counts, top_themes = summarize_and_classify_batch(news_articles)
                total_articles = sum(sentiment_counts.values())
                max_articles = max(max_articles, total_articles)
                # Net sentiment in [-1, 1]: (positive - negative) / total.
                sentiment_score = (sentiment_counts["Positive"] - sentiment_counts["Negative"]) / total_articles if total_articles > 0 else 0
                dominant_sentiment = max(sentiment_counts, key=sentiment_counts.get)
                sentiment_data.append({
                    "Company": company_name,
                    "Positive": sentiment_counts["Positive"],
                    "Negative": sentiment_counts["Negative"],
                    "Neutral": sentiment_counts["Neutral"],
                    "Total": total_articles,
                    "Sentiment Score": sentiment_score,
                    "Dominant Sentiment": dominant_sentiment,
                    "Top Themes": [theme[0] for theme in top_themes]
                })
                all_news[company_name] = summarized_news
                for sentiment, count in sentiment_counts.items():
                    sector_sentiment_counts[sentiment] += count
                for theme, count in top_themes:
                    sector_themes[theme] = sector_themes.get(theme, 0) + count
            else:
                st.warning(f"No news found for {company_name}.{' Try a longer time frame like 5D.' if selected_period == '1D' else ''}")
        progress_bar.empty()
        progress_text.empty()
    if sentiment_data:
        colored_header(
            f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
            description=f"News from {from_date} to {to_date}",
            color_name="blue-70"
        )
        sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
        sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
        st.subheader("Company Sentiment Overview")
        st.table(sentiment_df)
        st.subheader("Sentiment Score Distribution")
        fig = px.bar(
            sentiment_df,
            x="Company",
            y="Sentiment Score",
            color="Sentiment Score",
            color_continuous_scale="RdYlGn",
            title="Sentiment Scores by Company",
            labels={"Sentiment Score": "Sentiment Score (-1 to 1)"},
            height=400
        )
        st.plotly_chart(fig, use_container_width=True)
        colored_header("📊 Decision Guidance", description="Investment Insights from News Sentiment", color_name="violet-70")
        st.markdown("**Disclaimer**: These are news-based insights, not financial advice. Consult a financial advisor.", unsafe_allow_html=True)
        sector_total = sum(sector_sentiment_counts.values())
        sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
        sector_negative_pct = (sector_sentiment_counts["Negative"] / sector_total * 100) if sector_total > 0 else 0
        sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
        # A sector is Positive/Negative only when that class holds a strict majority.
        sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"
        st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
        outlook = "Favorable 📈" if sector_positive_pct > 50 else "Cautious 📉" if sector_negative_pct > 50 else "Neutral ⚖️"
        st.markdown(f"- **Investment Outlook**: {outlook} for {selected_sector} sector.")
        negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
        if negative_themes:
            st.markdown(f"- **Risks**: Watch for issues related to {', '.join(negative_themes[:2])}.")
        st.markdown("**Top Picks**:")
        for company in sentiment_df.head(2).to_dict("records"):
            st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Neutral']} Neutral).")
        st.markdown("**Tips**:")
        st.markdown("- Prioritize companies with high article counts for stronger signals.")
        st.markdown("- Check Neutral news for hidden opportunities or risks.")
        st.markdown("**Company Insights**:")
        for company in sentiment_data:
            # Confidence reflects this company's article count relative to the
            # best-covered company.  max_articles is 0 when every analysis
            # returned zero counts, so guard the division.
            coverage = company["Total"] / max_articles if max_articles else 0
            confidence = "High" if coverage > 0.7 else "Medium" if coverage > 0.3 else "Low"
            recommendation = "Consider buying 📈" if company["Sentiment Score"] > 0.3 else "Avoid 📉" if company["Sentiment Score"] < -0.3 else "Monitor ⚖️"
            themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
            st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({themes_str}, {company['Total']} articles, Confidence: {confidence}). {recommendation}.")
        # Article cards follow the same best-to-worst order as the table.
        for company_name in sentiment_df["Company"]:
            if company_name in all_news and all_news[company_name]:
                display_news_articles(all_news[company_name], company_name, selected_period)
    else:
        st.warning(f"No news found for {selected_sector} sector. Try a longer time frame like 5D or 1M, or check your NEWSAPI_KEY.")
# Footer
st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)