import streamlit as st
import asyncio
import aiohttp
from newsapi import NewsApiClient
from transformers import pipeline
from streamlit_extras.colored_header import colored_header
from datetime import datetime, timedelta
import pandas as pd
import plotly.express as px
import json
import os
from tenacity import retry, stop_after_attempt, wait_exponential
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
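# To run locally (assuming this file is saved as app.py; adjust to your actual filename):
#   export NEWSAPI_KEY="<your NewsAPI key>"
#   streamlit run app.py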
# NIFTY 50 companies with tickers and sectors
nifty_50_data = {
    "Adani Enterprises": {"ticker": "ADANIENT.NS", "sector": "Industrials"},
    "Adani Ports": {"ticker": "ADANIPORTS.NS", "sector": "Industrials"},
    "Apollo Hospitals": {"ticker": "APOLLOHOSP.NS", "sector": "Healthcare"},
    "Asian Paints": {"ticker": "ASIANPAINT.NS", "sector": "Consumer Discretionary"},
    "Axis Bank": {"ticker": "AXISBANK.NS", "sector": "Financials"},
    "Bajaj Auto": {"ticker": "BAJAJ-AUTO.NS", "sector": "Consumer Discretionary"},
    "Bajaj Finserv": {"ticker": "BAJAJFINSV.NS", "sector": "Financials"},
    "Bajaj Finance": {"ticker": "BAJFINANCE.NS", "sector": "Financials"},
    "Bharti Airtel": {"ticker": "BHARTIARTL.NS", "sector": "Communication Services"},
    "BPCL": {"ticker": "BPCL.NS", "sector": "Energy"},
    "Britannia": {"ticker": "BRITANNIA.NS", "sector": "Consumer Staples"},
    "Cipla": {"ticker": "CIPLA.NS", "sector": "Healthcare"},
    "Coal India": {"ticker": "COALINDIA.NS", "sector": "Energy"},
    "Divis Labs": {"ticker": "DIVISLAB.NS", "sector": "Healthcare"},
    "Dr. Reddy's Labs": {"ticker": "DRREDDY.NS", "sector": "Healthcare"},
    "Eicher Motors": {"ticker": "EICHERMOT.NS", "sector": "Consumer Discretionary"},
    "Grasim Industries": {"ticker": "GRASIM.NS", "sector": "Materials"},
    "HCL Technologies": {"ticker": "HCLTECH.NS", "sector": "Information Technology"},
    "HDFC Bank": {"ticker": "HDFCBANK.NS", "sector": "Financials"},
    "HDFC Life": {"ticker": "HDFCLIFE.NS", "sector": "Financials"},
    "Hero MotoCorp": {"ticker": "HEROMOTOCO.NS", "sector": "Consumer Discretionary"},
    "Hindalco": {"ticker": "HINDALCO.NS", "sector": "Materials"},
    "HUL": {"ticker": "HINDUNILVR.NS", "sector": "Consumer Staples"},
    "ICICI Bank": {"ticker": "ICICIBANK.NS", "sector": "Financials"},
    "IndusInd Bank": {"ticker": "INDUSINDBK.NS", "sector": "Financials"},
    "Infosys": {"ticker": "INFY.NS", "sector": "Information Technology"},
    "ITC": {"ticker": "ITC.NS", "sector": "Consumer Staples"},
    "JSW Steel": {"ticker": "JSWSTEEL.NS", "sector": "Materials"},
    "Kotak Mahindra Bank": {"ticker": "KOTAKBANK.NS", "sector": "Financials"},
    "L&T": {"ticker": "LT.NS", "sector": "Industrials"},
"L&T Technology Services": {"ticker": "LTIM.NS", "sector": "Information Technology"}, | |
"M&M": {"ticker": "M&M.NS", "sector": "Consumer Discretionary"}, | |
"Maruti Suzuki": {"ticker": "MARUTI.NS", "sector": "Consumer Discretionary"}, | |
"Nestle India": {"ticker": "NESTLEIND.NS", "sector": "Consumer Staples"}, | |
"NTPC": {"ticker": "NTPC.NS", "sector": "Utilities"}, | |
"ONGC": {"ticker": "ONGC.NS", "sector": "Energy"}, | |
"Power Grid": {"ticker": "POWERGRID.NS", "sector": "Utilities"}, | |
"Reliance": {"ticker": "RELIANCE.NS", "sector": "Energy"}, | |
"SBI Life": {"ticker": "SBILIFE.NS", "sector": "Financials"}, | |
"SBI": {"ticker": "SBIN.NS", "sector": "Financials"}, | |
"Shriram Finance": {"ticker": "SHRIRAMFIN.NS", "sector": "Financials"}, | |
"Sun Pharma": {"ticker": "SUNPHARMA.NS", "sector": "Healthcare"}, | |
"Tata Consumer Products": {"ticker": "TATACONSUM.NS", "sector": "Consumer Staples"}, | |
"Tata Motors": {"ticker": "TATAMOTORS.NS", "sector": "Consumer Discretionary"}, | |
"Tata Steel": {"ticker": "TATASTEEL.NS", "sector": "Materials"}, | |
"TCS": {"ticker": "TCS.NS", "sector": "Information Technology"}, | |
"Tech Mahindra": {"ticker": "TECHM.NS", "sector": "Information Technology"}, | |
"Titan": {"ticker": "TITAN.NS", "sector": "Consumer Discretionary"}, | |
"UltraTech Cement": {"ticker": "ULTRACEMCO.NS", "sector": "Materials"}, | |
"Wipro": {"ticker": "WIPRO.NS", "sector": "Information Technology"}, | |
} | |
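# Tickers use Yahoo Finance's ".NS" suffix for NSE listings; note that only the company name
# (as the NewsAPI query) and the sector (for filtering) are used below, not the ticker itself.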
# Streamlit app setup
st.set_page_config(page_title="NIFTY 50 News Analysis", layout="wide")

# Custom CSS with improved accessibility
st.markdown("""
    <style>
    .stApp {
        background: linear-gradient(to bottom right, #f7fafc, #edf2f7);
    }
    .sidebar .sidebar-content {
        background: linear-gradient(to bottom, #2d3748, #4a5568);
        color: white;
        border-radius: 8px;
        padding: 15px;
    }
    .stButton>button {
        background-color: #3182ce;
        color: white;
        border-radius: 6px;
        padding: 8px 16px;
        font-weight: bold;
        transition: background-color 0.3s;
    }
    .stButton>button:hover {
        background-color: #2b6cb0;
    }
    .stTable {
        border: 1px solid #e2e8f0;
        border-radius: 6px;
        background: #ffffff;
        box-shadow: 0 1px 3px rgba(0,0,0,0.1);
    }
    .news-container {
        border: 1px solid #e2e8f0;
        border-radius: 6px;
        padding: 12px;
        margin-bottom: 12px;
        background: #f7fafc;
    }
    .footer {
        text-align: center;
        padding: 15px;
        color: #4a5568;
        font-size: 14px;
    }
    [role="alert"] {
        outline: 2px solid #3182ce;
    }
    .sentiment-positive {
        color: #38a169;
        font-weight: bold;
    }
    .sentiment-negative {
        color: #e53e3e;
        font-weight: bold;
    }
    .sentiment-neutral {
        color: #718096;
        font-weight: bold;
    }
    </style>
""", unsafe_allow_html=True)
# Load keyword weights from JSON
def load_keyword_weights():
    try:
        with open("keyword_weights.json", "r") as f:
            return json.load(f)
    except FileNotFoundError:
        keyword_weights = {
            "revenue": 3, "profit": 3, "loss": 3, "earnings": 3, "EBITDA": 3, "quarterly results": 3, "annual report": 3,
            "share price": 3, "market cap": 3, "dividend": 3, "buyback": 3, "stock split": 3, "bonus issue": 3,
            "downgrade": 3, "upgrade": 3, "bullish": 3, "bearish": 3, "rating change": 3,
            "acquisition": 2, "merger": 2, "takeover": 2, "buyout": 2, "new plant": 2, "factory": 2, "expansion": 2,
            "investment": 2, "launch": 2, "R&D": 2, "deal": 2, "agreement": 2, "MoU": 2, "partnership": 2, "collaboration": 2,
            "SEBI": 1.5, "fine": 1.5, "violation": 1.5, "compliance": 1.5, "FIR": 1.5, "probe": 1.5, "subsidy": 1.5,
            "tax": 1.5, "regulation": 1.5, "policy change": 1.5, "license": 1.5, "CEO": 1.5, "CFO": 1.5, "resigns": 1.5,
            "appointed": 1.5, "stepping down": 1.5, "fraud": 1.5, "scandal": 1.5, "mismanagement": 1.5, "whistleblower": 1.5,
            "inflation": 1, "GDP": 1, "interest rate": 1, "RBI policy": 1, "sanctions": 1, "trade war": 1, "conflict": 1,
            "export/import": 1, "recall": 1, "defect": 1, "complaint": 1, "customer issue": 1, "hack": 1, "breach": 1,
            "cyberattack": 1, "data leak": 1
        }
        with open("keyword_weights.json", "w") as f:
            json.dump(keyword_weights, f, indent=4)
        return keyword_weights

keyword_weights = load_keyword_weights()
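# keyword_weights.json is a flat {keyword: weight} map, e.g. {"revenue": 3, "acquisition": 2, "SEBI": 1.5};
# higher weights mark terms that count more when scoring article relevance and picking key insights,
# and matching against article text is done case-insensitively.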
# Lazy-load models
def init_models():
    try:
        summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1)
        classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=-1)
        return summarizer, classifier
    except Exception as e:
        st.error(f"Failed to initialize models: {str(e)}")
        st.stop()

summarizer, classifier = None, None
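# device=-1 keeps both transformer pipelines on CPU; the models stay None until the user
# clicks "Analyze News", so start-up is fast but the first analysis takes longer.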
# Sidebar controls
with st.sidebar:
    st.title("NIFTY 50 News Analysis")
    st.info("Analyze news sentiment for companies by sector over different time frames.", icon="ℹ️")
    sectors = sorted(set(data['sector'] for data in nifty_50_data.values()))
    selected_sector = st.selectbox("Select a Sector", sectors, help="Choose a sector to analyze")
    selected_period = st.selectbox("Select Time Frame", ["1D", "5D", "1M", "6M", "YTD", "1Y", "5Y"], index=2, help="Select the time range for news")
    button = st.button("Analyze News", key="analyze_button")

# Function to calculate time range
def get_date_range(period):
    end_date = datetime.now()
    if period == "1D":
        start_date = end_date - timedelta(hours=36)  # Broaden to 36 hours
    elif period == "5D":
        start_date = end_date - timedelta(days=5)
    elif period == "1M":
        start_date = end_date - timedelta(days=30)
    elif period == "6M":
        start_date = end_date - timedelta(days=180)
    elif period == "YTD":
        start_date = datetime(end_date.year, 1, 1)
    elif period == "1Y":
        start_date = end_date - timedelta(days=365)
    else:  # 5Y
        start_date = end_date - timedelta(days=365 * 5)
    return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')
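# Example: get_date_range("YTD") returns ("<current-year>-01-01", <today>) as 'YYYY-MM-DD' strings;
# the "1D" window is widened to 36 hours so late-evening runs still pick up the previous day's news.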
# Async news fetching with retry logic
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
def fetch_everything_with_retry(newsapi, **kwargs):
    # Retry transient NewsAPI failures up to 3 times with exponential backoff
    return newsapi.get_everything(**kwargs)

async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
    # Note: NewsApiClient is synchronous; the aiohttp session is accepted for interface symmetry but unused here
    try:
        newsapi = NewsApiClient(api_key=api_key)
        page_size = 50 if period == "1D" else page_size  # Fetch more articles for the narrow 1D window
        articles = fetch_everything_with_retry(
            newsapi,
            q=company_name,
            from_param=from_date,
            to=to_date if period != "1D" else None,
            language="en",
            sort_by="publishedAt",
            page_size=page_size
        )["articles"]
        if period == "1D":
            relevant_articles = articles  # No keyword filtering for 1D
        else:
            relevant_articles = []
            for article in articles:
                title = (article.get("title", "") or "").lower()
                desc = (article.get("description", "") or "").lower()
                # Keywords are matched case-insensitively against the lower-cased title/description
                if any(keyword.lower() in title or keyword.lower() in desc for keyword in keyword_weights):
                    article["relevance_weight"] = sum(weight for keyword, weight in keyword_weights.items() if keyword.lower() in title or keyword.lower() in desc)
                    relevant_articles.append(article)
        logger.info(f"Fetched {len(articles)} articles, {len(relevant_articles)} relevant for {company_name} in {period}")
        return company_name, relevant_articles[:5]
    except Exception as e:
        logger.error(f"Error fetching news for {company_name}: {str(e)}")
        st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.")
        return company_name, []
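# Note: NewsAPI's free developer plan typically only searches roughly the last month of articles,
# so the 6M/1Y/5Y ranges may return far fewer results than expected on that plan.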
# Batch summarize and classify articles
def summarize_and_classify_batch(news_articles):
    try:
        sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
        summaries = []
        key_themes = {}
        # Pair each article with its text so indices stay aligned after dropping empty entries
        paired = [(article, (article.get("content", "") or article.get("description", "") or article.get("title", ""))[:1024]) for article in news_articles]
        paired = [(article, text) for article, text in paired if text]
        if not paired:
            return [], sentiment_counts, []
        articles_kept = [article for article, _ in paired]
        contents = [text for _, text in paired]
        summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
        summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]
        sentiment_results = classifier(summaries_texts, batch_size=4)
        for idx, article in enumerate(articles_kept):
            if idx >= len(summaries_texts):
                continue
            # Use the model summary for longer texts; very short texts are shown as-is
            summary = summaries_texts[idx] if len(contents[idx]) > 100 else contents[idx]
            sentences = summary.split(". ")
            key_insight = max(sentences, key=lambda s: sum(keyword_weights.get(k, 0) for k in keyword_weights if k.lower() in s.lower()), default=summary)
            sentiment_result = sentiment_results[idx]
            sentiment_label = sentiment_result["label"]
            sentiment_score = sentiment_result["score"]
            if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
                sentiment_counts["Positive"] += 1
                sentiment_display = "Positive"
            elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
                sentiment_counts["Negative"] += 1
                sentiment_display = "Negative"
            else:
                sentiment_counts["Neutral"] += 1
                sentiment_display = "Neutral"
            title = (article.get("title", "") or "").lower()
            desc = (article.get("description", "") or "").lower()
            for keyword in keyword_weights:
                if keyword.lower() in title or keyword.lower() in desc:
                    key_themes[keyword] = key_themes.get(keyword, 0) + 1
            summaries.append({
                "title": article.get("title", "No title"),
                "summary": summary,
                "key_insight": key_insight,
                "sentiment": sentiment_display,
                "confidence": sentiment_score,
                "url": article.get("url", ""),
                "published_at": article.get("publishedAt", "")
            })
        top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
        logger.info(f"Sentiment counts: {sentiment_counts}")
        return summaries[:3], sentiment_counts, top_themes
    except Exception as e:
        logger.error(f"Error in summarize_and_classify: {str(e)}")
        return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []
# Display news articles
def display_news_articles(news_articles, company_name, selected_period):
    colored_header(
        f"Summarized News for {company_name} ({selected_period})",
        description="Key Updates from the Selected Period",
        color_name="blue-70"
    )
    for news in news_articles:
        with st.container():
            st.markdown('<div class="news-container" role="article">', unsafe_allow_html=True)
            col1, col2 = st.columns([3, 1])
            with col1:
                st.subheader(news['title'], help="News article title")
                st.write(f"**Summary**: {news['summary']}")
                st.write(f"**Key Insight**: {news['key_insight']}")
                st.markdown(f"[Read More]({news['url']})", unsafe_allow_html=True)
            with col2:
                if news['sentiment'] == "Positive":
                    st.markdown(f'<span class="sentiment-positive" role="status">🟢 Positive ({news["confidence"]*100:.1f}%)</span>', unsafe_allow_html=True)
                elif news['sentiment'] == "Negative":
                    st.markdown(f'<span class="sentiment-negative" role="status">🔴 Negative ({news["confidence"]*100:.1f}%)</span>', unsafe_allow_html=True)
                else:
                    st.markdown(f'<span class="sentiment-neutral" role="status">⚪ Neutral ({news["confidence"]*100:.1f}%)</span>', unsafe_allow_html=True)
                st.write(f"**Published**: {news['published_at']}")
            st.markdown('</div>', unsafe_allow_html=True)
# Main app logic
st.title("📰 NIFTY 50 Sector News Analysis")
st.markdown("Analyze news sentiment for companies in a selected sector to guide investment decisions.", unsafe_allow_html=True)

if button:
    if not summarizer or not classifier:
        summarizer, classifier = init_models()
    api_key = os.getenv("NEWSAPI_KEY")
    if not api_key:
        st.error("NEWSAPI_KEY environment variable not set. Please configure it.")
        st.stop()
    with st.spinner("Fetching and analyzing news..."):
        from_date, to_date = get_date_range(selected_period)
        companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
        if not companies_in_sector:
            st.warning(f"No companies found for {selected_sector} sector.")
            st.stop()
        sentiment_data = []
        all_news = {}
        sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
        max_articles = 0
        sector_themes = {}
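        # Fetch news for every company in the sector concurrently; each task resolves to a
        # (company_name, articles) tuple even on failure, so one bad request does not abort the batch.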
        async def fetch_all_news():
            async with aiohttp.ClientSession() as session:
                tasks = [
                    fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period=selected_period)
                    for company_name in companies_in_sector.keys()
                ]
                return await asyncio.gather(*tasks, return_exceptions=True)

        progress_bar = st.progress(0)
        progress_text = st.empty()
        results = asyncio.run(fetch_all_news())
        # Drop any task that raised instead of returning a (company_name, articles) tuple
        results = [r for r in results if not isinstance(r, Exception)]
        for idx, (company_name, news_articles) in enumerate(results):
            progress_bar.progress((idx + 1) / len(companies_in_sector))
            progress_text.text(f"Processing {company_name} ({idx + 1}/{len(companies_in_sector)})")
            if news_articles:
                summarized_news, sentiment_counts, top_themes = summarize_and_classify_batch(news_articles)
                total_articles = sum(sentiment_counts.values())
                max_articles = max(max_articles, total_articles)
                # Net sentiment in [-1, 1]: (positive - negative) / total scored articles
                sentiment_score = (sentiment_counts["Positive"] - sentiment_counts["Negative"]) / total_articles if total_articles > 0 else 0
                dominant_sentiment = max(sentiment_counts, key=sentiment_counts.get)
                sentiment_data.append({
                    "Company": company_name,
                    "Positive": sentiment_counts["Positive"],
                    "Negative": sentiment_counts["Negative"],
                    "Neutral": sentiment_counts["Neutral"],
                    "Total": total_articles,
                    "Sentiment Score": sentiment_score,
                    "Dominant Sentiment": dominant_sentiment,
                    "Top Themes": [theme[0] for theme in top_themes]
                })
                all_news[company_name] = summarized_news
                for sentiment, count in sentiment_counts.items():
                    sector_sentiment_counts[sentiment] += count
                for theme, count in top_themes:
                    sector_themes[theme] = sector_themes.get(theme, 0) + count
            else:
                st.warning(f"No news found for {company_name}.{' Try a longer time frame like 5D.' if selected_period == '1D' else ''}")
        progress_bar.empty()
        progress_text.empty()
    if sentiment_data:
        colored_header(
            f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
            description=f"News from {from_date} to {to_date}",
            color_name="blue-70"
        )
        sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
        sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
        st.subheader("Company Sentiment Overview")
        st.table(sentiment_df)
        st.subheader("Sentiment Score Distribution")
        fig = px.bar(
            sentiment_df,
            x="Company",
            y="Sentiment Score",
            color="Sentiment Score",
            color_continuous_scale="RdYlGn",
            title="Sentiment Scores by Company",
            labels={"Sentiment Score": "Sentiment Score (-1 to 1)"},
            height=400
        )
        st.plotly_chart(fig, use_container_width=True)
        colored_header("📊 Decision Guidance", description="Investment Insights from News Sentiment", color_name="violet-70")
        st.markdown("**Disclaimer**: These are news-based insights, not financial advice. Consult a financial advisor.", unsafe_allow_html=True)
        sector_total = sum(sector_sentiment_counts.values())
        sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
        sector_negative_pct = (sector_sentiment_counts["Negative"] / sector_total * 100) if sector_total > 0 else 0
        sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
        sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"
        st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
        outlook = "Favorable 📈" if sector_positive_pct > 50 else "Cautious 📉" if sector_negative_pct > 50 else "Neutral ⚖️"
        st.markdown(f"- **Investment Outlook**: {outlook} for {selected_sector} sector.")
        negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
        if negative_themes:
            st.markdown(f"- **Risks**: Watch for issues related to {', '.join(negative_themes[:2])}.")
        st.markdown("**Top Picks**:")
        for company in sentiment_df.head(2).to_dict("records"):
            st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Neutral']} Neutral).")
        st.markdown("**Tips**:")
        st.markdown("- Prioritize companies with high article counts for stronger signals.")
        st.markdown("- Check Neutral news for hidden opportunities or risks.")
        st.markdown("**Company Insights**:")
        for company in sentiment_data:
            # Guard against division by zero when no company produced any scored articles
            coverage = (company["Total"] / max_articles) if max_articles > 0 else 0
            confidence = "High" if coverage > 0.7 else "Medium" if coverage > 0.3 else "Low"
            recommendation = "Consider buying 📈" if company["Sentiment Score"] > 0.3 else "Avoid 📉" if company["Sentiment Score"] < -0.3 else "Monitor ⚖️"
            themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
            st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({themes_str}, {company['Total']} articles, Confidence: {confidence}). {recommendation}.")
        for company_name in sentiment_df["Company"]:
            if company_name in all_news and all_news[company_name]:
                display_news_articles(all_news[company_name], company_name, selected_period)
    else:
        st.warning(f"No news found for {selected_sector} sector. Try a longer time frame like 5D or 1M, or check your NEWSAPI_KEY.")

# Footer
st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)