"""Streamlit app: news-sentiment overview for NIFTY 50 companies by sector.

The script fetches recent articles per company from NewsAPI, summarizes and
classifies them with Hugging Face pipelines, and renders sector-level and
company-level sentiment tables, charts and guidance.
"""

import asyncio
import functools
import json
import logging
import os
import re
from datetime import datetime, timedelta

import aiohttp
import pandas as pd
import plotly.express as px
import streamlit as st
from newsapi import NewsApiClient
from streamlit_extras.colored_header import colored_header
from tenacity import retry, stop_after_attempt, wait_exponential
from transformers import pipeline

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NIFTY 50 companies with tickers and sectors
nifty_50_data = {
    "Adani Enterprises": {"ticker": "ADANIENT.NS", "sector": "Industrials"},
    "Adani Ports": {"ticker": "ADANIPORTS.NS", "sector": "Industrials"},
    "Apollo Hospitals": {"ticker": "APOLLOHOSP.NS", "sector": "Healthcare"},
    "Asian Paints": {"ticker": "ASIANPAINT.NS", "sector": "Consumer Discretionary"},
    "Axis Bank": {"ticker": "AXISBANK.NS", "sector": "Financials"},
    "Bajaj Auto": {"ticker": "BAJAJ-AUTO.NS", "sector": "Consumer Discretionary"},
    "Bajaj Finserv": {"ticker": "BAJAJFINSV.NS", "sector": "Financials"},
    "Bajaj Finance": {"ticker": "BAJFINANCE.NS", "sector": "Financials"},
    "Bharti Airtel": {"ticker": "BHARTIARTL.NS", "sector": "Communication Services"},
    "BPCL": {"ticker": "BPCL.NS", "sector": "Energy"},
    "Britannia": {"ticker": "BRITANNIA.NS", "sector": "Consumer Staples"},
    "Cipla": {"ticker": "CIPLA.NS", "sector": "Healthcare"},
    "Coal India": {"ticker": "COALINDIA.NS", "sector": "Energy"},
    "Divis Labs": {"ticker": "DIVISLAB.NS", "sector": "Healthcare"},
    "Dr. Reddy's Labs": {"ticker": "DRREDDY.NS", "sector": "Healthcare"},
    "Eicher Motors": {"ticker": "EICHERMOT.NS", "sector": "Consumer Discretionary"},
    "Grasim Industries": {"ticker": "GRASIM.NS", "sector": "Materials"},
    "HCL Technologies": {"ticker": "HCLTECH.NS", "sector": "Information Technology"},
    "HDFC Bank": {"ticker": "HDFCBANK.NS", "sector": "Financials"},
    "HDFC Life": {"ticker": "HDFCLIFE.NS", "sector": "Financials"},
    "Hero MotoCorp": {"ticker": "HEROMOTOCO.NS", "sector": "Consumer Discretionary"},
    "Hindalco": {"ticker": "HINDALCO.NS", "sector": "Materials"},
    "HUL": {"ticker": "HINDUNILVR.NS", "sector": "Consumer Staples"},
    "ICICI Bank": {"ticker": "ICICIBANK.NS", "sector": "Financials"},
    "IndusInd Bank": {"ticker": "INDUSINDBK.NS", "sector": "Financials"},
    "Infosys": {"ticker": "INFY.NS", "sector": "Information Technology"},
    "ITC": {"ticker": "ITC.NS", "sector": "Consumer Staples"},
    "JSW Steel": {"ticker": "JSWSTEEL.NS", "sector": "Materials"},
    "Kotak Mahindra Bank": {"ticker": "KOTAKBANK.NS", "sector": "Financials"},
    "L&T": {"ticker": "LT.NS", "sector": "Industrials"},
    # NOTE(review): the name and ticker below look mismatched (LTIM.NS vs.
    # "L&T Technology Services") -- confirm which company is intended.
    "L&T Technology Services": {"ticker": "LTIM.NS", "sector": "Information Technology"},
    "M&M": {"ticker": "M&M.NS", "sector": "Consumer Discretionary"},
    "Maruti Suzuki": {"ticker": "MARUTI.NS", "sector": "Consumer Discretionary"},
    "Nestle India": {"ticker": "NESTLEIND.NS", "sector": "Consumer Staples"},
    "NTPC": {"ticker": "NTPC.NS", "sector": "Utilities"},
    "ONGC": {"ticker": "ONGC.NS", "sector": "Energy"},
    "Power Grid": {"ticker": "POWERGRID.NS", "sector": "Utilities"},
    "Reliance": {"ticker": "RELIANCE.NS", "sector": "Energy"},
    "SBI Life": {"ticker": "SBILIFE.NS", "sector": "Financials"},
    "SBI": {"ticker": "SBIN.NS", "sector": "Financials"},
    "Shriram Finance": {"ticker": "SHRIRAMFIN.NS", "sector": "Financials"},
    "Sun Pharma": {"ticker": "SUNPHARMA.NS", "sector": "Healthcare"},
    "Tata Consumer Products": {"ticker": "TATACONSUM.NS", "sector": "Consumer Staples"},
    "Tata Motors": {"ticker": "TATAMOTORS.NS", "sector": "Consumer Discretionary"},
    "Tata Steel": {"ticker": "TATASTEEL.NS", "sector": "Materials"},
    "TCS": {"ticker": "TCS.NS", "sector": "Information Technology"},
    "Tech Mahindra": {"ticker": "TECHM.NS", "sector": "Information Technology"},
    "Titan": {"ticker": "TITAN.NS", "sector": "Consumer Discretionary"},
    "UltraTech Cement": {"ticker": "ULTRACEMCO.NS", "sector": "Materials"},
    "Wipro": {"ticker": "WIPRO.NS", "sector": "Information Technology"},
}

# Streamlit app setup
st.set_page_config(page_title="NIFTY 50 News Analysis", layout="wide")

# Custom CSS with improved accessibility
# NOTE(review): the stylesheet body is blank here; it looks like the original
# <style> markup was lost -- restore it from version control if available.
st.markdown(""" """, unsafe_allow_html=True)

# File that stores the (user-editable) keyword -> weight mapping.
KEYWORD_WEIGHTS_PATH = "keyword_weights.json"

# Built-in keyword weights, used when no readable JSON file is present.
DEFAULT_KEYWORD_WEIGHTS = {
    "revenue": 3, "profit": 3, "loss": 3, "earnings": 3, "EBITDA": 3,
    "quarterly results": 3, "annual report": 3, "share price": 3,
    "market cap": 3, "dividend": 3, "buyback": 3, "stock split": 3,
    "bonus issue": 3, "downgrade": 3, "upgrade": 3, "bullish": 3,
    "bearish": 3, "rating change": 3,
    "acquisition": 2, "merger": 2, "takeover": 2, "buyout": 2,
    "new plant": 2, "factory": 2, "expansion": 2, "investment": 2,
    "launch": 2, "R&D": 2, "deal": 2, "agreement": 2, "MoU": 2,
    "partnership": 2, "collaboration": 2,
    "SEBI": 1.5, "fine": 1.5, "violation": 1.5, "compliance": 1.5,
    "FIR": 1.5, "probe": 1.5, "subsidy": 1.5, "tax": 1.5,
    "regulation": 1.5, "policy change": 1.5, "license": 1.5, "CEO": 1.5,
    "CFO": 1.5, "resigns": 1.5, "appointed": 1.5, "stepping down": 1.5,
    "fraud": 1.5, "scandal": 1.5, "mismanagement": 1.5,
    "whistleblower": 1.5,
    "inflation": 1, "GDP": 1, "interest rate": 1, "RBI policy": 1,
    "sanctions": 1, "trade war": 1, "conflict": 1, "export/import": 1,
    "recall": 1, "defect": 1, "complaint": 1, "customer issue": 1,
    "hack": 1, "breach": 1, "cyberattack": 1, "data leak": 1,
}


# Load keyword weights from JSON
@st.cache_resource
def load_keyword_weights():
    """Return the keyword -> weight mapping used to score article relevance.

    The mapping is read from ``keyword_weights.json``. When the file does not
    exist, the built-in defaults are written to it (best effort) and returned.
    When the file exists but is not valid JSON, the defaults are returned and
    the file is left untouched so the user can repair it.
    """
    try:
        with open(KEYWORD_WEIGHTS_PATH, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        pass
    except json.JSONDecodeError:
        logger.exception(
            "Could not parse %s; falling back to built-in keyword weights",
            KEYWORD_WEIGHTS_PATH,
        )
        return dict(DEFAULT_KEYWORD_WEIGHTS)

    defaults = dict(DEFAULT_KEYWORD_WEIGHTS)
    try:
        with open(KEYWORD_WEIGHTS_PATH, "w", encoding="utf-8") as f:
            json.dump(defaults, f, indent=4)
    except OSError:
        # Persisting the defaults is a convenience only; the app still works.
        logger.warning("Could not write %s", KEYWORD_WEIGHTS_PATH, exc_info=True)
    return defaults


keyword_weights = load_keyword_weights()


@functools.lru_cache(maxsize=None)
def _cased_keyword_pattern(keyword):
    """Compile a case-insensitive whole-word pattern for a cased keyword."""
    return re.compile(rf"(?<!\w){re.escape(keyword)}(?!\w)", re.IGNORECASE)


def _matched_keywords(*texts):
    """Return the configured keywords that occur in any of *texts*.

    Lowercase keywords are matched as substrings of the lowercased text.
    Keywords containing uppercase letters (e.g. "SEBI", "FIR", "MoU") are
    matched case-insensitively as whole words, so that "FIR" does not match
    "firm". ``None`` entries in *texts* are treated as empty strings.
    """
    raw_texts = [text or "" for text in texts]
    lowered_texts = [text.lower() for text in raw_texts]
    matched = []
    for keyword in keyword_weights:
        if keyword == keyword.lower():
            hit = any(keyword in text for text in lowered_texts)
        else:
            pattern = _cased_keyword_pattern(keyword)
            hit = any(pattern.search(text) for text in raw_texts)
        if hit:
            matched.append(keyword)
    return matched


def _keyword_score(*texts):
    """Return the summed weight of all keywords found in *texts*."""
    return sum(keyword_weights.get(keyword, 0) for keyword in _matched_keywords(*texts))


# Lazy-load models
@st.cache_resource
def init_models():
    """Create the summarization and sentiment pipelines (CPU, ``device=-1``).

    Returns:
        A ``(summarizer, classifier)`` tuple. On failure an error is shown in
        the app and the script run is stopped via ``st.stop()``.
    """
    try:
        summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1)
        classifier = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=-1,
        )
        return summarizer, classifier
    except Exception as e:
        st.error(f"Failed to initialize models: {str(e)}")
        st.stop()


# The models are only loaded once the user asks for an analysis.
summarizer, classifier = None, None

# Sidebar controls
with st.sidebar:
    st.title("NIFTY 50 News Analysis")
    # The icon was mojibake in the source; restored to the intended emoji.
    st.info("Analyze news sentiment for companies by sector over different time frames.", icon="ℹ️")
    sectors = sorted(set(data['sector'] for data in nifty_50_data.values()))
    selected_sector = st.selectbox("Select a Sector", sectors, help="Choose a sector to analyze")
    selected_period = st.selectbox(
        "Select Time Frame",
        ["1D", "5D", "1M", "6M", "YTD", "1Y", "5Y"],
        index=2,
        help="Select the time range for news",
    )
    button = st.button("Analyze News", key="analyze_button")

# Look-back window per time frame ("YTD" is handled separately).
_PERIOD_LOOKBACK = {
    "1D": timedelta(hours=36),  # Broaden to 36 hours
    "5D": timedelta(days=5),
    "1M": timedelta(days=30),
    "6M": timedelta(days=180),
    "1Y": timedelta(days=365),
    "5Y": timedelta(days=365 * 5),
}


# Function to calculate time range
def get_date_range(period):
    """Return ``(from_date, to_date)`` as ``YYYY-MM-DD`` strings for *period*.

    ``to_date`` is today (local time). Unrecognized periods fall back to the
    five-year window.
    """
    end_date = datetime.now()
    if period == "YTD":
        start_date = datetime(end_date.year, 1, 1)
    else:
        start_date = end_date - _PERIOD_LOOKBACK.get(period, _PERIOD_LOOKBACK["5Y"])
    return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=5),
    reraise=True,
)
def _request_articles(api_key, company_name, from_date, to_date, page_size, period):
    """Query NewsAPI for *company_name* and return the raw article list.

    This is a blocking call. Failures are raised (not swallowed) so the retry
    decorator can re-attempt; after the last attempt the original exception
    propagates to the caller.
    """
    newsapi = NewsApiClient(api_key=api_key)
    response = newsapi.get_everything(
        q=company_name,
        from_param=from_date,
        to=to_date if period != "1D" else None,
        language="en",
        sort_by="publishedAt",
        page_size=page_size,
    )
    return response.get("articles") or []


# Async news fetching with retry logic
async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
    """Fetch up to five articles for one company.

    Args:
        session: Accepted for interface compatibility; not used, because the
            request goes through ``NewsApiClient``.
        company_name: Search query sent to NewsAPI.
        from_date: Start date string.
        to_date: End date string (not sent for the "1D" period).
        api_key: NewsAPI key.
        page_size: Number of articles to request; forced to 50 for "1D".
        period: Selected time frame label.

    Returns:
        ``(company_name, articles)``. For "1D" the articles are unfiltered;
        otherwise only articles whose title or description mention a
        configured keyword are kept, ordered by descending relevance weight.
        On failure an error is logged and shown, and the list is empty.
    """
    page_size = 50 if period == "1D" else page_size  # Increase for 1D
    try:
        # Run the blocking client call in a worker thread so that the fetches
        # for different companies can overlap.
        loop = asyncio.get_running_loop()
        articles = await loop.run_in_executor(
            None,
            functools.partial(
                _request_articles, api_key, company_name, from_date, to_date, page_size, period
            ),
        )
    except Exception as e:
        logger.error("Error fetching news for %s: %s", company_name, e)
        st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.")
        return company_name, []

    if period == "1D":
        relevant_articles = articles  # No filtering for 1D
    else:
        relevant_articles = []
        for article in articles:
            matched = _matched_keywords(article.get("title"), article.get("description"))
            if matched:
                article["relevance_weight"] = sum(keyword_weights.get(k, 0) for k in matched)
                relevant_articles.append(article)
        # Stable sort: most relevant first, newest first among equal weights.
        relevant_articles.sort(key=lambda a: a["relevance_weight"], reverse=True)

    logger.info(
        "Fetched %d articles, %d relevant for %s in %s",
        len(articles), len(relevant_articles), company_name, period,
    )
    return company_name, relevant_articles[:5]


# Batch summarize and classify articles
def summarize_and_classify_batch(news_articles):
    """Summarize articles and classify the sentiment of each summary.

    Uses the module-level ``summarizer`` and ``classifier`` pipelines.

    Args:
        news_articles: List of NewsAPI article dicts.

    Returns:
        ``(summaries, sentiment_counts, top_themes)`` where ``summaries`` holds
        at most three per-article dicts, ``sentiment_counts`` counts every
        classified article, and ``top_themes`` is a list of up to three
        ``(keyword, count)`` pairs. On any error, empty results are returned.
    """
    try:
        sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
        summaries = []
        key_themes = {}

        # Keep each article paired with its text so that dropping articles
        # without any text cannot shift summaries onto the wrong article.
        prepared = []
        for article in news_articles:
            full_text = (
                article.get("content", "")
                or article.get("description", "")
                or article.get("title", "")
            )
            if full_text:
                prepared.append((article, full_text))
        if not prepared:
            return [], sentiment_counts, []

        contents = [full_text[:1024] for _, full_text in prepared]
        summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
        if isinstance(summaries_raw, list):
            summaries_texts = [s["summary_text"] for s in summaries_raw]
        else:
            summaries_texts = [summaries_raw["summary_text"]]
        sentiment_results = classifier(summaries_texts, batch_size=4)

        for (article, full_text), truncated, summary_text, sentiment_result in zip(
            prepared, contents, summaries_texts, sentiment_results
        ):
            # Short texts are shown as-is instead of their model summary.
            summary = summary_text if len(full_text) > 100 else truncated
            sentences = summary.split(". ")
            key_insight = max(sentences, key=_keyword_score, default=summary)

            sentiment_label = sentiment_result["label"]
            sentiment_score = sentiment_result["score"]
            if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
                sentiment_display = "Positive"
            elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
                sentiment_display = "Negative"
            else:
                sentiment_display = "Neutral"
            sentiment_counts[sentiment_display] += 1

            for keyword in _matched_keywords(article.get("title"), article.get("description")):
                key_themes[keyword] = key_themes.get(keyword, 0) + 1

            summaries.append({
                "title": article.get("title") or "No title",
                "summary": summary,
                "key_insight": key_insight,
                "sentiment": sentiment_display,
                "confidence": sentiment_score,
                "url": article.get("url") or "",
                "published_at": article.get("publishedAt") or "",
            })

        top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
        logger.info("Sentiment counts: %s", sentiment_counts)
        return summaries[:3], sentiment_counts, top_themes
    except Exception as e:
        # Best effort: one failing batch must not take down the whole page.
        logger.exception("Error in summarize_and_classify: %s", e)
        return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []


# Sentiment label -> marker shown next to each article (emoji restored from
# mojibake in the source). Anything else is rendered as Neutral.
_SENTIMENT_BADGES = {
    "Positive": "🟢 Positive",
    "Negative": "🔴 Negative",
}


# Display news articles
def display_news_articles(news_articles, company_name, selected_period):
    """Render the summarized articles of one company.

    Args:
        news_articles: Summary dicts as produced by
            ``summarize_and_classify_batch``.
        company_name: Company shown in the section header.
        selected_period: Time frame label shown in the section header.
    """
    colored_header(
        f"Summarized News for {company_name} ({selected_period})",
        description="Key Updates from the Selected Period",
        color_name="blue-70",
    )
    for news in news_articles:
        with st.container():
            # NOTE(review): the opening/closing wrapper markup is empty here;
            # it looks like the original HTML was lost -- restore if available.
            st.markdown('', unsafe_allow_html=True)
            col1, col2 = st.columns([3, 1])
            with col1:
                st.subheader(news['title'], help="News article title")
                st.write(f"**Summary**: {news['summary']}")
                st.write(f"**Key Insight**: {news['key_insight']}")
                if news['url']:
                    st.markdown(f"[Read More]({news['url']})", unsafe_allow_html=True)
            with col2:
                badge = _SENTIMENT_BADGES.get(news['sentiment'], "⚪ Neutral")
                st.markdown(f'{badge} ({news["confidence"]*100:.1f}%)', unsafe_allow_html=True)
                st.write(f"**Published**: {news['published_at']}")
            st.markdown('', unsafe_allow_html=True)


# Themes that are reported as risks when they appear in the sector's news.
_RISK_THEMES = ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]


def render_decision_guidance(sentiment_df, sentiment_data, sector_sentiment_counts, sector_themes, max_articles):
    """Render the sector outlook, top picks, tips and per-company insights.

    Args:
        sentiment_df: Per-company sentiment table sorted by score, descending.
        sentiment_data: List of per-company result dicts.
        sector_sentiment_counts: Sentiment label -> article count for the sector.
        sector_themes: Keyword -> count aggregated over the sector.
        max_articles: Largest per-company article count (may be 0).
    """
    colored_header("📊 Decision Guidance", description="Investment Insights from News Sentiment", color_name="violet-70")
    st.markdown(
        "**Disclaimer**: These are news-based insights, not financial advice. Consult a financial advisor.",
        unsafe_allow_html=True,
    )

    sector_total = sum(sector_sentiment_counts.values())

    def pct(label):
        return (sector_sentiment_counts[label] / sector_total * 100) if sector_total > 0 else 0

    sector_positive_pct = pct("Positive")
    sector_negative_pct = pct("Negative")
    sector_neutral_pct = pct("Neutral")

    if sector_positive_pct > 50:
        sector_sentiment, outlook = "Positive", "Favorable 📈"
    elif sector_negative_pct > 50:
        sector_sentiment, outlook = "Negative", "Cautious 📉"
    else:
        sector_sentiment, outlook = "Neutral", "Neutral ⚖️"

    st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
    st.markdown(f"- **Investment Outlook**: {outlook} for {selected_sector} sector.")

    ranked_themes = sorted(sector_themes.items(), key=lambda x: x[1], reverse=True)
    negative_themes = [theme for theme, _ in ranked_themes if theme in _RISK_THEMES]
    if negative_themes:
        st.markdown(f"- **Risks**: Watch for issues related to {', '.join(negative_themes[:2])}.")

    st.markdown("**Top Picks**:")
    for company in sentiment_df.head(2).to_dict("records"):
        st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Neutral']} Neutral).")

    st.markdown("**Tips**:")
    st.markdown("- Prioritize companies with high article counts for stronger signals.")
    st.markdown("- Check Neutral news for hidden opportunities or risks.")

    st.markdown("**Company Insights**:")
    for company in sentiment_data:
        # max_articles is 0 when no article could be classified at all.
        coverage = company["Total"] / max_articles if max_articles else 0
        confidence = "High" if coverage > 0.7 else "Medium" if coverage > 0.3 else "Low"
        if company["Sentiment Score"] > 0.3:
            recommendation = "Consider buying 📈"
        elif company["Sentiment Score"] < -0.3:
            recommendation = "Avoid 📉"
        else:
            recommendation = "Monitor ⚖️"
        themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
        st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({themes_str}, {company['Total']} articles, Confidence: {confidence}). {recommendation}.")


# Main app logic
st.title("📰 NIFTY 50 Sector News Analysis")
st.markdown(
    "Analyze news sentiment for companies in a selected sector to guide investment decisions.",
    unsafe_allow_html=True,
)

if button:
    if not summarizer or not classifier:
        summarizer, classifier = init_models()

    api_key = os.getenv("NEWSAPI_KEY")
    if not api_key:
        st.error("NEWSAPI_KEY environment variable not set. Please configure it.")
        st.stop()

    with st.spinner("Fetching and analyzing news..."):
        from_date, to_date = get_date_range(selected_period)
        companies_in_sector = {
            name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector
        }
        if not companies_in_sector:
            st.warning(f"No companies found for {selected_sector} sector.")
            st.stop()

        company_names = list(companies_in_sector)
        sentiment_data = []
        all_news = {}
        sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
        max_articles = 0
        sector_themes = {}

        async def fetch_all_news():
            async with aiohttp.ClientSession() as session:
                tasks = [
                    fetch_news_async(
                        session, company_name, from_date, to_date, api_key,
                        page_size=20, period=selected_period,
                    )
                    for company_name in company_names
                ]
                return await asyncio.gather(*tasks, return_exceptions=True)

        progress_bar = st.progress(0)
        progress_text = st.empty()
        results = asyncio.run(fetch_all_news())

        # gather() keeps task order, so results line up with company_names.
        for idx, (company_name, result) in enumerate(zip(company_names, results)):
            if isinstance(result, BaseException):
                # return_exceptions=True hands failures back as objects.
                logger.error("Fetching news for %s failed: %r", company_name, result)
                news_articles = []
            else:
                company_name, news_articles = result

            progress_bar.progress((idx + 1) / len(company_names))
            progress_text.text(f"Processing {company_name} ({idx + 1}/{len(company_names)})")

            if not news_articles:
                st.warning(f"No news found for {company_name}.{' Try a longer time frame like 5D.' if selected_period == '1D' else ''}")
                continue

            summarized_news, sentiment_counts, top_themes = summarize_and_classify_batch(news_articles)
            total_articles = sum(sentiment_counts.values())
            max_articles = max(max_articles, total_articles)
            if total_articles > 0:
                sentiment_score = (sentiment_counts["Positive"] - sentiment_counts["Negative"]) / total_articles
            else:
                sentiment_score = 0
            dominant_sentiment = max(sentiment_counts, key=sentiment_counts.get)

            sentiment_data.append({
                "Company": company_name,
                "Positive": sentiment_counts["Positive"],
                "Negative": sentiment_counts["Negative"],
                "Neutral": sentiment_counts["Neutral"],
                "Total": total_articles,
                "Sentiment Score": sentiment_score,
                "Dominant Sentiment": dominant_sentiment,
                "Top Themes": [theme[0] for theme in top_themes],
            })
            all_news[company_name] = summarized_news
            for sentiment, count in sentiment_counts.items():
                sector_sentiment_counts[sentiment] += count
            for theme, count in top_themes:
                sector_themes[theme] = sector_themes.get(theme, 0) + count

        progress_bar.empty()
        progress_text.empty()

    if sentiment_data:
        colored_header(
            f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
            description=f"News from {from_date} to {to_date}",
            color_name="blue-70",
        )
        sentiment_df = pd.DataFrame(sentiment_data)[
            ["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]
        ]
        sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)

        st.subheader("Company Sentiment Overview")
        st.table(sentiment_df)

        st.subheader("Sentiment Score Distribution")
        fig = px.bar(
            sentiment_df,
            x="Company",
            y="Sentiment Score",
            color="Sentiment Score",
            color_continuous_scale="RdYlGn",
            title="Sentiment Scores by Company",
            labels={"Sentiment Score": "Sentiment Score (-1 to 1)"},
            height=400,
        )
        st.plotly_chart(fig, use_container_width=True)

        render_decision_guidance(
            sentiment_df, sentiment_data, sector_sentiment_counts, sector_themes, max_articles
        )

        for company_name in sentiment_df["Company"]:
            if company_name in all_news and all_news[company_name]:
                display_news_articles(all_news[company_name], company_name, selected_period)
    else:
        st.warning(f"No news found for {selected_sector} sector. Try a longer time frame like 5D or 1M, or check your NEWSAPI_KEY.")

# Footer
# NOTE(review): the footer markup is empty; the original HTML appears lost.
st.markdown('', unsafe_allow_html=True)