import streamlit as st import asyncio import aiohttp from newsapi import NewsApiClient from transformers import pipeline from streamlit_extras.colored_header import colored_header from datetime import datetime, timedelta import pandas as pd import plotly.express as px import json import os from tenacity import retry, stop_after_attempt, wait_exponential import logging # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # NIFTY 50 companies with tickers and sectors nifty_50_data = { "Adani Enterprises": {"ticker": "ADANIENT.NS", "sector": "Industrials"}, "Adani Ports": {"ticker": "ADANIPORTS.NS", "sector": "Industrials"}, "Apollo Hospitals": {"ticker": "APOLLOHOSP.NS", "sector": "Healthcare"}, "Asian Paints": {"ticker": "ASIANPAINT.NS", "sector": "Consumer Discretionary"}, "Axis Bank": {"ticker": "AXISBANK.NS", "sector": "Financials"}, "Bajaj Auto": {"ticker": "BAJAJ-AUTO.NS", "sector": "Consumer Discretionary"}, "Bajaj Finserv": {"ticker": "BAJAJFINSV.NS", "sector": "Financials"}, "Bajaj Finance": {"ticker": "BAJFINANCE.NS", "sector": "Financials"}, "Bharti Airtel": {"ticker": "BHARTIARTL.NS", "sector": "Communication Services"}, "BPCL": {"ticker": "BPCL.NS", "sector": "Energy"}, "Britannia": {"ticker": "BRITANNIA.NS", "sector": "Consumer Staples"}, "Cipla": {"ticker": "CIPLA.NS", "sector": "Healthcare"}, "Coal India": {"ticker": "COALINDIA.NS", "sector": "Energy"}, "Divis Labs": {"ticker": "DIVISLAB.NS", "sector": "Healthcare"}, "Dr. Reddy's Labs": {"ticker": "DRREDDY.NS", "sector": "Healthcare"}, "Eicher Motors": {"ticker": "EICHERMOT.NS", "sector": "Consumer Discretionary"}, "Grasim Industries": {"ticker": "GRASIM.NS", "sector": "Materials"}, "HCL Technologies": {"ticker": "HCLTECH.NS", "sector": "Information Technology"}, "HDFC Bank": {"ticker": "HDFCBANK.NS", "sector": "Financials"}, "HDFC Life": {"ticker": "HDFCLIFE.NS", "sector": "Financials"}, "Hero MotoCorp": {"ticker": "HEROMOTOCO.NS", "sector": "Consumer Discretionary"}, "Hindalco": {"ticker": "HINDALCO.NS", "sector": "Materials"}, "HUL": {"ticker": "HINDUNILVR.NS", "sector": "Consumer Staples"}, "ICICI Bank": {"ticker": "ICICIBANK.NS", "sector": "Financials"}, "IndusInd Bank": {"ticker": "INDUSINDBK.NS", "sector": "Financials"}, "Infosys": {"ticker": "INFY.NS", "sector": "Information Technology"}, "ITC": {"ticker": "ITC.NS", "sector": "Consumer Staples"}, "JSW Steel": {"ticker": "JSWSTEEL.NS", "sector": "Materials"}, "Kotak Mahindra Bank": {"ticker": "KOTAKBANK.NS", "sector": "Financials"}, "L&T": {"ticker": "LT.NS", "sector": "Industrials"}, "L&T Technology Services": {"ticker": "LTIM.NS", "sector": "Information Technology"}, "M&M": {"ticker": "M&M.NS", "sector": "Consumer Discretionary"}, "Maruti Suzuki": {"ticker": "MARUTI.NS", "sector": "Consumer Discretionary"}, "Nestle India": {"ticker": "NESTLEIND.NS", "sector": "Consumer Staples"}, "NTPC": {"ticker": "NTPC.NS", "sector": "Utilities"}, "ONGC": {"ticker": "ONGC.NS", "sector": "Energy"}, "Power Grid": {"ticker": "POWERGRID.NS", "sector": "Utilities"}, "Reliance": {"ticker": "RELIANCE.NS", "sector": "Energy"}, "SBI Life": {"ticker": "SBILIFE.NS", "sector": "Financials"}, "SBI": {"ticker": "SBIN.NS", "sector": "Financials"}, "Shriram Finance": {"ticker": "SHRIRAMFIN.NS", "sector": "Financials"}, "Sun Pharma": {"ticker": "SUNPHARMA.NS", "sector": "Healthcare"}, "Tata Consumer Products": {"ticker": "TATACONSUM.NS", "sector": "Consumer Staples"}, "Tata Motors": {"ticker": "TATAMOTORS.NS", "sector": "Consumer Discretionary"}, "Tata Steel": {"ticker": "TATASTEEL.NS", "sector": "Materials"}, "TCS": {"ticker": "TCS.NS", "sector": "Information Technology"}, "Tech Mahindra": {"ticker": "TECHM.NS", "sector": "Information Technology"}, "Titan": {"ticker": "TITAN.NS", "sector": "Consumer Discretionary"}, "UltraTech Cement": {"ticker": "ULTRACEMCO.NS", "sector": "Materials"}, "Wipro": {"ticker": "WIPRO.NS", "sector": "Information Technology"}, } # Streamlit app setup st.set_page_config(page_title="NIFTY 50 News Analysis", layout="wide") # Custom CSS with improved accessibility st.markdown(""" """, unsafe_allow_html=True) # Load keyword weights from JSON @st.cache_resource def load_keyword_weights(): try: with open("keyword_weights.json", "r") as f: return json.load(f) except FileNotFoundError: keyword_weights = { "revenue": 3, "profit": 3, "loss": 3, "earnings": 3, "EBITDA": 3, "quarterly results": 3, "annual report": 3, "share price": 3, "market cap": 3, "dividend": 3, "buyback": 3, "stock split": 3, "bonus issue": 3, "downgrade": 3, "upgrade": 3, "bullish": 3, "bearish": 3, "rating change": 3, "acquisition": 2, "merger": 2, "takeover": 2, "buyout": 2, "new plant": 2, "factory": 2, "expansion": 2, "investment": 2, "launch": 2, "R&D": 2, "deal": 2, "agreement": 2, "MoU": 2, "partnership": 2, "collaboration": 2, "SEBI": 1.5, "fine": 1.5, "violation": 1.5, "compliance": 1.5, "FIR": 1.5, "probe": 1.5, "subsidy": 1.5, "tax": 1.5, "regulation": 1.5, "policy change": 1.5, "license": 1.5, "CEO": 1.5, "CFO": 1.5, "resigns": 1.5, "appointed": 1.5, "stepping down": 1.5, "fraud": 1.5, "scandal": 1.5, "mismanagement": 1.5, "whistleblower": 1.5, "inflation": 1, "GDP": 1, "interest rate": 1, "RBI policy": 1, "sanctions": 1, "trade war": 1, "conflict": 1, "export/import": 1, "recall": 1, "defect": 1, "complaint": 1, "customer issue": 1, "hack": 1, "breach": 1, "cyberattack": 1, "data leak": 1 } with open("keyword_weights.json", "w") as f: json.dump(keyword_weights, f, indent=4) return keyword_weights keyword_weights = load_keyword_weights() # Lazy-load models @st.cache_resource def init_models(): try: summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1) classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=-1) return summarizer, classifier except Exception as e: st.error(f"Failed to initialize models: {str(e)}") st.stop() summarizer, classifier = None, None # Sidebar controls with st.sidebar: st.title("NIFTY 50 News Analysis") st.info("Analyze news sentiment for companies by sector over different time frames.", icon="âšī¸") sectors = sorted(set(data['sector'] for data in nifty_50_data.values())) selected_sector = st.selectbox("Select a Sector", sectors, help="Choose a sector to analyze") selected_period = st.selectbox("Select Time Frame", ["1D", "5D", "1M", "6M", "YTD", "1Y", "5Y"], index=2, help="Select the time range for news") button = st.button("Analyze News", key="analyze_button") # Function to calculate time range def get_date_range(period): end_date = datetime.now() if period == "1D": start_date = end_date - timedelta(hours=36) # Broaden to 36 hours elif period == "5D": start_date = end_date - timedelta(days=5) elif period == "1M": start_date = end_date - timedelta(days=30) elif period == "6M": start_date = end_date - timedelta(days=180) elif period == "YTD": start_date = datetime(end_date.year, 1, 1) elif period == "1Y": start_date = end_date - timedelta(days=365) else: # 5Y start_date = end_date - timedelta(days=365 * 5) return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d') # Async news fetching with retry logic @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5)) async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"): try: newsapi = NewsApiClient(api_key=api_key) page_size = 50 if period == "1D" else page_size # Increase for 1D articles = newsapi.get_everything( q=company_name, from_param=from_date, to=to_date if period != "1D" else None, language="en", sort_by="publishedAt", page_size=page_size )["articles"] if period == "1D": relevant_articles = articles # No filtering for 1D else: relevant_articles = [] for article in articles: title = (article.get("title", "") or "").lower() desc = (article.get("description", "") or "").lower() if any(keyword in title or keyword in desc for keyword in keyword_weights.keys()): article["relevance_weight"] = sum(keyword_weights.get(keyword, 0) for keyword in keyword_weights if keyword in title or keyword in desc) relevant_articles.append(article) logger.info(f"Fetched {len(articles)} articles, {len(relevant_articles)} relevant for {company_name} in {period}") return company_name, relevant_articles[:5] except Exception as e: logger.error(f"Error fetching news for {company_name}: {str(e)}") st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.") return company_name, [] # Batch summarize and classify articles def summarize_and_classify_batch(news_articles): try: sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0} summaries = [] key_themes = {} contents = [article.get("content", "") or article.get("description", "") or article.get("title", "") for article in news_articles] contents = [c[:1024] for c in contents if c] if not contents: return [], sentiment_counts, [] summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4) summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]] sentiment_results = classifier(summaries_texts, batch_size=4) for idx, article in enumerate(news_articles): if idx >= len(summaries_texts): continue summary = summaries_texts[idx] if len(article.get("content", "") or article.get("description", "") or article.get("title", "")) > 100 else contents[idx] sentences = summary.split(". ") key_insight = max(sentences, key=lambda s: sum(keyword_weights.get(k, 0) for k in keyword_weights if k in s.lower()), default=summary) sentiment_result = sentiment_results[idx] sentiment_label = sentiment_result["label"] sentiment_score = sentiment_result["score"] if sentiment_label == "POSITIVE" and sentiment_score > 0.6: sentiment_counts["Positive"] += 1 sentiment_display = "Positive" elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6: sentiment_counts["Negative"] += 1 sentiment_display = "Negative" else: sentiment_counts["Neutral"] += 1 sentiment_display = "Neutral" title = (article.get("title", "") or "").lower() desc = (article.get("description", "") or "").lower() for keyword in keyword_weights: if keyword in title or keyword in desc: key_themes[keyword] = key_themes.get(keyword, 0) + 1 summaries.append({ "title": article.get("title", "No title"), "summary": summary, "key_insight": key_insight, "sentiment": sentiment_display, "confidence": sentiment_score, "url": article.get("url", ""), "published_at": article.get("publishedAt", "") }) top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3] logger.info(f"Sentiment counts: {sentiment_counts}") return summaries[:3], sentiment_counts, top_themes except Exception as e: logger.error(f"Error in summarize_and_classify: {str(e)}") return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, [] # Display news articles def display_news_articles(news_articles, company_name, selected_period): colored_header( f"Summarized News for {company_name} ({selected_period})", description=f"Key Updates from the Selected Period", color_name="blue-70" ) for news in news_articles: with st.container(): st.markdown('