Spaces:

MtotoWaJemo
/

nifty-news-analysis

Running

App Files Files Community

nifty-news-analysis / app.py

MtotoWaJemo

Update app.py

b24af4e verified 14 days ago

raw

history blame contribute delete

23.1 kB

	import streamlit as st
	import asyncio
	import aiohttp
	from newsapi import NewsApiClient
	from transformers import pipeline
	from streamlit_extras.colored_header import colored_header
	from datetime import datetime, timedelta
	import pandas as pd
	import plotly.express as px
	import json
	import os
	from tenacity import retry, stop_after_attempt, wait_exponential
	import logging

	# Set up logging: configure the root logger at INFO level and create the
	# module-level logger that the fetch/summarize helpers in this file log to.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# NIFTY 50 companies with tickers and sectors.
	# The key is the display name and is also the string sent to the news search,
	# so it must name the same company as the ticker it is paired with.
	nifty_50_data = {
	    "Adani Enterprises": {"ticker": "ADANIENT.NS", "sector": "Industrials"},
	    "Adani Ports": {"ticker": "ADANIPORTS.NS", "sector": "Industrials"},
	    "Apollo Hospitals": {"ticker": "APOLLOHOSP.NS", "sector": "Healthcare"},
	    "Asian Paints": {"ticker": "ASIANPAINT.NS", "sector": "Consumer Discretionary"},
	    "Axis Bank": {"ticker": "AXISBANK.NS", "sector": "Financials"},
	    "Bajaj Auto": {"ticker": "BAJAJ-AUTO.NS", "sector": "Consumer Discretionary"},
	    "Bajaj Finserv": {"ticker": "BAJAJFINSV.NS", "sector": "Financials"},
	    "Bajaj Finance": {"ticker": "BAJFINANCE.NS", "sector": "Financials"},
	    "Bharti Airtel": {"ticker": "BHARTIARTL.NS", "sector": "Communication Services"},
	    "BPCL": {"ticker": "BPCL.NS", "sector": "Energy"},
	    "Britannia": {"ticker": "BRITANNIA.NS", "sector": "Consumer Staples"},
	    "Cipla": {"ticker": "CIPLA.NS", "sector": "Healthcare"},
	    "Coal India": {"ticker": "COALINDIA.NS", "sector": "Energy"},
	    "Divis Labs": {"ticker": "DIVISLAB.NS", "sector": "Healthcare"},
	    "Dr. Reddy's Labs": {"ticker": "DRREDDY.NS", "sector": "Healthcare"},
	    "Eicher Motors": {"ticker": "EICHERMOT.NS", "sector": "Consumer Discretionary"},
	    "Grasim Industries": {"ticker": "GRASIM.NS", "sector": "Materials"},
	    "HCL Technologies": {"ticker": "HCLTECH.NS", "sector": "Information Technology"},
	    "HDFC Bank": {"ticker": "HDFCBANK.NS", "sector": "Financials"},
	    "HDFC Life": {"ticker": "HDFCLIFE.NS", "sector": "Financials"},
	    "Hero MotoCorp": {"ticker": "HEROMOTOCO.NS", "sector": "Consumer Discretionary"},
	    "Hindalco": {"ticker": "HINDALCO.NS", "sector": "Materials"},
	    "HUL": {"ticker": "HINDUNILVR.NS", "sector": "Consumer Staples"},
	    "ICICI Bank": {"ticker": "ICICIBANK.NS", "sector": "Financials"},
	    "IndusInd Bank": {"ticker": "INDUSINDBK.NS", "sector": "Financials"},
	    "Infosys": {"ticker": "INFY.NS", "sector": "Information Technology"},
	    "ITC": {"ticker": "ITC.NS", "sector": "Consumer Staples"},
	    "JSW Steel": {"ticker": "JSWSTEEL.NS", "sector": "Materials"},
	    "Kotak Mahindra Bank": {"ticker": "KOTAKBANK.NS", "sector": "Financials"},
	    "L&T": {"ticker": "LT.NS", "sector": "Industrials"},
	    # LTIM.NS is LTIMindtree; the previous label "L&T Technology Services"
	    # names a different company (ticker LTTS) and searched the wrong news.
	    "LTIMindtree": {"ticker": "LTIM.NS", "sector": "Information Technology"},
	    "M&M": {"ticker": "M&M.NS", "sector": "Consumer Discretionary"},
	    "Maruti Suzuki": {"ticker": "MARUTI.NS", "sector": "Consumer Discretionary"},
	    "Nestle India": {"ticker": "NESTLEIND.NS", "sector": "Consumer Staples"},
	    "NTPC": {"ticker": "NTPC.NS", "sector": "Utilities"},
	    "ONGC": {"ticker": "ONGC.NS", "sector": "Energy"},
	    "Power Grid": {"ticker": "POWERGRID.NS", "sector": "Utilities"},
	    "Reliance": {"ticker": "RELIANCE.NS", "sector": "Energy"},
	    "SBI Life": {"ticker": "SBILIFE.NS", "sector": "Financials"},
	    "SBI": {"ticker": "SBIN.NS", "sector": "Financials"},
	    "Shriram Finance": {"ticker": "SHRIRAMFIN.NS", "sector": "Financials"},
	    "Sun Pharma": {"ticker": "SUNPHARMA.NS", "sector": "Healthcare"},
	    "Tata Consumer Products": {"ticker": "TATACONSUM.NS", "sector": "Consumer Staples"},
	    "Tata Motors": {"ticker": "TATAMOTORS.NS", "sector": "Consumer Discretionary"},
	    "Tata Steel": {"ticker": "TATASTEEL.NS", "sector": "Materials"},
	    "TCS": {"ticker": "TCS.NS", "sector": "Information Technology"},
	    "Tech Mahindra": {"ticker": "TECHM.NS", "sector": "Information Technology"},
	    "Titan": {"ticker": "TITAN.NS", "sector": "Consumer Discretionary"},
	    "UltraTech Cement": {"ticker": "ULTRACEMCO.NS", "sector": "Materials"},
	    "Wipro": {"ticker": "WIPRO.NS", "sector": "Information Technology"},
	}

	# Streamlit app setup: browser tab title and wide page layout.
	st.set_page_config(page_title="NIFTY 50 News Analysis", layout="wide")

	# Custom CSS with improved accessibility.
	# Injects a single <style> element covering the app background, sidebar,
	# buttons, tables, the "news-container" and "footer" wrappers, alert outlines,
	# and the three "sentiment-*" badge classes emitted by the rendering code in
	# this file. unsafe_allow_html=True is required for the <style> tag to be kept.
	# NOTE(review): selectors such as ".sidebar .sidebar-content" and ".stTable"
	# depend on Streamlit's generated class names - confirm they still match the
	# deployed Streamlit version.
	st.markdown("""
	<style>
	.stApp {
	background: linear-gradient(to bottom right, #f7fafc, #edf2f7);
	}
	.sidebar .sidebar-content {
	background: linear-gradient(to bottom, #2d3748, #4a5568);
	color: white;
	border-radius: 8px;
	padding: 15px;
	}
	.stButton>button {
	background-color: #3182ce;
	color: white;
	border-radius: 6px;
	padding: 8px 16px;
	font-weight: bold;
	transition: background-color 0.3s;
	}
	.stButton>button:hover {
	background-color: #2b6cb0;
	}
	.stTable {
	border: 1px solid #e2e8f0;
	border-radius: 6px;
	background: #ffffff;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	}
	.news-container {
	border: 1px solid #e2e8f0;
	border-radius: 6px;
	padding: 12px;
	margin-bottom: 12px;
	background: #f7fafc;
	}
	.footer {
	text-align: center;
	padding: 15px;
	color: #4a5568;
	font-size: 14px;
	}
	[role="alert"] {
	outline: 2px solid #3182ce;
	}
	.sentiment-positive {
	color: #38a169;
	font-weight: bold;
	}
	.sentiment-negative {
	color: #e53e3e;
	font-weight: bold;
	}
	.sentiment-neutral {
	color: #718096;
	font-weight: bold;
	}
	</style>
	""", unsafe_allow_html=True)

	# Load keyword weights from JSON
	@st.cache_resource
	def load_keyword_weights():
	    """Return the keyword -> weight mapping used to score article relevance.

	    Reads ``keyword_weights.json`` from the working directory. When the file
	    does not exist, the built-in defaults are returned and written to that
	    path so they can be edited later.

	    Returns:
	        dict: keyword (str) mapped to a numeric weight.

	    The function never raises for a missing, malformed or unwritable file:
	    a malformed file is logged and left untouched (so manual edits are not
	    overwritten), and a failed write of the defaults is logged and ignored.
	    """
	    default_weights = {
	        # Financial results and market actions
	        "revenue": 3, "profit": 3, "loss": 3, "earnings": 3, "EBITDA": 3, "quarterly results": 3, "annual report": 3,
	        "share price": 3, "market cap": 3, "dividend": 3, "buyback": 3, "stock split": 3, "bonus issue": 3,
	        "downgrade": 3, "upgrade": 3, "bullish": 3, "bearish": 3, "rating change": 3,
	        # Corporate actions and growth
	        "acquisition": 2, "merger": 2, "takeover": 2, "buyout": 2, "new plant": 2, "factory": 2, "expansion": 2,
	        "investment": 2, "launch": 2, "R&D": 2, "deal": 2, "agreement": 2, "MoU": 2, "partnership": 2, "collaboration": 2,
	        # Regulation, governance and management changes
	        "SEBI": 1.5, "fine": 1.5, "violation": 1.5, "compliance": 1.5, "FIR": 1.5, "probe": 1.5, "subsidy": 1.5,
	        "tax": 1.5, "regulation": 1.5, "policy change": 1.5, "license": 1.5, "CEO": 1.5, "CFO": 1.5, "resigns": 1.5,
	        "appointed": 1.5, "stepping down": 1.5, "fraud": 1.5, "scandal": 1.5, "mismanagement": 1.5, "whistleblower": 1.5,
	        # Macro environment and operational incidents
	        "inflation": 1, "GDP": 1, "interest rate": 1, "RBI policy": 1, "sanctions": 1, "trade war": 1, "conflict": 1,
	        "export/import": 1, "recall": 1, "defect": 1, "complaint": 1, "customer issue": 1, "hack": 1, "breach": 1,
	        "cyberattack": 1, "data leak": 1,
	    }

	    try:
	        with open("keyword_weights.json", "r", encoding="utf-8") as f:
	            return json.load(f)
	    except FileNotFoundError:
	        pass  # first run: fall through and persist the defaults
	    except json.JSONDecodeError as e:
	        # Keep the broken file on disk so the user can repair it.
	        logger.warning(f"keyword_weights.json is not valid JSON ({e}); using built-in defaults")
	        return default_weights

	    try:
	        with open("keyword_weights.json", "w", encoding="utf-8") as f:
	            json.dump(default_weights, f, indent=4)
	    except OSError as e:
	        # Best effort only: a read-only filesystem must not stop the app.
	        logger.warning(f"Could not write keyword_weights.json: {e}")
	    return default_weights

	keyword_weights = load_keyword_weights()

	# Lazy-load models
	@st.cache_resource
	def init_models():
	    """Create the summarization and sentiment-analysis pipelines.

	    Returns:
	        tuple: ``(summarizer, classifier)`` pipeline objects.

	    If building either pipeline raises, the error text is shown with
	    ``st.error`` and ``st.stop()`` is called.
	    """
	    try:
	        summarization_pipe = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1)
	        sentiment_pipe = pipeline(
	            "sentiment-analysis",
	            model="distilbert-base-uncased-finetuned-sst-2-english",
	            device=-1,
	        )
	    except Exception as e:
	        st.error(f"Failed to initialize models: {str(e)}")
	        st.stop()
	    else:
	        return summarization_pipe, sentiment_pipe

	# Placeholders; the pipelines are created on the first "Analyze News" click.
	summarizer = classifier = None

	# Sidebar controls: sector picker, time-frame picker and the trigger button.
	with st.sidebar:
	    st.title("NIFTY 50 News Analysis")
	    st.info("Analyze news sentiment for companies by sector over different time frames.", icon="ℹ️")
	    # Distinct sector labels from the company table, in alphabetical order.
	    sectors = sorted({info['sector'] for info in nifty_50_data.values()})
	    selected_sector = st.selectbox("Select a Sector", sectors, help="Choose a sector to analyze")
	    selected_period = st.selectbox(
	        "Select Time Frame",
	        ["1D", "5D", "1M", "6M", "YTD", "1Y", "5Y"],
	        index=2,  # pre-selects "1M"
	        help="Select the time range for news",
	    )
	    button = st.button("Analyze News", key="analyze_button")

	# Function to calculate time range
	def get_date_range(period):
	    """Translate a period code into a ``(start, end)`` pair of date strings.

	    Args:
	        period: one of "1D", "5D", "1M", "6M", "YTD", "1Y"; any other value
	            is treated as "5Y".

	    Returns:
	        tuple[str, str]: start and end dates formatted as ``YYYY-MM-DD``,
	        where the end is the current local date.
	    """
	    now = datetime.now()
	    lookbacks = {
	        "1D": timedelta(hours=36),  # Broaden to 36 hours
	        "5D": timedelta(days=5),
	        "1M": timedelta(days=30),
	        "6M": timedelta(days=180),
	        "1Y": timedelta(days=365),
	    }
	    if period == "YTD":
	        # Year-to-date starts on 1 January of the current year.
	        window_start = datetime(now.year, 1, 1)
	    else:
	        # Unlisted codes fall back to the five-year window.
	        window_start = now - lookbacks.get(period, timedelta(days=365 * 5))
	    date_format = '%Y-%m-%d'
	    return window_start.strftime(date_format), now.strftime(date_format)

	# Async news fetching with retry logic
	@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5))
	async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
	    """Fetch recent articles about one company and keep the relevant ones.

	    Args:
	        session: accepted for interface compatibility; not used by the body.
	        company_name: search query sent to NewsAPI.
	        from_date: start date string passed as ``from_param``.
	        to_date: end date string; not sent when ``period`` is "1D".
	        api_key: NewsAPI key.
	        page_size: number of articles requested (forced to 50 for "1D").
	        period: period code; "1D" disables keyword filtering.

	    Returns:
	        tuple: ``(company_name, articles)`` with at most five articles. For
	        periods other than "1D" each kept article gains a
	        ``relevance_weight`` entry (sum of the weights of matched keywords).
	        On any exception the error is logged, shown with ``st.error`` and
	        ``(company_name, [])`` is returned.

	    NOTE(review): every exception is caught inside the body, so the
	    ``@retry`` decorator never sees a failure and never retries.
	    NOTE(review): ``get_everything`` is called without ``await``, so
	    the coroutines gathered by the caller run one after another.
	    """
	    try:
	        newsapi = NewsApiClient(api_key=api_key)
	        page_size = 50 if period == "1D" else page_size  # Increase for 1D
	        articles = newsapi.get_everything(
	            q=company_name,
	            from_param=from_date,
	            to=to_date if period != "1D" else None,
	            language="en",
	            sort_by="publishedAt",
	            page_size=page_size,
	        )["articles"]
	        if period == "1D":
	            relevant_articles = articles  # No filtering for 1D
	        else:
	            # Title/description are lower-cased below, so keywords must be
	            # lower-cased too; otherwise entries such as "EBITDA", "SEBI" or
	            # "CEO" can never match.
	            lowered_weights = [(keyword.lower(), weight) for keyword, weight in keyword_weights.items()]
	            relevant_articles = []
	            for article in articles:
	                title = (article.get("title", "") or "").lower()
	                desc = (article.get("description", "") or "").lower()
	                matched_weights = [
	                    weight for keyword, weight in lowered_weights
	                    if keyword in title or keyword in desc
	                ]
	                if matched_weights:
	                    article["relevance_weight"] = sum(matched_weights)
	                    relevant_articles.append(article)
	        logger.info(f"Fetched {len(articles)} articles, {len(relevant_articles)} relevant for {company_name} in {period}")
	        return company_name, relevant_articles[:5]
	    except Exception as e:
	        logger.error(f"Error fetching news for {company_name}: {str(e)}")
	        st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.")
	        return company_name, []

	# Batch summarize and classify articles
	def summarize_and_classify_batch(news_articles):
	    """Summarize articles, classify their sentiment and collect key themes.

	    Args:
	        news_articles: list of article dicts; the text is taken from
	            "content", falling back to "description" and then "title".

	    Returns:
	        tuple: ``(summaries, sentiment_counts, top_themes)`` where
	        ``summaries`` holds at most three dicts (title, summary, key_insight,
	        sentiment, confidence, url, published_at), ``sentiment_counts`` maps
	        "Positive"/"Negative"/"Neutral" to counts over *all* analysed
	        articles, and ``top_themes`` is up to three ``(keyword, count)``
	        pairs ordered by count. Any exception is logged and yields empty
	        results with zero counts.
	    """
	    try:
	        sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}

	        # Pair every article with its text BEFORE dropping empty ones, so a
	        # text-less article cannot shift summaries onto the wrong article.
	        usable = []
	        for article in news_articles:
	            full_text = article.get("content", "") or article.get("description", "") or article.get("title", "")
	            if full_text:
	                usable.append((article, full_text))
	        if not usable:
	            return [], sentiment_counts, []

	        contents = [full_text[:1024] for _, full_text in usable]

	        summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
	        summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]

	        sentiment_results = classifier(summaries_texts, batch_size=4)

	        # Text is lower-cased for matching, so compare against lower-cased
	        # keywords; the original spelling is kept for reporting.
	        keyword_table = [(keyword, keyword.lower(), weight) for keyword, weight in keyword_weights.items()]

	        def sentence_weight(sentence):
	            lowered = sentence.lower()
	            return sum(weight for _, needle, weight in keyword_table if needle in lowered)

	        summaries = []
	        key_themes = {}
	        # zip stops at the shortest sequence, covering a short model output.
	        for (article, full_text), truncated, summary_text, sentiment_result in zip(
	            usable, contents, summaries_texts, sentiment_results
	        ):
	            # Short texts are shown as-is instead of a model summary.
	            summary = summary_text if len(full_text) > 100 else truncated
	            sentences = summary.split(". ")
	            key_insight = max(sentences, key=sentence_weight, default=summary)

	            sentiment_label = sentiment_result["label"]
	            sentiment_score = sentiment_result["score"]
	            # Low-confidence predictions are reported as Neutral.
	            if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
	                sentiment_display = "Positive"
	            elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
	                sentiment_display = "Negative"
	            else:
	                sentiment_display = "Neutral"
	            sentiment_counts[sentiment_display] += 1

	            title = (article.get("title", "") or "").lower()
	            desc = (article.get("description", "") or "").lower()
	            for keyword, needle, _ in keyword_table:
	                if needle in title or needle in desc:
	                    key_themes[keyword] = key_themes.get(keyword, 0) + 1

	            summaries.append({
	                "title": article.get("title", "No title"),
	                "summary": summary,
	                "key_insight": key_insight,
	                "sentiment": sentiment_display,
	                "confidence": sentiment_score,
	                "url": article.get("url", ""),
	                "published_at": article.get("publishedAt", ""),
	            })

	        top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
	        logger.info(f"Sentiment counts: {sentiment_counts}")
	        return summaries[:3], sentiment_counts, top_themes
	    except Exception as e:
	        logger.error(f"Error in summarize_and_classify: {str(e)}")
	        return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []

	# Display news articles
	def display_news_articles(news_articles, company_name, selected_period):
	    """Render the summarized articles of one company.

	    Args:
	        news_articles: list of dicts with the keys "title", "summary",
	            "key_insight", "url", "sentiment", "confidence" and
	            "published_at".
	        company_name: company shown in the section header.
	        selected_period: period code shown in the section header.

	    Each article is drawn in its own container: text on the left, a
	    colored sentiment badge and the publication time on the right.
	    """
	    colored_header(
	        f"Summarized News for {company_name} ({selected_period})",
	        description="Key Updates from the Selected Period",
	        color_name="blue-70"
	    )
	    # sentiment -> (CSS class, badge label); anything else renders as Neutral
	    badge_styles = {
	        "Positive": ("sentiment-positive", "🟢 Positive"),
	        "Negative": ("sentiment-negative", "🔴 Negative"),
	    }
	    neutral_style = ("sentiment-neutral", "⚪ Neutral")

	    for item in news_articles:
	        with st.container():
	            st.markdown('<div class="news-container" role="article">', unsafe_allow_html=True)
	            text_col, meta_col = st.columns([3, 1])
	            with text_col:
	                st.subheader(item['title'], help="News article title")
	                st.write(f"Summary: {item['summary']}")
	                st.write(f"Key Insight: {item['key_insight']}")
	                st.markdown(f"[Read More]({item['url']})", unsafe_allow_html=True)
	            with meta_col:
	                css_class, badge_label = badge_styles.get(item['sentiment'], neutral_style)
	                confidence_pct = item["confidence"] * 100
	                st.markdown(
	                    f'<span class="{css_class}" role="status">{badge_label} ({confidence_pct:.1f}%)</span>',
	                    unsafe_allow_html=True,
	                )
	                st.write(f"Published: {item['published_at']}")
	            st.markdown('</div>', unsafe_allow_html=True)

	# Main app logic: on "Analyze News", fetch news for every company in the
	# selected sector, score sentiment per company, then render the overview
	# table, bar chart, guidance text and the per-company article summaries.
	st.title("📰 NIFTY 50 Sector News Analysis")
	st.markdown("Analyze news sentiment for companies in a selected sector to guide investment decisions.", unsafe_allow_html=True)

	if button:
	    # The models are only built once an analysis is actually requested.
	    if not summarizer or not classifier:
	        summarizer, classifier = init_models()

	    api_key = os.getenv("NEWSAPI_KEY")
	    if not api_key:
	        st.error("NEWSAPI_KEY environment variable not set. Please configure it.")
	        st.stop()

	    with st.spinner("Fetching and analyzing news..."):
	        from_date, to_date = get_date_range(selected_period)

	        companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
	        if not companies_in_sector:
	            st.warning(f"No companies found for {selected_sector} sector.")
	            st.stop()

	        sentiment_data = []
	        all_news = {}
	        sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
	        max_articles = 0
	        sector_themes = {}

	        async def fetch_all_news():
	            """Run one fetch per company; results come back in company order."""
	            async with aiohttp.ClientSession() as session:
	                tasks = [
	                    fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period=selected_period)
	                    for company_name in companies_in_sector.keys()
	                ]
	                return await asyncio.gather(*tasks, return_exceptions=True)

	        progress_bar = st.progress(0)
	        progress_text = st.empty()
	        results = asyncio.run(fetch_all_news())

	        # gather() preserves task order, so results line up with the company
	        # names; the name is taken from the dict because a result may be an
	        # exception object (return_exceptions=True) rather than a tuple.
	        for idx, (company_name, result) in enumerate(zip(companies_in_sector, results)):
	            progress_bar.progress((idx + 1) / len(companies_in_sector))
	            progress_text.text(f"Processing {company_name} ({idx + 1}/{len(companies_in_sector)})")

	            if isinstance(result, BaseException):
	                logger.error(f"Error fetching news for {company_name}: {result}")
	                st.warning(f"Failed to fetch news for {company_name}: {result}")
	                continue
	            _, news_articles = result

	            if news_articles:
	                summarized_news, sentiment_counts, top_themes = summarize_and_classify_batch(news_articles)
	                total_articles = sum(sentiment_counts.values())
	                max_articles = max(max_articles, total_articles)
	                # Net sentiment in [-1, 1]: (positive - negative) / total.
	                sentiment_score = (sentiment_counts["Positive"] - sentiment_counts["Negative"]) / total_articles if total_articles > 0 else 0
	                dominant_sentiment = max(sentiment_counts, key=sentiment_counts.get)
	                sentiment_data.append({
	                    "Company": company_name,
	                    "Positive": sentiment_counts["Positive"],
	                    "Negative": sentiment_counts["Negative"],
	                    "Neutral": sentiment_counts["Neutral"],
	                    "Total": total_articles,
	                    "Sentiment Score": sentiment_score,
	                    "Dominant Sentiment": dominant_sentiment,
	                    "Top Themes": [theme[0] for theme in top_themes]
	                })
	                all_news[company_name] = summarized_news
	                for sentiment, count in sentiment_counts.items():
	                    sector_sentiment_counts[sentiment] += count
	                for theme, count in top_themes:
	                    sector_themes[theme] = sector_themes.get(theme, 0) + count
	            else:
	                st.warning(f"No news found for {company_name}.{' Try a longer time frame like 5D.' if selected_period == '1D' else ''}")

	        progress_bar.empty()
	        progress_text.empty()

	        if sentiment_data:
	            colored_header(
	                f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
	                description=f"News from {from_date} to {to_date}",
	                color_name="blue-70"
	            )

	            sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
	            sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
	            st.subheader("Company Sentiment Overview")
	            st.table(sentiment_df)

	            st.subheader("Sentiment Score Distribution")
	            fig = px.bar(
	                sentiment_df,
	                x="Company",
	                y="Sentiment Score",
	                color="Sentiment Score",
	                color_continuous_scale="RdYlGn",
	                title="Sentiment Scores by Company",
	                labels={"Sentiment Score": "Sentiment Score (-1 to 1)"},
	                height=400
	            )
	            st.plotly_chart(fig, use_container_width=True)

	            colored_header("📊 Decision Guidance", description="Investment Insights from News Sentiment", color_name="violet-70")
	            st.markdown("Disclaimer: These are news-based insights, not financial advice. Consult a financial advisor.", unsafe_allow_html=True)

	            sector_total = sum(sector_sentiment_counts.values())
	            sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
	            sector_negative_pct = (sector_sentiment_counts["Negative"] / sector_total * 100) if sector_total > 0 else 0
	            sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
	            # A sentiment only labels the sector when it holds a strict majority.
	            sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"

	            st.markdown(f"Sector Sentiment: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
	            outlook = "Favorable 📈" if sector_positive_pct > 50 else "Cautious 📉" if sector_negative_pct > 50 else "Neutral ⚖️"
	            st.markdown(f"- Investment Outlook: {outlook} for {selected_sector} sector.")

	            negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
	            if negative_themes:
	                st.markdown(f"- Risks: Watch for issues related to {', '.join(negative_themes[:2])}.")

	            st.markdown("Top Picks:")
	            for company in sentiment_df.head(2).to_dict("records"):
	                st.markdown(f"- {company['Company']}: Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Neutral']} Neutral).")

	            st.markdown("Tips:")
	            st.markdown("- Prioritize companies with high article counts for stronger signals.")
	            st.markdown("- Check Neutral news for hidden opportunities or risks.")

	            st.markdown("Company Insights:")
	            for company in sentiment_data:
	                # Confidence is the company's article count relative to the
	                # best-covered company. max_articles is 0 when every
	                # summarization failed, so guard the division.
	                coverage = company["Total"] / max_articles if max_articles else 0
	                confidence = "High" if coverage > 0.7 else "Medium" if coverage > 0.3 else "Low"
	                recommendation = "Consider buying 📈" if company["Sentiment Score"] > 0.3 else "Avoid 📉" if company["Sentiment Score"] < -0.3 else "Monitor ⚖️"
	                themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
	                st.markdown(f"- {company['Company']}: Score: {company['Sentiment Score']:.2f} ({themes_str}, {company['Total']} articles, Confidence: {confidence}). {recommendation}.")

	            for company_name in sentiment_df["Company"]:
	                if company_name in all_news and all_news[company_name]:
	                    display_news_articles(all_news[company_name], company_name, selected_period)
	        else:
	            st.warning(f"No news found for {selected_sector} sector. Try a longer time frame like 5D or 1M, or check your NEWSAPI_KEY.")

	# Footer, styled by the ".footer" CSS class injected near the top of the page.
	# A bare "|" is used as the separator: "\|" is not a valid Python escape
	# sequence and would put a literal backslash into the HTML.
	st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)