import streamlit as st
import asyncio
import aiohttp
from newsapi import NewsApiClient
from transformers import pipeline
from streamlit_extras.colored_header import colored_header
from datetime import datetime, timedelta
import pandas as pd
import plotly.express as px
import json
import os
from tenacity import retry, stop_after_attempt, wait_exponential
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NIFTY 50 companies with tickers and sectors
nifty_50_data = {
    "Adani Enterprises": {"ticker": "ADANIENT.NS", "sector": "Industrials"},
    "Adani Ports": {"ticker": "ADANIPORTS.NS", "sector": "Industrials"},
    "Apollo Hospitals": {"ticker": "APOLLOHOSP.NS", "sector": "Healthcare"},
    "Asian Paints": {"ticker": "ASIANPAINT.NS", "sector": "Consumer Discretionary"},
    "Axis Bank": {"ticker": "AXISBANK.NS", "sector": "Financials"},
    "Bajaj Auto": {"ticker": "BAJAJ-AUTO.NS", "sector": "Consumer Discretionary"},
    "Bajaj Finserv": {"ticker": "BAJAJFINSV.NS", "sector": "Financials"},
    "Bajaj Finance": {"ticker": "BAJFINANCE.NS", "sector": "Financials"},
    "Bharti Airtel": {"ticker": "BHARTIARTL.NS", "sector": "Communication Services"},
    "BPCL": {"ticker": "BPCL.NS", "sector": "Energy"},
    "Britannia": {"ticker": "BRITANNIA.NS", "sector": "Consumer Staples"},
    "Cipla": {"ticker": "CIPLA.NS", "sector": "Healthcare"},
    "Coal India": {"ticker": "COALINDIA.NS", "sector": "Energy"},
    "Divis Labs": {"ticker": "DIVISLAB.NS", "sector": "Healthcare"},
    "Dr. Reddy's Labs": {"ticker": "DRREDDY.NS", "sector": "Healthcare"},
    "Eicher Motors": {"ticker": "EICHERMOT.NS", "sector": "Consumer Discretionary"},
    "Grasim Industries": {"ticker": "GRASIM.NS", "sector": "Materials"},
    "HCL Technologies": {"ticker": "HCLTECH.NS", "sector": "Information Technology"},
    "HDFC Bank": {"ticker": "HDFCBANK.NS", "sector": "Financials"},
    "HDFC Life": {"ticker": "HDFCLIFE.NS", "sector": "Financials"},
    "Hero MotoCorp": {"ticker": "HEROMOTOCO.NS", "sector": "Consumer Discretionary"},
    "Hindalco": {"ticker": "HINDALCO.NS", "sector": "Materials"},
    "HUL": {"ticker": "HINDUNILVR.NS", "sector": "Consumer Staples"},
    "ICICI Bank": {"ticker": "ICICIBANK.NS", "sector": "Financials"},
    "IndusInd Bank": {"ticker": "INDUSINDBK.NS", "sector": "Financials"},
    "Infosys": {"ticker": "INFY.NS", "sector": "Information Technology"},
    "ITC": {"ticker": "ITC.NS", "sector": "Consumer Staples"},
    "JSW Steel": {"ticker": "JSWSTEEL.NS", "sector": "Materials"},
    "Kotak Mahindra Bank": {"ticker": "KOTAKBANK.NS", "sector": "Financials"},
    "L&T": {"ticker": "LT.NS", "sector": "Industrials"},
    "L&T Technology Services": {"ticker": "LTIM.NS", "sector": "Information Technology"},
    "M&M": {"ticker": "M&M.NS", "sector": "Consumer Discretionary"},
    "Maruti Suzuki": {"ticker": "MARUTI.NS", "sector": "Consumer Discretionary"},
    "Nestle India": {"ticker": "NESTLEIND.NS", "sector": "Consumer Staples"},
    "NTPC": {"ticker": "NTPC.NS", "sector": "Utilities"},
    "ONGC": {"ticker": "ONGC.NS", "sector": "Energy"},
    "Power Grid": {"ticker": "POWERGRID.NS", "sector": "Utilities"},
    "Reliance": {"ticker": "RELIANCE.NS", "sector": "Energy"},
    "SBI Life": {"ticker": "SBILIFE.NS", "sector": "Financials"},
    "SBI": {"ticker": "SBIN.NS", "sector": "Financials"},
    "Shriram Finance": {"ticker": "SHRIRAMFIN.NS", "sector": "Financials"},
    "Sun Pharma": {"ticker": "SUNPHARMA.NS", "sector": "Healthcare"},
    "Tata Consumer Products": {"ticker": "TATACONSUM.NS", "sector": "Consumer Staples"},
    "Tata Motors": {"ticker": "TATAMOTORS.NS", "sector": "Consumer Discretionary"},
    "Tata Steel": {"ticker": "TATASTEEL.NS", "sector": "Materials"},
    "TCS": {"ticker": "TCS.NS", "sector": "Information Technology"},
    "Tech Mahindra": {"ticker": "TECHM.NS", "sector": "Information Technology"},
    "Titan": {"ticker": "TITAN.NS", "sector": "Consumer Discretionary"},
    "UltraTech Cement": {"ticker": "ULTRACEMCO.NS", "sector": "Materials"},
    "Wipro": {"ticker": "WIPRO.NS", "sector": "Information Technology"},
}

# Streamlit app setup
st.set_page_config(page_title="NIFTY 50 News Analysis", layout="wide")

# Custom CSS with improved accessibility
st.markdown("""
    <style>
    .stApp {
        background: linear-gradient(to bottom right, #f7fafc, #edf2f7);
    }
    .sidebar .sidebar-content {
        background: linear-gradient(to bottom, #2d3748, #4a5568);
        color: white;
        border-radius: 8px;
        padding: 15px;
    }
    .stButton>button {
        background-color: #3182ce;
        color: white;
        border-radius: 6px;
        padding: 8px 16px;
        font-weight: bold;
        transition: background-color 0.3s;
    }
    .stButton>button:hover {
        background-color: #2b6cb0;
    }
    .stTable {
        border: 1px solid #e2e8f0;
        border-radius: 6px;
        background: #ffffff;
        box-shadow: 0 1px 3px rgba(0,0,0,0.1);
    }
    .news-container {
        border: 1px solid #e2e8f0;
        border-radius: 6px;
        padding: 12px;
        margin-bottom: 12px;
        background: #f7fafc;
    }
    .footer {
        text-align: center;
        padding: 15px;
        color: #4a5568;
        font-size: 14px;
    }
    [role="alert"] {
        outline: 2px solid #3182ce;
    }
    .sentiment-positive {
        color: #38a169;
        font-weight: bold;
    }
    .sentiment-negative {
        color: #e53e3e;
        font-weight: bold;
    }
    .sentiment-neutral {
        color: #718096;
        font-weight: bold;
    }
    </style>
""", unsafe_allow_html=True)

# Load keyword weights from JSON
@st.cache_resource
def load_keyword_weights():
    try:
        with open("keyword_weights.json", "r") as f:
            return json.load(f)
    except FileNotFoundError:
        keyword_weights = {
            "revenue": 3, "profit": 3, "loss": 3, "earnings": 3, "EBITDA": 3, "quarterly results": 3, "annual report": 3,
            "share price": 3, "market cap": 3, "dividend": 3, "buyback": 3, "stock split": 3, "bonus issue": 3,
            "downgrade": 3, "upgrade": 3, "bullish": 3, "bearish": 3, "rating change": 3,
            "acquisition": 2, "merger": 2, "takeover": 2, "buyout": 2, "new plant": 2, "factory": 2, "expansion": 2,
            "investment": 2, "launch": 2, "R&D": 2, "deal": 2, "agreement": 2, "MoU": 2, "partnership": 2, "collaboration": 2,
            "SEBI": 1.5, "fine": 1.5, "violation": 1.5, "compliance": 1.5, "FIR": 1.5, "probe": 1.5, "subsidy": 1.5,
            "tax": 1.5, "regulation": 1.5, "policy change": 1.5, "license": 1.5, "CEO": 1.5, "CFO": 1.5, "resigns": 1.5,
            "appointed": 1.5, "stepping down": 1.5, "fraud": 1.5, "scandal": 1.5, "mismanagement": 1.5, "whistleblower": 1.5,
            "inflation": 1, "GDP": 1, "interest rate": 1, "RBI policy": 1, "sanctions": 1, "trade war": 1, "conflict": 1,
            "export/import": 1, "recall": 1, "defect": 1, "complaint": 1, "customer issue": 1, "hack": 1, "breach": 1,
            "cyberattack": 1, "data leak": 1
        }
        with open("keyword_weights.json", "w") as f:
            json.dump(keyword_weights, f, indent=4)
        return keyword_weights

keyword_weights = load_keyword_weights()

# Lazy-load models
@st.cache_resource
def init_models():
    try:
        summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1)
        classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=-1)
        return summarizer, classifier
    except Exception as e:
        st.error(f"Failed to initialize models: {str(e)}")
        st.stop()

summarizer, classifier = None, None

# Sidebar controls
with st.sidebar:
    st.title("NIFTY 50 News Analysis")
    st.info("Analyze news sentiment for companies by sector over different time frames.", icon="ℹ️")
    sectors = sorted(set(data['sector'] for data in nifty_50_data.values()))
    selected_sector = st.selectbox("Select a Sector", sectors, help="Choose a sector to analyze")
    selected_period = st.selectbox("Select Time Frame", ["1D", "5D", "1M", "6M", "YTD", "1Y", "5Y"], index=2, help="Select the time range for news")
    button = st.button("Analyze News", key="analyze_button")

# Function to calculate time range
def get_date_range(period):
    end_date = datetime.now()
    if period == "1D":
        start_date = end_date - timedelta(hours=36)  # Broaden to 36 hours
    elif period == "5D":
        start_date = end_date - timedelta(days=5)
    elif period == "1M":
        start_date = end_date - timedelta(days=30)
    elif period == "6M":
        start_date = end_date - timedelta(days=180)
    elif period == "YTD":
        start_date = datetime(end_date.year, 1, 1)
    elif period == "1Y":
        start_date = end_date - timedelta(days=365)
    else:  # 5Y
        start_date = end_date - timedelta(days=365 * 5)
    return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')

# Async news fetching with retry logic
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=5))
async def fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period="1D"):
    try:
        newsapi = NewsApiClient(api_key=api_key)
        page_size = 50 if period == "1D" else page_size  # Increase for 1D
        articles = newsapi.get_everything(
            q=company_name,
            from_param=from_date,
            to=to_date if period != "1D" else None,
            language="en",
            sort_by="publishedAt",
            page_size=page_size
        )["articles"]
        if period == "1D":
            relevant_articles = articles  # No filtering for 1D
        else:
            relevant_articles = []
            for article in articles:
                title = (article.get("title", "") or "").lower()
                desc = (article.get("description", "") or "").lower()
                if any(keyword in title or keyword in desc for keyword in keyword_weights.keys()):
                    article["relevance_weight"] = sum(keyword_weights.get(keyword, 0) for keyword in keyword_weights if keyword in title or keyword in desc)
                    relevant_articles.append(article)
        logger.info(f"Fetched {len(articles)} articles, {len(relevant_articles)} relevant for {company_name} in {period}")
        return company_name, relevant_articles[:5]
    except Exception as e:
        logger.error(f"Error fetching news for {company_name}: {str(e)}")
        st.error(f"Failed to fetch news for {company_name}: {str(e)}. Check NEWSAPI_KEY or try again later.")
        return company_name, []

# Batch summarize and classify articles
def summarize_and_classify_batch(news_articles):
    try:
        sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
        summaries = []
        key_themes = {}
        contents = [article.get("content", "") or article.get("description", "") or article.get("title", "") for article in news_articles]
        contents = [c[:1024] for c in contents if c]
        if not contents:
            return [], sentiment_counts, []

        summaries_raw = summarizer(contents, max_length=80, min_length=20, do_sample=False, batch_size=4)
        summaries_texts = [s["summary_text"] for s in summaries_raw] if isinstance(summaries_raw, list) else [summaries_raw["summary_text"]]
        
        sentiment_results = classifier(summaries_texts, batch_size=4)
        
        for idx, article in enumerate(news_articles):
            if idx >= len(summaries_texts):
                continue
            summary = summaries_texts[idx] if len(article.get("content", "") or article.get("description", "") or article.get("title", "")) > 100 else contents[idx]
            sentences = summary.split(". ")
            key_insight = max(sentences, key=lambda s: sum(keyword_weights.get(k, 0) for k in keyword_weights if k in s.lower()), default=summary)
            sentiment_result = sentiment_results[idx]
            sentiment_label = sentiment_result["label"]
            sentiment_score = sentiment_result["score"]
            if sentiment_label == "POSITIVE" and sentiment_score > 0.6:
                sentiment_counts["Positive"] += 1
                sentiment_display = "Positive"
            elif sentiment_label == "NEGATIVE" and sentiment_score > 0.6:
                sentiment_counts["Negative"] += 1
                sentiment_display = "Negative"
            else:
                sentiment_counts["Neutral"] += 1
                sentiment_display = "Neutral"
            title = (article.get("title", "") or "").lower()
            desc = (article.get("description", "") or "").lower()
            for keyword in keyword_weights:
                if keyword in title or keyword in desc:
                    key_themes[keyword] = key_themes.get(keyword, 0) + 1
            summaries.append({
                "title": article.get("title", "No title"),
                "summary": summary,
                "key_insight": key_insight,
                "sentiment": sentiment_display,
                "confidence": sentiment_score,
                "url": article.get("url", ""),
                "published_at": article.get("publishedAt", "")
            })
        top_themes = sorted(key_themes.items(), key=lambda x: x[1], reverse=True)[:3]
        logger.info(f"Sentiment counts: {sentiment_counts}")
        return summaries[:3], sentiment_counts, top_themes
    except Exception as e:
        logger.error(f"Error in summarize_and_classify: {str(e)}")
        return [], {"Positive": 0, "Negative": 0, "Neutral": 0}, []

# Display news articles
def display_news_articles(news_articles, company_name, selected_period):
    colored_header(
        f"Summarized News for {company_name} ({selected_period})",
        description=f"Key Updates from the Selected Period",
        color_name="blue-70"
    )
    for news in news_articles:
        with st.container():
            st.markdown('<div class="news-container" role="article">', unsafe_allow_html=True)
            col1, col2 = st.columns([3, 1])
            with col1:
                st.subheader(news['title'], help="News article title")
                st.write(f"**Summary**: {news['summary']}")
                st.write(f"**Key Insight**: {news['key_insight']}")
                st.markdown(f"[Read More]({news['url']})", unsafe_allow_html=True)
            with col2:
                if news['sentiment'] == "Positive":
                    st.markdown(f'<span class="sentiment-positive" role="status">🟢 Positive ({news["confidence"]*100:.1f}%)</span>', unsafe_allow_html=True)
                elif news['sentiment'] == "Negative":
                    st.markdown(f'<span class="sentiment-negative" role="status">🔴 Negative ({news["confidence"]*100:.1f}%)</span>', unsafe_allow_html=True)
                else:
                    st.markdown(f'<span class="sentiment-neutral" role="status">⚪ Neutral ({news["confidence"]*100:.1f}%)</span>', unsafe_allow_html=True)
                st.write(f"**Published**: {news['published_at']}")
            st.markdown('</div>', unsafe_allow_html=True)

# Main app logic
st.title("📰 NIFTY 50 Sector News Analysis")
st.markdown("Analyze news sentiment for companies in a selected sector to guide investment decisions.", unsafe_allow_html=True)

if button:
    if not summarizer or not classifier:
        summarizer, classifier = init_models()

    api_key = os.getenv("NEWSAPI_KEY")
    if not api_key:
        st.error("NEWSAPI_KEY environment variable not set. Please configure it.")
        st.stop()

    with st.spinner("Fetching and analyzing news..."):
        from_date, to_date = get_date_range(selected_period)
        
        companies_in_sector = {name: data for name, data in nifty_50_data.items() if data['sector'] == selected_sector}
        if not companies_in_sector:
            st.warning(f"No companies found for {selected_sector} sector.")
            st.stop()

        sentiment_data = []
        all_news = {}
        sector_sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
        max_articles = 0
        sector_themes = {}
        
        async def fetch_all_news():
            async with aiohttp.ClientSession() as session:
                tasks = [
                    fetch_news_async(session, company_name, from_date, to_date, api_key, page_size=20, period=selected_period)
                    for company_name in companies_in_sector.keys()
                ]
                return await asyncio.gather(*tasks, return_exceptions=True)

        progress_bar = st.progress(0)
        progress_text = st.empty()
        results = asyncio.run(fetch_all_news())
        
        for idx, (company_name, news_articles) in enumerate(results):
            progress_bar.progress((idx + 1) / len(companies_in_sector))
            progress_text.text(f"Processing {company_name} ({idx + 1}/{len(companies_in_sector)})")
            
            if news_articles:
                summarized_news, sentiment_counts, top_themes = summarize_and_classify_batch(news_articles)
                total_articles = sum(sentiment_counts.values())
                max_articles = max(max_articles, total_articles)
                sentiment_score = (sentiment_counts["Positive"] - sentiment_counts["Negative"]) / total_articles if total_articles > 0 else 0
                dominant_sentiment = max(sentiment_counts, key=sentiment_counts.get)
                sentiment_data.append({
                    "Company": company_name,
                    "Positive": sentiment_counts["Positive"],
                    "Negative": sentiment_counts["Negative"],
                    "Neutral": sentiment_counts["Neutral"],
                    "Total": total_articles,
                    "Sentiment Score": sentiment_score,
                    "Dominant Sentiment": dominant_sentiment,
                    "Top Themes": [theme[0] for theme in top_themes]
                })
                all_news[company_name] = summarized_news
                for sentiment, count in sentiment_counts.items():
                    sector_sentiment_counts[sentiment] += count
                for theme, count in top_themes:
                    sector_themes[theme] = sector_themes.get(theme, 0) + count
            else:
                st.warning(f"No news found for {company_name}.{' Try a longer time frame like 5D.' if selected_period == '1D' else ''}")

        progress_bar.empty()
        progress_text.empty()

        if sentiment_data:
            colored_header(
                f"Sentiment Analysis for {selected_sector} Sector ({selected_period})",
                description=f"News from {from_date} to {to_date}",
                color_name="blue-70"
            )
            
            sentiment_df = pd.DataFrame(sentiment_data)[["Company", "Positive", "Negative", "Neutral", "Total", "Sentiment Score"]]
            sentiment_df = sentiment_df.sort_values("Sentiment Score", ascending=False)
            st.subheader("Company Sentiment Overview")
            st.table(sentiment_df)
            
            st.subheader("Sentiment Score Distribution")
            fig = px.bar(
                sentiment_df,
                x="Company",
                y="Sentiment Score",
                color="Sentiment Score",
                color_continuous_scale="RdYlGn",
                title="Sentiment Scores by Company",
                labels={"Sentiment Score": "Sentiment Score (-1 to 1)"},
                height=400
            )
            st.plotly_chart(fig, use_container_width=True)
            
            colored_header("📊 Decision Guidance", description="Investment Insights from News Sentiment", color_name="violet-70")
            st.markdown("**Disclaimer**: These are news-based insights, not financial advice. Consult a financial advisor.", unsafe_allow_html=True)
            
            sector_total = sum(sector_sentiment_counts.values())
            sector_positive_pct = (sector_sentiment_counts["Positive"] / sector_total * 100) if sector_total > 0 else 0
            sector_negative_pct = (sector_sentiment_counts["Negative"] / sector_total * 100) if sector_total > 0 else 0
            sector_neutral_pct = (sector_sentiment_counts["Neutral"] / sector_total * 100) if sector_total > 0 else 0
            sector_sentiment = "Positive" if sector_positive_pct > 50 else "Negative" if sector_negative_pct > 50 else "Neutral"
            
            st.markdown(f"**Sector Sentiment**: {sector_sentiment} ({sector_positive_pct:.1f}% Positive, {sector_negative_pct:.1f}% Negative, {sector_neutral_pct:.1f}% Neutral)")
            outlook = "Favorable 📈" if sector_positive_pct > 50 else "Cautious 📉" if sector_negative_pct > 50 else "Neutral ⚖️"
            st.markdown(f"- **Investment Outlook**: {outlook} for {selected_sector} sector.")
            
            negative_themes = [theme for theme, count in sorted(sector_themes.items(), key=lambda x: x[1], reverse=True) if theme in ["loss", "downgrade", "bearish", "fine", "violation", "probe", "fraud", "scandal"]]
            if negative_themes:
                st.markdown(f"- **Risks**: Watch for issues related to {', '.join(negative_themes[:2])}.")
            
            st.markdown("**Top Picks**:")
            for company in sentiment_df.head(2).to_dict("records"):
                st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({company['Positive']} Positive, {company['Neutral']} Neutral).")
            
            st.markdown("**Tips**:")
            st.markdown("- Prioritize companies with high article counts for stronger signals.")
            st.markdown("- Check Neutral news for hidden opportunities or risks.")
            
            st.markdown("**Company Insights**:")
            for company in sentiment_data:
                confidence = "High" if company["Total"] / max_articles > 0.7 else "Medium" if company["Total"] / max_articles > 0.3 else "Low"
                recommendation = "Consider buying 📈" if company["Sentiment Score"] > 0.3 else "Avoid 📉" if company["Sentiment Score"] < -0.3 else "Monitor ⚖️"
                themes_str = ", ".join(company["Top Themes"]) if company["Top Themes"] else "none"
                st.markdown(f"- **{company['Company']}**: Score: {company['Sentiment Score']:.2f} ({themes_str}, {company['Total']} articles, Confidence: {confidence}). {recommendation}.")
            
            for company_name in sentiment_df["Company"]:
                if company_name in all_news and all_news[company_name]:
                    display_news_articles(all_news[company_name], company_name, selected_period)
        else:
            st.warning(f"No news found for {selected_sector} sector. Try a longer time frame like 5D or 1M, or check your NEWSAPI_KEY.")

# Footer
st.markdown('<div class="footer">Created by MtotoWaJemo | Powered by NewsAPI & Transformers</div>', unsafe_allow_html=True)