# Spaces:
#
# Agrannya
# /
#
# Insagram_reel_Analyzer
#
# Runtime error
#
# File size: 35,550 Bytes

##FINAL FILE

# This deploy.py file contains the complete code for the Instagram Reels Analysis Gradio App.

# --- Imports ---
import gradio as gr
import time
import random
import matplotlib.pyplot as plt
import pandas as pd
import torch
import emoji
import re
import numpy as np
import io # Import io for handling image bytes


from instagrapi import Client
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    RobertaForSequenceClassification,
    AlbertForSequenceClassification
)
from datasets import Dataset, Features, Value
from collections import Counter
from sklearn.metrics import accuracy_score, f1_score

# --- Configuration ---
# Tunables read by the sentiment-analysis code below.
CONFIG = {
    "max_length": 128,  # max tokens per caption handed to each tokenizer
    "batch_size": 16,  # per-device train/eval batch size when fine-tuning
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "few_shot_examples": 5,  # per class
    "confidence_threshold": 0.7,  # score above which one model's verdict is used directly
    "neutral_reanalysis_threshold": 0.33,  # neutral share above which neutrals are re-checked
}

# --- Global Variables for State Management ---
# Module-level state shared by the Gradio callbacks. Functions that rebind
# these names declare them `global` themselves; a `global` statement at module
# level has no effect, so none is used here.
cl = None  # logged-in Instagram client, or None
explore_reels_list = []  # reels returned by the last successful fetch
sentiment_analyzer_instance = None  # created on first analysis
content_classifier_pipeline = None  # created on first analysis


# --- Sentiment Analysis Class ---
class ReelSentimentAnalyzer:
    """Classify reel captions as positive, neutral or negative.

    Captions detected as English are scored by an emotion model and a
    sentiment model whose outputs are combined; captions detected as Hindi or
    Hinglish are scored by an Indic-BERT sequence classifier that
    ``train_hindi_model`` can fine-tune.
    """

    # Strong-polarity words used by ``_reduce_neutrals`` to reclassify
    # low-confidence neutral captions.
    _STRONG_POSITIVE_KEYWORDS = frozenset({
        "amazing", "love", "best", "fantastic", "awesome", "superb", "great",
        "अद्भुत", "शानदार", "बहुत अच्छा", "मज़ेदार",
    })
    _STRONG_NEGATIVE_KEYWORDS = frozenset({
        "hate", "worst", "bad", "terrible", "awful", "disappointed", "horrible", "cringe",
        "खराब", "बेकार", "बहुत बुरा", "घटिया",
    })

    def __init__(self):
        """Select the compute device and load all models."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self._initialize_models()

    def _initialize_models(self):
        """Initialize and configure all models"""
        print("\nInitializing Sentiment Analysis Models...")
        # English models
        print("Loading English Emotion Model...")
        self.emotion_tokenizer = AutoTokenizer.from_pretrained("finiteautomata/bertweet-base-emotion-analysis")
        self.emotion_model = AutoModelForSequenceClassification.from_pretrained(
            "finiteautomata/bertweet-base-emotion-analysis"
        ).to(self.device)
        print("Loading English Sentiment Model...")
        self.sentiment_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
        self.sentiment_model = RobertaForSequenceClassification.from_pretrained(
            "cardiffnlp/twitter-roberta-base-sentiment-latest",
            ignore_mismatched_sizes=True
        ).to(self.device)

        # Hindi/English model (we'll fine-tune this)
        # NOTE(review): the checkpoint name suggests a base model, so the
        # 3-label head is presumably untrained until train_hindi_model() has
        # been run -- confirm before trusting Hindi/Hinglish predictions.
        print("Loading Indic-BERT Model for Hindi/Hinglish...")
        self.hindi_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")
        self.hindi_model = AlbertForSequenceClassification.from_pretrained(
            "ai4bharat/indic-bert",
            num_labels=3,
            id2label={0: "negative", 1: "neutral", 2: "positive"},
            label2id={"negative": 0, "neutral": 1, "positive": 2}
        ).to(self.device)
        # Store label2id mapping for easy access
        self.hindi_label2id = self.hindi_model.config.label2id
        print("Models Initialized.")

        # Emotion to sentiment mapping; labels missing here fall back to
        # "neutral" at lookup time.
        self.emotion_map = {
            "joy": "positive", "love": "positive", "happy": "positive",
            "anger": "negative", "sadness": "negative", "fear": "negative",
            "surprise": "neutral", "neutral": "neutral", "disgust": "negative", "shame": "negative"
        }

        # Captions containing any of these are reported as neutral without
        # running a model.
        self.neutral_keywords = {
            "ad", "sponsored", "promo", "sale", "discount", "offer", "giveaway",
            "buy", "shop", "link in bio",
            "विज्ञापन", "प्रचार", "ऑफर", "डिस्काउंट", "बिक्री", "लिंक बायो में"
        }

    @staticmethod
    def _contains_keyword(text, keywords):
        """Return True if any keyword occurs in ``text`` as a whole word or phrase.

        Args:
            text: Text to search; callers pass it already lower-cased.
            keywords: Iterable of literal words or phrases.
        """
        # Lookarounds replace the word-boundary anchor on purpose: Python's \w
        # excludes combining marks, so \b never matches after a keyword that
        # ends in a Devanagari vowel sign or anusvara when it is followed by a
        # space or the end of the text. For keywords that start and end with a
        # letter the two forms are equivalent.
        return any(
            re.search(rf"(?<!\w){re.escape(kw)}(?!\w)", text) for kw in keywords
        )

    def train_hindi_model(self, train_data, eval_data=None):
        """
        Fine-tune the Hindi/English model on labeled data
        Args:
            train_data: List of dicts [{"text": "...", "label": "positive/negative/neutral"}]
            eval_data: Optional evaluation data in the same format

        The fine-tuned model and tokenizer are saved to
        ./fine_tuned_hindi_sentiment.
        """
        print("\nStarting Hindi model training...")
        # Convert to dataset
        train_dataset = Dataset.from_pandas(pd.DataFrame(train_data))

        # Map string labels to integer IDs
        def map_labels_to_ids(examples):
            labels = []
            for label_str in examples["label"]:
                if label_str in self.hindi_label2id:
                    labels.append(self.hindi_label2id[label_str])
                else:
                    # Unknown labels are kept in the data set but trained as neutral.
                    print(f"Warning: Unexpected label '{label_str}'. Mapping to neutral.")
                    labels.append(self.hindi_label2id["neutral"])
            examples["label"] = labels
            return examples

        def tokenize_function(examples):
            return self.hindi_tokenizer(
                examples["text"],
                padding="max_length",
                truncation=True,
                max_length=CONFIG["max_length"]
            )

        train_dataset = train_dataset.map(map_labels_to_ids, batched=True)
        # Explicitly set the label column to integer type
        train_dataset = train_dataset.cast_column("label", Value("int64"))
        tokenized_train = train_dataset.map(tokenize_function, batched=True)

        # Training arguments - using eval_strategy instead of evaluation_strategy
        training_args = TrainingArguments(
            output_dir="./results",
            eval_strategy="epoch" if eval_data else "no",
            per_device_train_batch_size=CONFIG["batch_size"],
            per_device_eval_batch_size=CONFIG["batch_size"],
            learning_rate=CONFIG["learning_rate"],
            num_train_epochs=CONFIG["num_train_epochs"],
            weight_decay=0.01,
            save_strategy="no",  # Don't save checkpoints during training
            logging_dir='./logs',
            logging_steps=10,
            report_to="none"  # Don't report to external services
        )

        # Compute metrics function
        def compute_metrics(p):
            predictions, labels = p
            predictions = np.argmax(predictions, axis=1)
            return {
                "accuracy": accuracy_score(labels, predictions),
                "f1": f1_score(labels, predictions, average="weighted")
            }

        # The evaluation set goes through the same label mapping and tokenization.
        eval_dataset_processed = None
        if eval_data:
            eval_dataset = Dataset.from_pandas(pd.DataFrame(eval_data))
            eval_dataset = eval_dataset.map(map_labels_to_ids, batched=True)
            eval_dataset_processed = eval_dataset.cast_column("label", Value("int64")).map(tokenize_function, batched=True)

        trainer = Trainer(
            model=self.hindi_model,
            args=training_args,
            train_dataset=tokenized_train,
            eval_dataset=eval_dataset_processed,
            compute_metrics=compute_metrics if eval_data else None,
        )

        # Train
        trainer.train()

        # Save the fine-tuned model
        print("Saving fine-tuned Hindi model...")
        self.hindi_model.save_pretrained("./fine_tuned_hindi_sentiment")
        self.hindi_tokenizer.save_pretrained("./fine_tuned_hindi_sentiment")
        print("Hindi model training complete.")

    def preprocess_text(self, text):
        """Clean a caption: emojis to words, URLs/mentions removed, abbreviations expanded.

        Returns an empty string for empty or None input.
        """
        if not text:
            return ""

        # Convert emojis to text
        text = emoji.demojize(text, delimiters=(" ", " "))

        # Remove URLs and mentions
        text = re.sub(r"http\S+|@\w+", "", text)

        # Expand common abbreviations (can be extended)
        abbrevs = {
            r"\bomg\b": "oh my god",
            r"\btbh\b": "to be honest",
            r"\bky\b": "kyun",  # Hindi 'why'
            r"\bkb\b": "kab",   # Hindi 'when'
            r"\bkya\b": "kya",  # Hindi 'what'
            r"\bkahan\b": "kahan",  # Hindi 'where'
            r"\bkaisa\b": "kaisa"  # Hindi 'how'
        }
        for pattern, replacement in abbrevs.items():
            text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)

        # Remove extra whitespace
        text = re.sub(r"\s+", " ", text).strip()

        return text

    def detect_language(self, text):
        """Return "hi" for Devanagari text, "hi-latin" for Hinglish, else "en"."""
        if re.search(r"[\u0900-\u097F]", text):  # Devanagari script (Hindi, Marathi etc.)
            return "hi"
        # Simple check for common Hindi/Hinglish words (can be expanded)
        hinglish_keywords = ["hai", "kyun", "nahi", "kya", "acha", "bas", "yaar", "main"]
        if any(re.search(rf"\b{kw}\b", text.lower()) for kw in hinglish_keywords):
            return "hi-latin"
        # Fallback to English if no strong Hindi/Hinglish indicators
        return "en"

    def analyze_content(self, text):
        """Classify one caption.

        Returns:
            Tuple ``(label, confidence, details)`` where ``label`` is
            "positive", "neutral" or "negative". Model errors are caught and
            reported as a neutral result with the error text in ``details``.
        """
        processed = self.preprocess_text(text)

        if not processed:
            return "neutral", 0.5, {"reason": "empty_text"}

        lang = self.detect_language(processed)

        # Check for neutral keywords first with higher confidence
        if self._contains_keyword(processed.lower(), self.neutral_keywords):
            return "neutral", 0.9, {"reason": "neutral_keyword"}

        try:
            if lang in ("hi", "hi-latin"):
                # Use Hindi model for Hindi/Hinglish
                return self._analyze_hindi_content(processed)
            # Use ensemble for English
            return self._analyze_english_content(processed)
        except Exception as e:
            print(f"Analysis error for text '{processed[:50]}...': {e}")
            return "neutral", 0.5, {"error": str(e), "original_text": text[:50]}

    def _analyze_hindi_content(self, text):
        """Score Hindi/Hinglish text with the Indic-BERT classifier."""
        inputs = self.hindi_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=CONFIG["max_length"]
        ).to(self.device)

        with torch.no_grad():
            outputs = self.hindi_model(**inputs)

        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        pred_idx = torch.argmax(probs).item()
        confidence = probs[0][pred_idx].item()

        label = self.hindi_model.config.id2label[pred_idx]
        return label, confidence, {"model": "fine-tuned-indic-bert", "lang": "hi"}

    def _analyze_english_content(self, text):
        """Score English text by combining the emotion and sentiment models."""
        # Emotion analysis
        emotion_inputs = self.emotion_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=CONFIG["max_length"]
        ).to(self.device)

        with torch.no_grad():
            emotion_outputs = self.emotion_model(**emotion_inputs)

        emotion_probs = torch.nn.functional.softmax(emotion_outputs.logits, dim=-1)
        emotion_pred = torch.argmax(emotion_probs).item()
        emotion_label = self.emotion_model.config.id2label[emotion_pred]
        emotion_score = emotion_probs[0][emotion_pred].item()

        # Sentiment analysis
        sentiment_inputs = self.sentiment_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=CONFIG["max_length"]
        ).to(self.device)

        with torch.no_grad():
            sentiment_outputs = self.sentiment_model(**sentiment_inputs)

        sentiment_probs = torch.nn.functional.softmax(sentiment_outputs.logits, dim=-1)
        sentiment_pred = torch.argmax(sentiment_probs).item()
        # Class indices are mapped by hand: 0 negative, 1 neutral, 2 positive.
        sentiment_label_mapping = {0: 'negative', 1: 'neutral', 2: 'positive'}
        sentiment_label = sentiment_label_mapping.get(sentiment_pred, 'neutral')  # Default to neutral if mapping fails
        sentiment_score = sentiment_probs[0][sentiment_pred].item()

        # Combine results
        mapped_emotion = self.emotion_map.get(emotion_label, "neutral")

        # Prioritize high-confidence sentiment
        if sentiment_score > CONFIG["confidence_threshold"]:
            final_label = sentiment_label
            final_confidence = sentiment_score
            reason = "high_sentiment_confidence"
        # Then prioritize high-confidence emotion if not neutral
        elif emotion_score > CONFIG["confidence_threshold"] and mapped_emotion != "neutral":
            final_label = mapped_emotion
            final_confidence = emotion_score
            reason = "high_emotion_confidence"
        # Neither model is confident: accept a non-neutral verdict only when
        # both agree, or when the stronger of the two clears 0.4.
        elif sentiment_label == mapped_emotion and sentiment_label != "neutral":
            final_label = sentiment_label
            final_confidence = (sentiment_score + emotion_score) / 2
            reason = "emotion_sentiment_agreement"
        elif sentiment_label != "neutral" and sentiment_score > emotion_score and sentiment_score > 0.4:
            final_label = sentiment_label
            final_confidence = sentiment_score * 0.9  # Slightly reduce confidence
            reason = "sentiment_slightly_higher"
        elif mapped_emotion != "neutral" and emotion_score > sentiment_score and emotion_score > 0.4:
            final_label = mapped_emotion
            final_confidence = emotion_score * 0.9  # Slightly reduce confidence
            reason = "emotion_slightly_higher"
        else:  # Default to neutral if no strong signal
            final_label = "neutral"
            final_confidence = 0.6  # Assign a baseline neutral confidence
            reason = "fallback_to_neutral"

        return final_label, final_confidence, {
            "emotion_label": emotion_label,
            "emotion_score": emotion_score,
            "sentiment_label": sentiment_label,
            "sentiment_score": sentiment_score,
            "mapped_emotion": mapped_emotion,
            "model": "ensemble",
            "lang": "en",
            "reason": reason
        }

    def analyze_reels(self, reels, max_to_analyze=100):
        """Classify the captions of up to ``max_to_analyze`` reels.

        Returns:
            Tuple ``(results, detailed_results)``: a Counter of labels and a
            list with one dict per reel (reel_id, text, label, confidence,
            details).
        """
        reels_to_process = reels[:max_to_analyze]
        total = len(reels_to_process)  # may be smaller than max_to_analyze
        print(f"\n--- Starting Sentiment Analysis ({total} reels) ---")
        results = Counter()
        detailed_results = []

        for i, reel in enumerate(reels_to_process, 1):
            caption = getattr(reel, 'caption_text', '') or getattr(reel, 'caption', '') or ''
            print(f"Analyzing sentiment for reel {i}/{total} (ID: {reel.id})...")
            label, confidence, details = self.analyze_content(caption)
            results[label] += 1
            detailed_results.append({
                "reel_id": reel.id,
                "text": caption,
                "label": label,
                "confidence": confidence,
                "details": details
            })

        print("\nInitial Sentiment Distribution:", dict(results))

        # Post-analysis neutral reduction if a significant portion is neutral
        total_analyzed = sum(results.values())
        if total_analyzed > 0 and results["neutral"] / total_analyzed > CONFIG["neutral_reanalysis_threshold"]:
            print(f"High neutral count ({results['neutral']}). Attempting to re-analyze...")
            self._reduce_neutrals(results, detailed_results)
            print("Sentiment distribution after re-analysis:", dict(results))

        print("Sentiment Analysis Complete.")
        return results, detailed_results

    def _reduce_neutrals(self, results, detailed_results):
        """Reclassify low-confidence neutrals that contain strong polarity keywords.

        Both ``results`` and the affected entries of ``detailed_results`` are
        updated in place.
        """
        neutrals_to_recheck = [
            item for item in detailed_results
            if item["label"] == "neutral" and item["confidence"] < 0.8
        ]

        print(f"Re-checking {len(neutrals_to_recheck)} neutral reels...")

        for item in neutrals_to_recheck:
            text_lower = self.preprocess_text(item["text"]).lower()

            is_strong_pos = self._contains_keyword(text_lower, self._STRONG_POSITIVE_KEYWORDS)
            is_strong_neg = self._contains_keyword(text_lower, self._STRONG_NEGATIVE_KEYWORDS)

            # Reclassify only on an unambiguous signal: exactly one polarity present.
            if is_strong_pos == is_strong_neg:
                continue

            new_label = "positive" if is_strong_pos else "negative"
            results["neutral"] -= 1
            results[new_label] += 1
            item.update({
                "label": new_label,
                "confidence": min(0.95, item["confidence"] + 0.3),  # Increase confidence
                "reanalyzed": True,
                "reanalysis_reason": "strong_pos_keywords" if is_strong_pos else "strong_neg_keywords"
            })


def plot_sentiment_pie(results, title="Reels Sentiment Analysis"):
    """
    Creates a pie chart from sentiment analysis results and returns the matplotlib figure.

    Args:
        results: Counter object or dict with 'positive', 'neutral', 'negative' keys
        title: Chart title

    Returns:
        Matplotlib Figure object, or None if all three counts are zero.
    """
    # (display label, key in `results`, colour, explode offset) in display order.
    slice_specs = (
        ("Positive", "positive", "#4CAF50", 0.05),
        ("Neutral", "neutral", "#FFC107", 0),
        ("Negative", "negative", "#F44336", 0.05),
    )
    slices = [
        (label, results.get(key, 0), colour, offset)
        for label, key, colour, offset in slice_specs
    ]

    if sum(size for _, size, _, _ in slices) == 0:
        return None

    # Zero-sized slices are dropped so they do not add stray labels.
    visible = [entry for entry in slices if entry[1] > 0]
    labels = [entry[0] for entry in visible]
    sizes = [entry[1] for entry in visible]
    colors = [entry[2] for entry in visible]
    explode = [entry[3] for entry in visible]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(sizes, explode=explode, labels=labels, colors=colors,
           autopct='%1.1f%%', shadow=True, startangle=140,
           textprops={'fontsize': 12, 'color': 'black'})

    ax.axis('equal')
    # Title and layout are set on this figure's own objects rather than via
    # pyplot's implicit "current figure".
    ax.set_title(title, fontsize=16, pad=20)
    fig.tight_layout()

    return fig

# --- Content Analysis Logic ---
# Candidate labels handed to the zero-shot classifier when no keyword matches.
content_categories = [
    "news", "meme", "sports", "science", "music", "movie",
    "gym", "comedy", "food", "technology", "travel", "fashion", "art", "business"
]

# Keyword shortcuts per category. classify_reel_content() walks this dict in
# order and returns the first category with a matching keyword, so the order
# of the entries matters when a keyword appears under more than one category
# (e.g. "artist" is listed under both "music" and "art").
category_keywords = {
    "news": {"news", "update", "breaking", "reported", "headlines"},
    "meme": {"meme", "funny", "lol", "haha", "relatable"},
    "sports": {"sports", "cricket", "football", "match", "game", "team", "score"},
    "science": {"science", "research", "discovery", "experiment", "facts", "theory"},
    "music": {"music", "song", "album", "release", "artist", "beats"},
    "movie": {"movie", "film", "bollywood", "trailer", "series", "actor"},
    "gym": {"gym", "workout", "fitness", "exercise", "training", "bodybuilding"},
    "comedy": {"comedy", "joke", "humor", "standup", "skit", "laugh"},
    "food": {"food", "recipe", "cooking", "eat", "delicious", "restaurant", "kitchen"},
    "technology": {"tech", "phone", "computer", "ai", "gadget", "software", "innovation"},
    "travel": {"travel", "trip", "vacation", "explore", "destination", "adventure"},
    "fashion": {"fashion", "style", "ootd", "outfit", "trends", "clothing"},
    "art": {"art", "artist", "painting", "drawing", "creative", "design"},
    "business": {"business", "startup", "marketing", "money", "finance", "entrepreneur"}
}

def preprocess_text_cat(text):
    """Normalize a caption for categorization.

    URLs, @mentions and #hashtags are removed, the rest is lower-cased and
    runs of whitespace are collapsed. Empty or None input yields "".
    """
    if text:
        without_noise = re.sub(r"http\S+|@\w+|#\w+", "", text)
        collapsed = re.sub(r"\s+", " ", without_noise.lower())
        return collapsed.strip()
    return ""

def classify_reel_content(text):
    """Assign a content category to a caption.

    Keyword shortcuts are tried first; when none matches, the module-level
    zero-shot pipeline ranks the categories and the top label is accepted
    only if its score exceeds 0.5.

    Returns:
        Tuple ``(category, details)``; ``category`` is "other" whenever no
        confident decision can be made, and ``details["reason"]`` says why.
    """
    cleaned = preprocess_text_cat(text)

    # Captions with fewer than two words carry too little signal.
    word_count = len(cleaned.split()) if cleaned else 0
    if word_count < 2:
        return "other", {"reason": "short_text"}

    # First category (in dict order) with a whole-word keyword hit wins.
    for category in category_keywords:
        for keyword in category_keywords[category]:
            if re.search(rf"\b{re.escape(keyword)}\b", cleaned):
                return category, {"reason": "keyword_match"}

    model_text = cleaned[:256]
    classifier = content_classifier_pipeline

    if classifier is None:
        # Should not happen if initialized in analyze_reels_gradio or globally
        print("Content classifier pipeline not initialized in classify_reel_content.")
        return "other", {"reason": "classifier_not_initialized"}

    try:
        prediction = classifier(model_text, content_categories, multi_label=False)
        best_label = prediction['labels'][0]
        best_score = prediction['scores'][0]

        if not best_score > 0.5:
            return "other", {"reason": "low_model_confidence", "score": best_score}
        return best_label, {"reason": "model_prediction", "score": best_score}

    except Exception as e:
        print(f"Error during zero-shot classification for text '{model_text}...': {e}")
        return "other", {"reason": "classification_error"}


def plot_category_distribution(counter, title="Reels Content Distribution"):
    """
    Generate pie chart from category counts and returns the matplotlib figure.

    Categories below 2% of the total, and the "other" category itself, are
    merged into a single "Other" slice.

    Args:
        counter: Counter object with category counts.
        title: Chart title.

    Returns:
        Matplotlib Figure object, or None if no data.
    """
    total = sum(counter.values())
    if total == 0:
        return None

    threshold = total * 0.02
    labels = []
    sizes = []
    other_count = 0

    for category, count in counter.most_common():
        if category != "other" and count >= threshold:
            labels.append(category.replace('_', ' ').title())
            sizes.append(count)
        else:
            other_count += count

    if other_count > 0:
        labels.append("Other")
        sizes.append(other_count)

    if not sizes:
        return None

    fig, ax = plt.subplots(figsize=(10, 8))
    colors = plt.cm.viridis(np.linspace(0, 1, len(sizes)))

    ax.pie(
        sizes,
        labels=labels,
        autopct='%1.1f%%',
        startangle=140,
        colors=colors,
        wedgeprops={'edgecolor': 'white', 'linewidth': 1},
        textprops={'fontsize': 11, 'color': 'black'}
    )

    # Title, aspect and layout are set on this figure's own objects rather
    # than via pyplot's implicit "current figure".
    ax.set_title(title, pad=20, fontsize=15)
    ax.axis('equal')
    fig.tight_layout()

    return fig


# --- Gradio-Compatible Functions ---
# Instagram account used by the login callbacks below.
# NOTE: the value is hard-coded on the next line; it is not read from Colab
# secrets or any other secret store.
USERNAME = "jattman1993" # Replace with your preset username or fetch from secrets if needed

def login_gradio_auto():
    """Gradio-compatible function for automatic login.

    The password is read from the ``INSTAGRAM_PASSWORD`` (or ``password``)
    environment variable; when neither is set, a Colab ``userdata`` object is
    used if one exists in this module's namespace.

    Returns:
        Tuple ``(status message, gr.update for the OTP row)``. The OTP row is
        made visible only when the login needs a two-factor code.
    """
    global cl
    import os  # local import keeps this function self-contained

    password = os.environ.get("INSTAGRAM_PASSWORD") or os.environ.get("password")
    if not password:
        try:
            # `userdata` is not imported by this file; it only exists when the
            # code runs in a notebook namespace that already imported it.
            password = userdata.get('password')  # noqa: F821
        except NameError:
            password = None
        except Exception as e:
            return f"Error accessing password secret: {e}", gr.update(visible=False)  # Hide OTP input on error

    if not password:
        return (
            "Error: Instagram password not found. Set the INSTAGRAM_PASSWORD environment "
            "variable, or add it to Colab secrets with the key 'password'.",
            gr.update(visible=False),  # Hide OTP input
        )

    client = Client()

    try:
        client.login(USERNAME, password)
    except Exception as e:
        error_message = str(e)
        # The exception class is matched by name so that no extra import is needed.
        needs_code = (
            type(e).__name__ == "TwoFactorRequired"
            or "Two factor challenged" in error_message
            or "challenge_required" in error_message
        )
        if needs_code:
            # Keep the client: the OTP callback refuses to run when `cl` is None.
            cl = client
            return "Login failed: Two-factor authentication required. Please enter the code below.", gr.update(visible=True)
        cl = None  # Ensure cl is None on failure
        return f"Error during login: {error_message}", gr.update(visible=False)

    cl = client
    # If login is successful, return success message and hide OTP input
    return f"Successfully logged in as {USERNAME}", gr.update(visible=False)

# Function to handle OTP submission (if 2FA was required)
def submit_otp_gradio(otp_code):
    """Gradio-compatible function to submit OTP.

    Repeats the login on the existing client, this time passing the
    two-factor code as ``verification_code``.

    Args:
        otp_code: Code typed into the OTP textbox.

    Returns:
        Tuple ``(status message, new OTP textbox value, gr.update for the OTP
        row)``. The textbox is always cleared; the row stays visible after a
        failed attempt so the user can retry.
    """
    global cl
    if cl is None:
        return "Error: Not logged in or client not initialized.", "", gr.update(visible=False)  # Hide OTP input

    code = (otp_code or "").strip()
    if not code:
        return "OTP submission failed: no code entered. Please try again.", "", gr.update(visible=True)

    import os  # local import keeps this function self-contained

    # The login has to be repeated with the code, so the password is needed again.
    password = os.environ.get("INSTAGRAM_PASSWORD") or os.environ.get("password")
    if not password:
        try:
            # `userdata` only exists when a notebook namespace imported it.
            password = userdata.get('password')  # noqa: F821
        except NameError:
            password = None
        except Exception as e:
            return f"OTP submission failed: {e}. Please try again.", "", gr.update(visible=True)
    if not password:
        return "OTP submission failed: Instagram password not available. Please try again.", "", gr.update(visible=True)

    try:
        cl.login(USERNAME, password, verification_code=code)
    except Exception as e:
        # If OTP fails
        return f"OTP submission failed: {e}. Please try again.", "", gr.update(visible=True)  # Keep OTP input visible

    # If OTP is successful
    return f"OTP successful. Successfully logged in as {USERNAME}.", "", gr.update(visible=False)  # Clear OTP input and hide field

def fetch_reels_gradio():
    """Gradio-compatible function to fetch explore reels.

    Stores at most 100 reels in the module-level ``explore_reels_list`` and
    returns a status message. Every path that does not yield reels leaves the
    list empty.
    """
    global cl
    global explore_reels_list

    if cl is None:
        explore_reels_list = []
        return "Error: Not logged in. Please log in first."

    try:
        # Only the first 100 results are kept for analysis.
        explore_reels_list = cl.explore_reels()[:100]
    except Exception as e:
        explore_reels_list = []
        return f"Error fetching explore reels: {e}"

    if not explore_reels_list:
        explore_reels_list = []
        return "Fetched 0 explore reels."
    return f"Successfully fetched {len(explore_reels_list)} explore reels."


def analyze_reels_gradio(max_to_analyze):
    """Gradio-compatible function to analyze fetched reels and generate plots.

    Args:
        max_to_analyze: Upper bound on the number of fetched reels to analyze
            (the slider value).

    Returns:
        Tuple ``(status text, sentiment figure, content figure)``. A figure is
        None when its analysis failed or produced no data; the status text
        lists what happened in each stage.
    """
    global explore_reels_list
    global sentiment_analyzer_instance
    global content_classifier_pipeline

    if not explore_reels_list:
        # Return None for plots if no reels
        return "Error: No reels fetched yet. Please fetch reels first.", None, None

    # The value is used as a slice bound below, which must be an int; a UI
    # widget may hand it over as a float.
    try:
        requested = int(max_to_analyze)
    except (TypeError, ValueError):
        return f"Error: Invalid number of reels to analyze: {max_to_analyze!r}", None, None

    # Clamp to [0, number fetched]; a negative bound would otherwise slice
    # from the end of the list instead of selecting nothing.
    num_reels_to_process = max(0, min(requested, len(explore_reels_list)))
    reels_to_analyze = explore_reels_list[:num_reels_to_process]

    if not reels_to_analyze:
        return "Error: No reels available to analyze.", None, None

    # Initialize sentiment analyzer if not already done
    if sentiment_analyzer_instance is None:
        try:
            sentiment_analyzer_instance = ReelSentimentAnalyzer()
            # Optional: Train Hindi model if needed and data is available
            # sample_train_data = [...] # Define your training data
            # sentiment_analyzer_instance.train_hindi_model(sample_train_data)
        except Exception as e:
            return f"Error initializing Sentiment Analyzer: {e}", None, None

    # Initialize content classifier pipeline if not already done
    if content_classifier_pipeline is None:
        try:
            print("Initializing Content Classifier Pipeline...")
            content_classifier_pipeline = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli",
                device=0 if torch.cuda.is_available() else -1  # Use GPU if available
            )
            print("Content Classifier Pipeline Initialized.")
        except Exception as e:
            return f"Error initializing Content Classifier: {e}", None, None

    analysis_status_messages = []
    sentiment_plot_figure = None
    content_plot_figure = None

    # Perform Sentiment Analysis. A failure here is reported in the status
    # text and does not stop the content categorization below.
    try:
        analysis_status_messages.append(f"Starting Sentiment Analysis for {len(reels_to_analyze)} reels...")
        sentiment_results, _ = sentiment_analyzer_instance.analyze_reels(
            reels_to_analyze,
            max_to_analyze=len(reels_to_analyze)  # Pass the actual number being processed
        )
        sentiment_plot_figure = plot_sentiment_pie(sentiment_results, title=f"Sentiment of {len(reels_to_analyze)} Instagram Reels")
        analysis_status_messages.append("Sentiment Analysis Complete.")
    except Exception as e:
        analysis_status_messages.append(f"Error during Sentiment Analysis: {e}")
        sentiment_plot_figure = None  # Ensure plot is None on error

    # Perform Content Categorization
    try:
        analysis_status_messages.append(f"Starting Content Categorization for {len(reels_to_analyze)} reels...")
        category_counts = Counter()
        print(f"\n⏳ Analyzing content for {len(reels_to_analyze)} reels...")
        for reel in reels_to_analyze:
            caption = getattr(reel, 'caption_text', '') or getattr(reel, 'caption', '') or ''
            # Use the global classifier pipeline
            category, _ = classify_reel_content(caption)
            category_counts[category] += 1

        print("\n✅ Content Analysis complete!")
        print("\n📊 Category Counts:")
        for category, count in category_counts.most_common():
            print(f"- {category.replace('_', ' ').title()}: {count}")

        content_plot_figure = plot_category_distribution(category_counts)
        analysis_status_messages.append("Content Categorization Complete.")

    except Exception as e:
        analysis_status_messages.append(f"Error during Content Analysis: {e}")
        content_plot_figure = None  # Ensure plot is None on error

    final_status_message = "\n".join(analysis_status_messages)
    # Return the figure objects
    return final_status_message, sentiment_plot_figure, content_plot_figure


# --- Gradio Blocks Interface ---
# UI definition. Components are laid out in the order they are created inside
# this context, so the statement order below is also the on-page order.
with gr.Blocks() as demo:
    gr.Markdown("# Instagram Reels Analysis")

    # Login Section
    with gr.Row():
        connect_button = gr.Button("Connect Instagram")
    login_status_output = gr.Label(label="Login Status")

    # OTP Input (initially hidden); the login callbacks toggle the visibility
    # of the whole row through their last return value.
    with gr.Row(visible=False) as otp_row:
        otp_input = gr.Textbox(label="Enter OTP Code")
        otp_submit_button = gr.Button("Submit OTP")


    # Fetch Reels Section
    with gr.Row():
        fetch_button = gr.Button("Fetch Reels")
    fetch_status_output = gr.Label(label="Fetch Status")

    # Analysis Section
    with gr.Row():
        max_reels_input = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Reels to Analyze")
        analyze_button = gr.Button("Analyze Reels")

    analyze_status_output = gr.Label(label="Analysis Status")

    # Results Section: the two charts sit side by side
    with gr.Row():
        # Sentiment Analysis Outputs
        with gr.Column():
            gr.Markdown("## Sentiment Analysis")
            sentiment_plot_output = gr.Plot(label="Sentiment Distribution")

        # Content Analysis Outputs
        with gr.Column():
            gr.Markdown("## Content Analysis")
            content_plot_output = gr.Plot(label="Content Distribution")


    # Link buttons to functions. Each callback's return values are mapped
    # positionally onto the components listed in `outputs`.
    connect_button.click(
        fn=login_gradio_auto,
        inputs=None, # No direct inputs, username is preset
        outputs=[login_status_output, otp_row]
    )

    # Returns (status, new textbox value, row visibility update).
    otp_submit_button.click(
        fn=submit_otp_gradio,
        inputs=otp_input,
        outputs=[login_status_output, otp_input, otp_row]
    )

    fetch_button.click(
        fn=fetch_reels_gradio,
        inputs=None, # No direct inputs needed for fetching
        outputs=fetch_status_output
    )

    analyze_button.click(
        fn=analyze_reels_gradio,
        inputs=max_reels_input, # Input is the slider value
        outputs=[analyze_status_output, sentiment_plot_output, content_plot_output] # Outputs are status and the two plots
    )

# --- Launch the Gradio app ---
if __name__ == "__main__":
    # Start the web server when this file is executed as a script
    # (e.g. `python deploy.py`). Without this call the script builds the UI
    # and then exits without serving anything.
    #
    # Importing the module (for example from a notebook) does not launch the
    # app; call `demo.launch(share=True)` there to get a public link.
    demo.launch()