Spaces:

Agrannya
/

Insagram_reel_Analyzer

Runtime error

App Files Files Community

Insagram_reel_Analyzer / deploy.py

Agrannya

Upload folder using huggingface_hub

aefa421 verified 3 months ago

raw

history blame contribute delete

35.6 kB

	##FINAL FILE

	# This deploy.py file contains the complete code for the Instagram Reels Analysis Gradio App.

	# --- Imports ---
	import gradio as gr
	import time
	import random
	import matplotlib.pyplot as plt
	import pandas as pd
	import torch
	import emoji
	import re
	import numpy as np
	import io # Import io for handling image bytes


	from instagrapi import Client
	from transformers import (
	pipeline,
	AutoTokenizer,
	AutoModelForSequenceClassification,
	Trainer,
	TrainingArguments,
	RobertaForSequenceClassification,
	AlbertForSequenceClassification
	)
	from datasets import Dataset, Features, Value
	from collections import Counter
	from sklearn.metrics import accuracy_score, f1_score

	# --- Configuration ---
	# Tuning values shared by the sentiment analyzer's tokenizers, trainer and
	# label-reconciliation logic.
	CONFIG = {
	# Token limit passed as max_length to every tokenizer call.
	"max_length": 128,
	# Per-device batch size for both training and evaluation.
	"batch_size": 16,
	# Optimizer learning rate handed to TrainingArguments.
	"learning_rate": 2e-5,
	# Number of fine-tuning epochs handed to TrainingArguments.
	"num_train_epochs": 3,
	# NOTE(review): not referenced anywhere in the visible code.
	"few_shot_examples": 5, # per class
	# A model's top score above this is accepted outright for English text.
	"confidence_threshold": 0.7,
	# Share of neutral results above which neutral reels are re-checked.
	"neutral_reanalysis_threshold": 0.33
	}

	# --- Global Variables for State Management ---
	# Shared state for the Gradio callbacks. The callbacks rebind these names via
	# function-level `global` declarations; a `global` statement at module scope
	# has no effect, so none is used here.
	cl = None  # Instagram client, assigned by login_gradio_auto
	explore_reels_list = []  # reels returned by the most recent fetch
	sentiment_analyzer_instance = None  # ReelSentimentAnalyzer, built on first analysis
	content_classifier_pipeline = None  # zero-shot pipeline, built on first analysis


	# --- Sentiment Analysis Class ---
	class ReelSentimentAnalyzer:
	    """Label reel captions as positive, neutral or negative.

	    English captions are scored by an emotion model and a sentiment model
	    whose outputs are then reconciled. Captions containing Devanagari
	    characters or common Hinglish words are scored by an Indic-BERT
	    classifier. Captions containing a promotional keyword are labelled
	    neutral without running any model.
	    """

	    # Matches a URL or an @mention. The alternation bar must stay unescaped:
	    # an escaped bar only matches a literal pipe character, in which case
	    # nothing would ever be stripped from a caption.
	    _URL_OR_MENTION = re.compile(r"http\S+|@\w+")

	    def __init__(self):
	        """Select the torch device and load all models."""
	        self.device = "cuda" if torch.cuda.is_available() else "cpu"
	        self._initialize_models()

	    def _initialize_models(self):
	        """Load tokenizers and models, and build the keyword/label tables."""
	        print("\nInitializing Sentiment Analysis Models...")
	        # English models
	        print("Loading English Emotion Model...")
	        self.emotion_tokenizer = AutoTokenizer.from_pretrained("finiteautomata/bertweet-base-emotion-analysis")
	        self.emotion_model = AutoModelForSequenceClassification.from_pretrained(
	            "finiteautomata/bertweet-base-emotion-analysis"
	        ).to(self.device)
	        print("Loading English Sentiment Model...")
	        self.sentiment_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
	        self.sentiment_model = RobertaForSequenceClassification.from_pretrained(
	            "cardiffnlp/twitter-roberta-base-sentiment-latest",
	            ignore_mismatched_sizes=True
	        ).to(self.device)

	        # Hindi/Hinglish model; train_hindi_model() fine-tunes it in place.
	        print("Loading Indic-BERT Model for Hindi/Hinglish...")
	        self.hindi_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")
	        self.hindi_model = AlbertForSequenceClassification.from_pretrained(
	            "ai4bharat/indic-bert",
	            num_labels=3,
	            id2label={0: "negative", 1: "neutral", 2: "positive"},
	            label2id={"negative": 0, "neutral": 1, "positive": 2}
	        ).to(self.device)
	        # Keep the label -> id mapping handy for the training label conversion.
	        self.hindi_label2id = self.hindi_model.config.label2id
	        print("Models Initialized.")

	        # Emotion label -> sentiment label. Labels missing from this table
	        # are treated as neutral by the caller.
	        self.emotion_map = {
	            "joy": "positive", "love": "positive", "happy": "positive",
	            "anger": "negative", "sadness": "negative", "fear": "negative",
	            "surprise": "neutral", "neutral": "neutral", "disgust": "negative", "shame": "negative"
	        }

	        # Promotional vocabulary that short-circuits analysis to "neutral".
	        self.neutral_keywords = {
	            "ad", "sponsored", "promo", "sale", "discount", "offer", "giveaway",
	            "buy", "shop", "link in bio",
	            "विज्ञापन", "प्रचार", "ऑफर", "डिस्काउंट", "बिक्री", "लिंक बायो में"
	        }

	    def train_hindi_model(self, train_data, eval_data=None):
	        """Fine-tune the Hindi/Hinglish model on labelled examples.

	        Args:
	            train_data: List of dicts [{"text": "...", "label": "positive/negative/neutral"}]
	            eval_data: Optional list in the same format; when given, the
	                model is evaluated after every epoch.

	        The fine-tuned model and tokenizer are saved to
	        ./fine_tuned_hindi_sentiment.
	        """
	        print("\nStarting Hindi model training...")

	        def map_labels_to_ids(examples):
	            # Convert string labels to ids; unknown labels become neutral
	            # instead of aborting the run.
	            labels = []
	            for label_str in examples["label"]:
	                if label_str in self.hindi_label2id:
	                    labels.append(self.hindi_label2id[label_str])
	                else:
	                    print(f"Warning: Unexpected label '{label_str}'. Mapping to neutral.")
	                    labels.append(self.hindi_label2id["neutral"])
	            examples["label"] = labels
	            return examples

	        def tokenize_function(examples):
	            return self.hindi_tokenizer(
	                examples["text"],
	                padding="max_length",
	                truncation=True,
	                max_length=CONFIG["max_length"]
	            )

	        def compute_metrics(p):
	            predictions, labels = p
	            predictions = np.argmax(predictions, axis=1)
	            return {
	                "accuracy": accuracy_score(labels, predictions),
	                "f1": f1_score(labels, predictions, average="weighted")
	            }

	        train_dataset = Dataset.from_pandas(pd.DataFrame(train_data))
	        train_dataset = train_dataset.map(map_labels_to_ids, batched=True)
	        # Explicitly set the label column to integer type
	        train_dataset = train_dataset.cast_column("label", Value("int64"))
	        tokenized_train = train_dataset.map(tokenize_function, batched=True)

	        # Training arguments - using eval_strategy instead of evaluation_strategy
	        training_args = TrainingArguments(
	            output_dir="./results",
	            eval_strategy="epoch" if eval_data else "no",
	            per_device_train_batch_size=CONFIG["batch_size"],
	            per_device_eval_batch_size=CONFIG["batch_size"],
	            learning_rate=CONFIG["learning_rate"],
	            num_train_epochs=CONFIG["num_train_epochs"],
	            weight_decay=0.01,
	            save_strategy="no",  # Don't save checkpoints during training
	            logging_dir='./logs',
	            logging_steps=10,
	            report_to="none"  # Don't report to external services
	        )

	        eval_dataset_processed = None
	        if eval_data:
	            eval_dataset = Dataset.from_pandas(pd.DataFrame(eval_data))
	            eval_dataset = eval_dataset.map(map_labels_to_ids, batched=True)
	            eval_dataset_processed = eval_dataset.cast_column("label", Value("int64")).map(tokenize_function, batched=True)

	        trainer = Trainer(
	            model=self.hindi_model,
	            args=training_args,
	            train_dataset=tokenized_train,
	            eval_dataset=eval_dataset_processed,
	            compute_metrics=compute_metrics if eval_data else None,
	        )

	        trainer.train()
	        # Return to inference mode so dropout is disabled for later
	        # predictions made with this same model object.
	        self.hindi_model.eval()

	        print("Saving fine-tuned Hindi model...")
	        self.hindi_model.save_pretrained("./fine_tuned_hindi_sentiment")
	        self.hindi_tokenizer.save_pretrained("./fine_tuned_hindi_sentiment")
	        print("Hindi model training complete.")

	    def preprocess_text(self, text):
	        """Clean a caption for analysis.

	        Emojis become their text names, URLs and @mentions are removed, a few
	        abbreviations are expanded, and whitespace is collapsed. Falsy input
	        yields an empty string.
	        """
	        if not text:
	            return ""

	        # Convert emojis to text
	        text = emoji.demojize(text, delimiters=(" ", " "))

	        # Remove URLs and mentions
	        text = self._URL_OR_MENTION.sub("", text)

	        # Expand common abbreviations (can be extended). Entries that map a
	        # word to itself only normalise its letter case.
	        abbrevs = {
	            r"\bomg\b": "oh my god",
	            r"\btbh\b": "to be honest",
	            r"\bky\b": "kyun",  # Hindi 'why'
	            r"\bkb\b": "kab",  # Hindi 'when'
	            r"\bkya\b": "kya",  # Hindi 'what'
	            r"\bkahan\b": "kahan",  # Hindi 'where'
	            r"\bkaisa\b": "kaisa"  # Hindi 'how'
	        }
	        for pattern, replacement in abbrevs.items():
	            text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)

	        # Remove extra whitespace
	        text = re.sub(r"\s+", " ", text).strip()

	        return text

	    def detect_language(self, text):
	        """Return "hi" for Devanagari text, "hi-latin" for Hinglish, else "en"."""
	        if re.search(r"[\u0900-\u097F]", text):  # Devanagari script (Hindi, Marathi etc.)
	            return "hi"
	        # Simple check for common Hindi/Hinglish words (can be expanded)
	        hinglish_keywords = ["hai", "kyun", "nahi", "kya", "acha", "bas", "yaar", "main"]
	        if any(re.search(rf"\b{kw}\b", text.lower()) for kw in hinglish_keywords):
	            return "hi-latin"
	        # Fallback to English if no strong Hindi/Hinglish indicators
	        return "en"

	    def analyze_content(self, text):
	        """Classify one caption.

	        Returns:
	            A (label, confidence, details) tuple. Empty captions and model
	            failures both come back as neutral with confidence 0.5; the
	            details dict records which of the two happened.
	        """
	        processed = self.preprocess_text(text)

	        if not processed:
	            return "neutral", 0.5, {"reason": "empty_text"}

	        lang = self.detect_language(processed)

	        # Promotional wording wins over any model output.
	        if any(re.search(rf"\b{re.escape(kw)}\b", processed.lower()) for kw in self.neutral_keywords):
	            return "neutral", 0.9, {"reason": "neutral_keyword"}

	        try:
	            if lang in ("hi", "hi-latin"):
	                return self._analyze_hindi_content(processed)
	            return self._analyze_english_content(processed)
	        except Exception as e:
	            # Best effort: one bad caption must not abort a whole batch.
	            print(f"Analysis error for text '{processed[:50]}...': {e}")
	            return "neutral", 0.5, {"error": str(e), "original_text": text[:50]}

	    def _analyze_hindi_content(self, text):
	        """Score Hindi/Hinglish text with the Indic-BERT classifier.

	        NOTE(review): unless train_hindi_model() has been run, the
	        classification head presumably holds untrained weights - confirm.
	        """
	        inputs = self.hindi_tokenizer(
	            text,
	            return_tensors="pt",
	            truncation=True,
	            padding=True,
	            max_length=CONFIG["max_length"]
	        ).to(self.device)

	        with torch.no_grad():
	            outputs = self.hindi_model(**inputs)

	        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
	        pred_idx = torch.argmax(probs).item()
	        confidence = probs[0][pred_idx].item()

	        label = self.hindi_model.config.id2label[pred_idx]
	        return label, confidence, {"model": "fine-tuned-indic-bert", "lang": "hi"}

	    def _analyze_english_content(self, text):
	        """Score English text with the emotion and sentiment models combined.

	        Precedence: a confident sentiment score, then a confident non-neutral
	        emotion, then agreement between the two, then whichever non-neutral
	        signal is stronger and above 0.4, otherwise neutral.
	        """
	        # Emotion analysis
	        emotion_inputs = self.emotion_tokenizer(
	            text,
	            return_tensors="pt",
	            truncation=True,
	            max_length=CONFIG["max_length"]
	        ).to(self.device)

	        with torch.no_grad():
	            emotion_outputs = self.emotion_model(**emotion_inputs)

	        emotion_probs = torch.nn.functional.softmax(emotion_outputs.logits, dim=-1)
	        emotion_pred = torch.argmax(emotion_probs).item()
	        emotion_label = self.emotion_model.config.id2label[emotion_pred]
	        emotion_score = emotion_probs[0][emotion_pred].item()

	        # Sentiment analysis
	        sentiment_inputs = self.sentiment_tokenizer(
	            text,
	            return_tensors="pt",
	            truncation=True,
	            max_length=CONFIG["max_length"]
	        ).to(self.device)

	        with torch.no_grad():
	            sentiment_outputs = self.sentiment_model(**sentiment_inputs)

	        sentiment_probs = torch.nn.functional.softmax(sentiment_outputs.logits, dim=-1)
	        sentiment_pred = torch.argmax(sentiment_probs).item()
	        # Class indices are translated by hand: 0 negative, 1 neutral,
	        # 2 positive. Any other index falls back to neutral.
	        sentiment_label_mapping = {0: 'negative', 1: 'neutral', 2: 'positive'}
	        sentiment_label = sentiment_label_mapping.get(sentiment_pred, 'neutral')
	        sentiment_score = sentiment_probs[0][sentiment_pred].item()

	        # Combine results
	        mapped_emotion = self.emotion_map.get(emotion_label, "neutral")

	        if sentiment_score > CONFIG["confidence_threshold"]:
	            # Prioritize high-confidence sentiment
	            final_label = sentiment_label
	            final_confidence = sentiment_score
	            reason = "high_sentiment_confidence"
	        elif emotion_score > CONFIG["confidence_threshold"] and mapped_emotion != "neutral":
	            # Then prioritize high-confidence emotion if not neutral
	            final_label = mapped_emotion
	            final_confidence = emotion_score
	            reason = "high_emotion_confidence"
	        elif sentiment_label == mapped_emotion and sentiment_label != "neutral":
	            # Both models are unsure but agree on a polarity.
	            final_label = sentiment_label
	            final_confidence = (sentiment_score + emotion_score) / 2
	            reason = "emotion_sentiment_agreement"
	        elif sentiment_label != "neutral" and sentiment_score > emotion_score and sentiment_score > 0.4:
	            final_label = sentiment_label
	            final_confidence = sentiment_score * 0.9  # Slightly reduce confidence
	            reason = "sentiment_slightly_higher"
	        elif mapped_emotion != "neutral" and emotion_score > sentiment_score and emotion_score > 0.4:
	            final_label = mapped_emotion
	            final_confidence = emotion_score * 0.9  # Slightly reduce confidence
	            reason = "emotion_slightly_higher"
	        else:
	            # Default to neutral if no strong signal
	            final_label = "neutral"
	            final_confidence = 0.6  # Assign a baseline neutral confidence
	            reason = "fallback_to_neutral"

	        return final_label, final_confidence, {
	            "emotion_label": emotion_label,
	            "emotion_score": emotion_score,
	            "sentiment_label": sentiment_label,
	            "sentiment_score": sentiment_score,
	            "mapped_emotion": mapped_emotion,
	            "model": "ensemble",
	            "lang": "en",
	            "reason": reason
	        }

	    def analyze_reels(self, reels, max_to_analyze=100):
	        """Classify the captions of up to max_to_analyze reels.

	        Returns:
	            (results, detailed_results): a Counter of labels and a list with
	            one dict per reel (reel_id, text, label, confidence, details).
	        """
	        batch = reels[:max_to_analyze]
	        # Report the number actually processed, which can be smaller than
	        # max_to_analyze when fewer reels were supplied.
	        total = len(batch)
	        print(f"\n--- Starting Sentiment Analysis ({total} reels) ---")
	        results = Counter()
	        detailed_results = []

	        for i, reel in enumerate(batch, 1):
	            caption = getattr(reel, 'caption_text', '') or getattr(reel, 'caption', '') or ''
	            print(f"Analyzing sentiment for reel {i}/{total} (ID: {reel.id})...")
	            label, confidence, details = self.analyze_content(caption)
	            results[label] += 1
	            detailed_results.append({
	                "reel_id": reel.id,
	                "text": caption,
	                "label": label,
	                "confidence": confidence,
	                "details": details
	            })

	        print("\nInitial Sentiment Distribution:", dict(results))

	        # Post-analysis neutral reduction if a significant portion is neutral
	        total_analyzed = sum(results.values())
	        if total_analyzed > 0 and results["neutral"] / total_analyzed > CONFIG["neutral_reanalysis_threshold"]:
	            print(f"High neutral count ({results['neutral']}). Attempting to re-analyze...")
	            self._reduce_neutrals(results, detailed_results)
	            print("Sentiment distribution after re-analysis:", dict(results))

	        print("Sentiment Analysis Complete.")
	        return results, detailed_results

	    def _reduce_neutrals(self, results, detailed_results):
	        """Relabel low-confidence neutrals that contain strong polarity words.

	        Both arguments are updated in place. An item is relabelled only when
	        it contains keywords of exactly one polarity.
	        """
	        neutrals_to_recheck = [item for item in detailed_results if item["label"] == "neutral" and item["confidence"] < 0.8]

	        print(f"Re-checking {len(neutrals_to_recheck)} neutral reels...")

	        # Built once: the vocabularies do not depend on the item checked.
	        pos_keywords_strong = {"amazing", "love", "best", "fantastic", "awesome", "superb", "great",
	                               "अद्भुत", "शानदार", "बहुत अच्छा", "मज़ेदार"}
	        neg_keywords_strong = {"hate", "worst", "bad", "terrible", "awful", "disappointed", "horrible", "cringe",
	                               "खराब", "बेकार", "बहुत बुरा", "घटिया"}

	        for item in neutrals_to_recheck:
	            text_lower = self.preprocess_text(item["text"]).lower()

	            is_strong_pos = any(re.search(rf"\b{re.escape(kw)}\b", text_lower) for kw in pos_keywords_strong)
	            is_strong_neg = any(re.search(rf"\b{re.escape(kw)}\b", text_lower) for kw in neg_keywords_strong)

	            if is_strong_pos and not is_strong_neg:
	                results["neutral"] -= 1
	                results["positive"] += 1
	                item.update({
	                    "label": "positive",
	                    "confidence": min(0.95, item["confidence"] + 0.3),  # Increase confidence
	                    "reanalyzed": True,
	                    "reanalysis_reason": "strong_pos_keywords"
	                })
	            elif is_strong_neg and not is_strong_pos:
	                results["neutral"] -= 1
	                results["negative"] += 1
	                item.update({
	                    "label": "negative",
	                    "confidence": min(0.95, item["confidence"] + 0.3),  # Increase confidence
	                    "reanalyzed": True,
	                    "reanalysis_reason": "strong_neg_keywords"
	                })
	            # Mixed or absent signals leave the item neutral.


	def plot_sentiment_pie(results, title="Reels Sentiment Analysis"):
	    """Build a pie chart of sentiment counts.

	    Args:
	        results: Counter object or dict with 'positive', 'neutral', 'negative' keys
	        title: Chart title

	    Returns:
	        Matplotlib Figure object, or None if all three counts sum to zero.
	    """
	    # (display label, key in `results`, wedge colour, explode offset)
	    slice_specs = (
	        ("Positive", "positive", "#4CAF50", 0.05),
	        ("Neutral", "neutral", "#FFC107", 0),
	        ("Negative", "negative", "#F44336", 0.05),
	    )
	    counts = [results.get(key, 0) for _, key, _, _ in slice_specs]
	    if sum(counts) == 0:
	        return None

	    # Zero-sized wedges are dropped so they do not add empty labels.
	    visible = [
	        (label, count, color, offset)
	        for (label, _, color, offset), count in zip(slice_specs, counts)
	        if count > 0
	    ]

	    fig, ax = plt.subplots(figsize=(8, 6))
	    ax.pie(
	        [count for _, count, _, _ in visible],
	        explode=[offset for _, _, _, offset in visible],
	        labels=[label for label, _, _, _ in visible],
	        colors=[color for _, _, color, _ in visible],
	        autopct='%1.1f%%', shadow=True, startangle=140,
	        textprops={'fontsize': 12, 'color': 'black'},
	    )
	    ax.axis('equal')
	    # Address this figure directly instead of pyplot's "current" figure,
	    # which is shared global state.
	    ax.set_title(title, fontsize=16, pad=20)
	    fig.tight_layout()

	    return fig

	# --- Content Analysis Logic ---
	# Content categories
	# Candidate labels handed to the zero-shot classifier when no keyword matches.
	content_categories = [
	"news", "meme", "sports", "science", "music", "movie",
	"gym", "comedy", "food", "technology", "travel", "fashion", "art", "business"
	]

	# Whole-word triggers per category. classify_reel_content walks this dict in
	# insertion order and returns on the first hit, so a word listed under two
	# categories (e.g. "artist") resolves to the earlier one.
	category_keywords = {
	"news": {"news", "update", "breaking", "reported", "headlines"},
	"meme": {"meme", "funny", "lol", "haha", "relatable"},
	"sports": {"sports", "cricket", "football", "match", "game", "team", "score"},
	"science": {"science", "research", "discovery", "experiment", "facts", "theory"},
	"music": {"music", "song", "album", "release", "artist", "beats"},
	"movie": {"movie", "film", "bollywood", "trailer", "series", "actor"},
	"gym": {"gym", "workout", "fitness", "exercise", "training", "bodybuilding"},
	"comedy": {"comedy", "joke", "humor", "standup", "skit", "laugh"},
	"food": {"food", "recipe", "cooking", "eat", "delicious", "restaurant", "kitchen"},
	"technology": {"tech", "phone", "computer", "ai", "gadget", "software", "innovation"},
	"travel": {"travel", "trip", "vacation", "explore", "destination", "adventure"},
	"fashion": {"fashion", "style", "ootd", "outfit", "trends", "clothing"},
	"art": {"art", "artist", "painting", "drawing", "creative", "design"},
	"business": {"business", "startup", "marketing", "money", "finance", "entrepreneur"}
	}

	def preprocess_text_cat(text):
	    """Clean a caption for categorization.

	    URLs, @mentions and #hashtags are removed, the text is lower-cased and
	    runs of whitespace are collapsed. Falsy input yields an empty string.
	    """
	    if not text:
	        return ""
	    # The bars must be unescaped alternations; an escaped bar matches only a
	    # literal pipe, which left URLs, mentions and hashtags in place.
	    text = re.sub(r"http\S+|@\w+|#\w+", "", text).lower()
	    text = re.sub(r"\s+", " ", text).strip()
	    return text

	def classify_reel_content(text):
	    """Assign a content category to a caption.

	    Keyword matching is tried first; if nothing matches, the module-level
	    zero-shot pipeline is consulted and its top label is accepted only when
	    its score exceeds 0.5.

	    Returns:
	        (category, details) where details["reason"] says how the category
	        was chosen. The category is "other" whenever nothing qualifies.
	    """
	    cleaned = preprocess_text_cat(text)

	    # Captions of fewer than two words carry too little signal.
	    if not cleaned or len(cleaned.split()) < 2:
	        return "other", {"reason": "short_text"}

	    # First category with any whole-word keyword hit wins.
	    for category, keywords in category_keywords.items():
	        for keyword in keywords:
	            if re.search(rf"\b{re.escape(keyword)}\b", cleaned):
	                return category, {"reason": "keyword_match"}

	    model_text = cleaned[:256]
	    classifier = content_classifier_pipeline

	    if classifier is None:
	        # The pipeline is created by the analysis callback before use.
	        print("Content classifier pipeline not initialized in classify_reel_content.")
	        return "other", {"reason": "classifier_not_initialized"}

	    try:
	        prediction = classifier(model_text, content_categories, multi_label=False)
	        best_label = prediction['labels'][0]
	        best_score = prediction['scores'][0]

	        if not best_score > 0.5:
	            return "other", {"reason": "low_model_confidence", "score": best_score}
	        return best_label, {"reason": "model_prediction", "score": best_score}
	    except Exception as e:
	        print(f"Error during zero-shot classification for text '{model_text}...': {e}")
	        return "other", {"reason": "classification_error"}


	def plot_category_distribution(counter, title="Reels Content Distribution"):
	    """Build a pie chart of content-category counts.

	    Categories holding less than 2% of the total, and the "other" category
	    itself, are merged into a single "Other" wedge.

	    Args:
	        counter: Counter object with category counts.
	        title: Chart title.

	    Returns:
	        Matplotlib Figure object, or None if there is nothing to plot.
	    """
	    total = sum(counter.values())
	    if total == 0:
	        return None

	    threshold = total * 0.02
	    labels = []
	    sizes = []
	    other_count = 0

	    for category, count in counter.most_common():
	        if category != "other" and count >= threshold:
	            labels.append(category.replace('_', ' ').title())
	            sizes.append(count)
	        else:
	            # Both "other" and sub-threshold categories land here.
	            other_count += count

	    if other_count > 0:
	        labels.append("Other")
	        sizes.append(other_count)

	    if not sizes:
	        return None

	    fig, ax = plt.subplots(figsize=(10, 8))
	    colors = plt.cm.viridis(np.linspace(0, 1, len(sizes)))

	    ax.pie(
	        sizes,
	        labels=labels,
	        autopct='%1.1f%%',
	        startangle=140,
	        colors=colors,
	        wedgeprops={'edgecolor': 'white', 'linewidth': 1},
	        textprops={'fontsize': 11, 'color': 'black'}
	    )

	    # Address this figure directly instead of pyplot's "current" figure,
	    # which is shared global state.
	    ax.set_title(title, pad=20, fontsize=15)
	    ax.axis('equal')
	    fig.tight_layout()

	    return fig


	# --- Gradio-Compatible Functions ---
	# Instagram account name used by the login callbacks.
	# NOTE(review): the value is hard-coded here; nothing in this file reads it from a secret store.
	USERNAME = "jattman1993" # Replace with your own account name

	def login_gradio_auto():
	    """Log in to Instagram with the preset USERNAME.

	    The password is read from a `userdata` secret helper when one is present
	    in this module's globals (e.g. injected in a Colab session), otherwise
	    from the `password` environment variable. The file never imports
	    `userdata` itself, so without the fallback every login attempt failed
	    with a NameError.

	    Returns:
	        (status message, update for the OTP row). The OTP row is made
	        visible only when two-factor authentication is requested.
	    """
	    global cl
	    import os  # local import: the module header does not import os

	    try:
	        secret_store = globals().get("userdata")
	        if secret_store is not None:
	            password = secret_store.get('password')
	        else:
	            password = os.environ.get('password')
	    except Exception as e:
	        return f"Error accessing password secret: {e}", gr.update(visible=False)

	    if not password:
	        return (
	            "Error: Instagram password not found. Please add a secret or "
	            "environment variable with the key 'password'.",
	            gr.update(visible=False),
	        )

	    cl = Client()

	    try:
	        cl.login(USERNAME, password)
	    except Exception as e:
	        error_message = str(e)
	        if "Two factor challenged" in error_message or "challenge_required" in error_message:
	            # Keep the client: submit_otp_gradio refuses to run when `cl`
	            # is None, so clearing it here made the OTP step unreachable.
	            return "Login failed: Two-factor authentication required. Please enter the code below.", gr.update(visible=True)
	        cl = None  # Ensure cl is None on any other failure
	        return f"Error during login: {error_message}", gr.update(visible=False)

	    return f"Successfully logged in as {USERNAME}", gr.update(visible=False)

	# Function to handle OTP submission (if 2FA was required)
	def submit_otp_gradio(otp_code):
	    """Submit a two-factor code for the pending login.

	    Returns:
	        (status message, new value for the OTP textbox, update for the OTP
	        row). The textbox is always cleared; the row stays visible only when
	        the submission failed, so the user can retry.
	    """
	    global cl

	    if cl is None:
	        status = "Error: Not logged in or client not initialized."
	        return status, "", gr.update(visible=False)

	    try:
	        # NOTE(review): confirm the client really exposes two_factor_login();
	        # the login API may instead expect the code as an argument to login().
	        cl.two_factor_login(otp_code)
	    except Exception as e:
	        return f"OTP submission failed: {e}. Please try again.", "", gr.update(visible=True)
	    else:
	        return f"OTP successful. Successfully logged in as {USERNAME}.", "", gr.update(visible=False)


	def fetch_reels_gradio():
	    """Fetch up to 100 explore reels into the shared reel list.

	    Returns:
	        A status string. The shared list ends up empty on every path except
	        a successful, non-empty fetch.
	    """
	    global cl
	    global explore_reels_list

	    # Start empty; only a successful fetch replaces it below.
	    explore_reels_list = []

	    if cl is None:
	        return "Error: Not logged in. Please log in first."

	    try:
	        fetched = cl.explore_reels()[:100]  # cap the batch at 100 reels
	    except Exception as e:
	        return f"Error fetching explore reels: {e}"

	    if not fetched:
	        return "Fetched 0 explore reels."

	    explore_reels_list = fetched
	    return f"Successfully fetched {len(explore_reels_list)} explore reels."


	def analyze_reels_gradio(max_to_analyze):
	    """Analyze the fetched reels and build both charts.

	    Args:
	        max_to_analyze: Upper bound on the number of reels to process. It is
	            coerced to an int and clamped to the range 0..len(fetched reels),
	            because a float cannot be used as a slice bound and a negative
	            value would silently drop reels from the end of the list.

	    Returns:
	        (status text, sentiment figure or None, content figure or None).
	        Sentiment and content analysis fail independently: an error in one
	        is reported in the status text and leaves only its figure as None.
	    """
	    global explore_reels_list
	    global sentiment_analyzer_instance
	    global content_classifier_pipeline

	    if not explore_reels_list:
	        return "Error: No reels fetched yet. Please fetch reels first.", None, None

	    try:
	        requested = int(max_to_analyze)
	    except (TypeError, ValueError):
	        return "Error: Number of reels to analyze must be a whole number.", None, None

	    num_reels_to_process = max(0, min(requested, len(explore_reels_list)))
	    reels_to_analyze = explore_reels_list[:num_reels_to_process]

	    if not reels_to_analyze:
	        return "Error: No reels available to analyze.", None, None

	    # Models are loaded on first use and then kept for later calls.
	    if sentiment_analyzer_instance is None:
	        try:
	            sentiment_analyzer_instance = ReelSentimentAnalyzer()
	        except Exception as e:
	            return f"Error initializing Sentiment Analyzer: {e}", None, None

	    if content_classifier_pipeline is None:
	        try:
	            print("Initializing Content Classifier Pipeline...")
	            content_classifier_pipeline = pipeline(
	                "zero-shot-classification",
	                model="facebook/bart-large-mnli",
	                device=0 if torch.cuda.is_available() else -1  # Use GPU if available
	            )
	            print("Content Classifier Pipeline Initialized.")
	        except Exception as e:
	            return f"Error initializing Content Classifier: {e}", None, None

	    analysis_status_messages = []
	    sentiment_plot_figure = None
	    content_plot_figure = None

	    # Perform Sentiment Analysis
	    try:
	        analysis_status_messages.append(f"Starting Sentiment Analysis for {len(reels_to_analyze)} reels...")
	        sentiment_results, detailed_sentiment_results = sentiment_analyzer_instance.analyze_reels(
	            reels_to_analyze,
	            max_to_analyze=len(reels_to_analyze)  # Pass the actual number being processed
	        )
	        sentiment_plot_figure = plot_sentiment_pie(sentiment_results, title=f"Sentiment of {len(reels_to_analyze)} Instagram Reels")
	        analysis_status_messages.append("Sentiment Analysis Complete.")
	    except Exception as e:
	        analysis_status_messages.append(f"Error during Sentiment Analysis: {e}")
	        sentiment_plot_figure = None  # Ensure plot is None on error

	    # Perform Content Categorization
	    try:
	        analysis_status_messages.append(f"Starting Content Categorization for {len(reels_to_analyze)} reels...")
	        category_counts = Counter()
	        print(f"\n⏳ Analyzing content for {len(reels_to_analyze)} reels...")
	        for reel in reels_to_analyze:
	            caption = getattr(reel, 'caption_text', '') or getattr(reel, 'caption', '') or ''
	            category, _details = classify_reel_content(caption)
	            category_counts[category] += 1

	        print("\n✅ Content Analysis complete!")
	        print("\n📊 Category Counts:")
	        for category, count in category_counts.most_common():
	            print(f"- {category.replace('_', ' ').title()}: {count}")

	        content_plot_figure = plot_category_distribution(category_counts)
	        analysis_status_messages.append("Content Categorization Complete.")
	    except Exception as e:
	        analysis_status_messages.append(f"Error during Content Analysis: {e}")
	        content_plot_figure = None  # Ensure plot is None on error

	    final_status_message = "\n".join(analysis_status_messages)
	    return final_status_message, sentiment_plot_figure, content_plot_figure


	# --- Gradio Blocks Interface ---
	with gr.Blocks() as demo:
	    gr.Markdown("# Instagram Reels Analysis")

	    # Step 1: connect the preset account.
	    with gr.Row():
	        connect_button = gr.Button("Connect Instagram")
	        login_status_output = gr.Label(label="Login Status")

	    # Shown by the login callback only when a two-factor code is requested.
	    with gr.Row(visible=False) as otp_row:
	        otp_input = gr.Textbox(label="Enter OTP Code")
	        otp_submit_button = gr.Button("Submit OTP")

	    # Step 2: pull explore reels into shared state.
	    with gr.Row():
	        fetch_button = gr.Button("Fetch Reels")
	        fetch_status_output = gr.Label(label="Fetch Status")

	    # Step 3: choose how many of the fetched reels to analyze.
	    with gr.Row():
	        max_reels_input = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Reels to Analyze")
	        analyze_button = gr.Button("Analyze Reels")

	    analyze_status_output = gr.Label(label="Analysis Status")

	    # Results: the two charts sit side by side.
	    with gr.Row():
	        with gr.Column():
	            gr.Markdown("## Sentiment Analysis")
	            sentiment_plot_output = gr.Plot(label="Sentiment Distribution")

	        with gr.Column():
	            gr.Markdown("## Content Analysis")
	            content_plot_output = gr.Plot(label="Content Distribution")

	    # Event wiring; listeners are registered inside the Blocks context.
	    connect_button.click(
	        login_gradio_auto,
	        outputs=[login_status_output, otp_row],
	    )

	    otp_submit_button.click(
	        submit_otp_gradio,
	        inputs=otp_input,
	        outputs=[login_status_output, otp_input, otp_row],
	    )

	    fetch_button.click(
	        fetch_reels_gradio,
	        outputs=fetch_status_output,
	    )

	    analyze_button.click(
	        analyze_reels_gradio,
	        inputs=max_reels_input,
	        outputs=[analyze_status_output, sentiment_plot_output, content_plot_output],
	    )

	# --- Launch the Gradio app ---
	if __name__ == "__main__":
	    # Start the server when the script is executed directly. With only
	    # `pass` here, running the file built the interface and then exited
	    # without ever serving it. Importing the module still has no launch
	    # side effect, so tools that load `demo` themselves are unaffected.
	    # For a temporary public link from a notebook, call
	    # `demo.launch(share=True)` instead.
	    demo.launch()