from typing import Any, Dict, List, Optional, Tuple

from api_utils import process_in_parallel, rank_ingredients_openai
from category_matching import hybrid_category_matching
from chicory_api import call_chicory_parser
from embeddings import create_product_embeddings
from similarity import compute_similarities, hybrid_ingredient_matching
from ui_formatters import format_comparison_html


def compare_ingredient_methods(products: List[str],
                               ingredients_dict: Dict[str, Any],
                               embedding_top_n: int = 20,
                               final_top_n: int = 3,
                               confidence_threshold: float = 0.5,
                               match_type: str = "ingredients",
                               expanded_descriptions: Optional[Dict[str, str]] = None
                               ) -> Dict[str, Dict[str, List[Tuple]]]:
    """
    Compare multiple ingredient/category matching methods on the same products.

    Args:
        products: List of product names to process
        ingredients_dict: Dictionary with ingredient embeddings
        embedding_top_n: Number of top ingredients to retrieve using embeddings
        final_top_n: Number of final results to show for each method
        confidence_threshold: Minimum score threshold for final results
        match_type: Type of matching to perform ('ingredients' or 'categories')
        expanded_descriptions: Optional mapping of product name to expanded description

    Returns:
        Dictionary mapping products to methods and their results
    """
    # Debug info: confirm the expanded descriptions actually cover the input products
    if expanded_descriptions:
        print(f"Expanded descriptions provided with {len(expanded_descriptions)} entries")
        # Check a sample product to confirm it has expanded text
        if products and products[0] in expanded_descriptions:
            sample = expanded_descriptions[products[0]]
            print(f"Sample expansion for '{products[0]}': {sample[:50]}...")
        else:
            print(f"WARNING: First product '{products[0] if products else 'None'}' not found in expanded descriptions")

    # Step 1: Generate embeddings for all products (used by multiple methods).
    # When expanded descriptions are available, embed the expanded text but keep
    # the original product names as the embedding keys.
    if expanded_descriptions:
        expanded_product_texts = [expanded_descriptions.get(p, p) for p in products]
        product_embeddings = create_product_embeddings(expanded_product_texts, original_products=products)
    else:
        product_embeddings = create_product_embeddings(products)

    # Step 2: Get embedding-based candidates for all products,
    # filtered to the top N candidates per product
    similarities = compute_similarities(ingredients_dict, product_embeddings)
    embedding_results = {}
    for product, product_similarities in similarities.items():
        embedding_results[product] = product_similarities[:embedding_top_n]

    # Step 3: Process with Chicory Parser
    chicory_results = call_chicory_parser(products)

    # Initialize the result structure: one entry per product, one list per method
    comparison_results = {}
    for product in products:
        comparison_results[product] = {
            "base": [],
            "voyage": [],
            "chicory": [],
            "openai": []
        }

        # Add basic embedding results
        if product in embedding_results:
            base_results = []
            for name, score in embedding_results[product]:
                if score >= confidence_threshold:
                    base_results.append((name, score))
            comparison_results[product]["base"] = base_results[:final_top_n]
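
        # The "base" method is raw embedding similarity, thresholded and truncated;
        # it serves as the no-rerank baseline against which the other methods compare.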

        # Process Chicory results
        chicory_matches = []
        if product in chicory_results:
            chicory_data = chicory_results[product]
            if isinstance(chicory_data, dict):
                # Handle different response formats based on match type
                ingredient = chicory_data.get("ingredient", "")
                confidence = chicory_data.get("confidence", 0)
                if ingredient and confidence >= confidence_threshold:
                    chicory_matches.append((ingredient, confidence))
        comparison_results[product]["chicory"] = chicory_matches

    # Step 4: Process with Voyage AI
    def process_voyage(product):
        try:
            # Get candidates from the embedding results
            candidates = embedding_results.get(product, [])
            if not candidates:
                print(f"No candidates found for product: {product}")
                return product, []

            # Build a single-product expansion dict if expanded descriptions exist
            expanded_product_desc = None
            if expanded_descriptions and product in expanded_descriptions:
                expanded_product_desc = {product: expanded_descriptions.get(product)}

            # Rerank using Voyage
            try:
                if match_type == "ingredients":
                    # Convert candidates to the expected dictionary format,
                    # pulling vectors from the original embeddings
                    ingredient_dict = {}
                    for c in candidates:
                        if c[0] in ingredients_dict:
                            ingredient_dict[c[0]] = ingredients_dict[c[0]]

                    results = hybrid_ingredient_matching(
                        [product],  # pass as a single-item list
                        ingredient_dict,
                        expanded_descriptions=expanded_product_desc
                    )
                else:
                    # Convert candidates to the expected format
                    candidate_dict = {c[0]: c[0] for c in candidates}
                    results = hybrid_category_matching(
                        products=[product],
                        categories=candidate_dict,
                        embedding_top_n=embedding_top_n,
                        final_top_n=final_top_n,
                        confidence_threshold=confidence_threshold,
                        expanded_descriptions=expanded_descriptions
                    )

                # If results came back as a dict keyed by product, pull out this product's list
                if isinstance(results, dict):
                    results = results.get(product, [])

                # Normalize results to (name, score) tuples, skipping duplicates
                formatted_results = []
                added_ids = set()  # names/category IDs already added
                for r in results[:final_top_n]:
                    if isinstance(r, dict) and "name" in r and "score" in r:
                        try:
                            # Convert score to float to ensure type compatibility
                            score = float(r["score"])
                            name = r["name"]
                            if score >= confidence_threshold and name not in added_ids:
                                formatted_results.append((name, score))
                                added_ids.add(name)
                        except (ValueError, TypeError):
                            print(f"Invalid score format in result: {r}")
                    elif isinstance(r, tuple) and len(r) >= 2:
                        try:
                            if len(r) >= 3:
                                # 3-element tuple from category matching: (id, description, score)
                                score = float(r[2])
                                name = r[0]  # use the category ID as the name
                            else:
                                # Standard 2-element tuple: (name, score)
                                score = float(r[1])
                                name = r[0]
                            if score >= confidence_threshold and name not in added_ids:
                                formatted_results.append((name, score))
                                added_ids.add(name)
                        except (ValueError, TypeError):
                            print(f"Invalid score format in tuple: {r}")

                return product, formatted_results
            except Exception as e:
                print(f"Error in Voyage AI reranking for {product}: {str(e)}")
                # Fall back to the raw embedding results
                return product, [(c[0], c[1]) for c in candidates[:final_top_n] if c[1] >= confidence_threshold]
        except Exception as e:
            print(f"Error processing {product} with Voyage: {str(e)}")
            # Return an empty result as the ultimate fallback
            return product, []
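
    # Assumed contract for process_in_parallel (from api_utils): it runs
    # processor_func over the items concurrently and returns {item: result},
    # so voyage_results below is {product: [(name, score), ...]}.
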
    # Process all products with Voyage in parallel
    voyage_results = process_in_parallel(
        items=products,
        processor_func=process_voyage,
        max_workers=min(20, len(products))
    )

    # Update comparison results with Voyage results
    for product, results in voyage_results.items():
        if product in comparison_results:
            comparison_results[product]["voyage"] = results

    # Step 5: Process with OpenAI
    def process_openai(product):
        try:
            # Get candidates from the embedding results
            candidates = embedding_results.get(product, [])
            if not candidates:
                return product, []

            # Extract just the names for OpenAI
            candidate_names = [c[0] for c in candidates]

            # Use the expanded description if available
            expanded_text = expanded_descriptions.get(product, product) if expanded_descriptions else product

            # Use the appropriate ranking function for the match type
            if match_type == "ingredients":
                ranked_candidates = rank_ingredients_openai(
                    product=product,
                    candidates=candidate_names,
                    expanded_description=expanded_text
                )
            else:
                # For categories, use the equivalent function with a category prompt
                from api_utils import rank_categories_openai

                # rank_categories_openai expects a dict, so map each name to itself
                categories_dict = {name: name for name in candidate_names}
                ranked_candidates = rank_categories_openai(
                    product=product,
                    categories=categories_dict,
                    expanded_description=expanded_text
                )

            return product, [(c[0], c[1]) for c in ranked_candidates[:final_top_n] if c[1] >= confidence_threshold]
        except Exception as e:
            print(f"Error processing {product} with OpenAI: {str(e)}")
            return product, []

    # Process all products with OpenAI in parallel
    openai_results = process_in_parallel(
        items=products,
        processor_func=process_openai,
        max_workers=min(20, len(products))
    )

    # Update comparison results with OpenAI results
    for product, results in openai_results.items():
        if product in comparison_results:
            comparison_results[product]["openai"] = results

    # Finally, coerce every method's results into a consistent (name, score) format
    for product, method_results in comparison_results.items():
        for method in method_results:
            formatted_results = []
            for item in method_results[method]:
                if isinstance(item, tuple) and len(item) >= 2:
                    formatted_results.append((str(item[0]), float(item[1])))
                elif isinstance(item, dict):
                    if "ingredient" in item:
                        name = item["ingredient"]
                    elif "category" in item:
                        name = item["category"]
                    else:
                        name = str(item)

                    if "relevance_score" in item:
                        score = float(item["relevance_score"])
                    elif "confidence" in item:
                        score = float(item["confidence"])
                    else:
                        score = 0.0
                    formatted_results.append((name, score))
                else:
                    formatted_results.append((str(item), 0.0))
            method_results[method] = formatted_results

    return comparison_results
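

# Illustrative shape of the dictionary returned by compare_ingredient_methods
# (product and score values below are made up for the example):
# {
#     "almond milk": {
#         "base":    [("almond", 0.83)],
#         "voyage":  [("almond", 0.91)],
#         "chicory": [("almond milk", 0.95)],
#         "openai":  [("almond", 0.88)],
#     },
# }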


def compare_ingredient_methods_ui(product_input, embedding_top_n=20, final_top_n=3,
                                  confidence_threshold=0.5, match_type="categories",
                                  use_expansion=False):
    """
    Compare multiple ingredient matching methods on the same products.

    Args:
        product_input: Text input with product names or file path
        embedding_top_n: Number of top ingredients to retrieve using embeddings
        final_top_n: Number of final results to show for each method
        confidence_threshold: Minimum score threshold for final results
        match_type: Type of matching to perform ('ingredients' or 'categories')
        use_expansion: Whether to use description expansion

    Returns:
        HTML-formatted comparison results
    """
    from utils import load_embeddings

    # Split the text input into lines and drop empty ones
    if not product_input:
        return "Please enter at least one product."

    product_names = [p.strip() for p in product_input.split('\n') if p.strip()]
    if not product_names:
        return "Please enter at least one product."

    # Load the appropriate embeddings for the match type
    try:
        if match_type == "ingredients":
            embeddings_path = "data/ingredient_embeddings_voyageai.pkl"
            embeddings_dict = load_embeddings(embeddings_path)
            header_text = f"Comparing {len(product_names)} products using multiple ingredient matching methods."
        else:  # categories
            embeddings_path = "data/category_embeddings.pickle"
            embeddings_dict = load_embeddings(embeddings_path)
            header_text = f"Comparing {len(product_names)} products using multiple category matching methods."

        # Expand descriptions if requested
        expanded_products = None
        print("USE EXPANSION:", use_expansion)
        if use_expansion:
            from openai_expansion import expand_product_descriptions
            expanded_products = expand_product_descriptions(product_names)
            header_text = (f"Comparing {len(product_names)} products using multiple "
                           f"{match_type} matching methods WITH expanded descriptions.")

        comparison_results = compare_ingredient_methods(
            products=product_names,
            ingredients_dict=embeddings_dict,
            embedding_top_n=embedding_top_n,
            final_top_n=final_top_n,
            confidence_threshold=confidence_threshold,
            match_type=match_type,
            expanded_descriptions=expanded_products
        )
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"""Error comparing methods: {str(e)}
{error_details}
"""

    # Format results as HTML using the centralized formatters
    result_elements = []
    for product in product_names:
        if product in comparison_results:
            expanded_text = expanded_products.get(product, "") if expanded_products else ""
            result_elements.append(format_comparison_html(
                product,
                comparison_results[product],
                expanded_description=expanded_text
            ))

    # Prepend the header text, then join the per-product HTML fragments directly
    output_html = f"""
{header_text}
""" + "".join(result_elements)

    return output_html