import json
import numpy as np
from typing import Dict, List, Tuple, Any
import concurrent.futures
import time
import os
from api_utils import get_openai_client, get_voyage_client, process_in_parallel, rank_ingredients_openai
from ui_formatters import format_comparison_html, create_results_container

def compare_ingredient_methods(products: List[str], ingredients_dict: Dict[str, Any],
                            embedding_top_n: int = 20, final_top_n: int = 3,
                            confidence_threshold: float = 0.5,
                            progress=None) -> Dict[str, Dict[str, List[Tuple]]]:
    """
    Compare four different methods for ingredient matching:
    1. Base embeddings (without re-ranking)
    2. Voyage AI reranker (applied to the embedding candidates)
    3. Chicory parser
    4. OpenAI ranking of the embedding candidates (requested model: "gpt-4o-mini")

    Args:
        products: List of product names to categorize
        ingredients_dict: Dictionary of ingredient names to embeddings
        embedding_top_n: Number of top ingredients to retrieve using embeddings
        final_top_n: Number of final results to show for each method
        confidence_threshold: Minimum score threshold for final results
        progress: Optional progress tracking object

    Returns:
        Dictionary mapping each product to a dict with the keys "base",
        "voyage", "chicory" and "openai". "base", "voyage" and "chicory" hold
        lists of (ingredient, score) tuples; "openai" holds whatever
        rank_ingredients_openai returns. When the Voyage or OpenAI call raises,
        that method's entry falls back to the thresholded embedding results.
        An empty dictionary is returned when ``products`` is empty.
    """
    # Nothing to compare. Returning here also avoids requesting
    # min(20, 0) == 0 workers from process_in_parallel further down.
    # len() is used (not truthiness) so any sized sequence is accepted.
    if len(products) == 0:
        return {}

    # Project-local imports are kept at function scope, as in the rest of this file.
    from utils import SafeProgress
    from embeddings import create_product_embeddings
    from chicory_api import call_chicory_parser
    from similarity import compute_similarities

    progress_tracker = SafeProgress(progress, desc="Comparing ingredient matching methods")

    # Step 1: Generate embeddings for all products (used by multiple methods)
    progress_tracker(0.1, desc="Generating product embeddings")
    product_embeddings = create_product_embeddings(products, progress=progress_tracker)

    # Step 2: Get embedding-based candidates for all products
    progress_tracker(0.2, desc="Finding embedding candidates")
    similarities = compute_similarities(ingredients_dict, product_embeddings)

    # Keep the first N candidates per product.
    # NOTE(review): assumes compute_similarities returns best-first sequences of
    # (ingredient, score) pairs - confirm against similarity.py.
    embedding_results = {
        product: ranked[:embedding_top_n]
        for product, ranked in similarities.items()
    }

    # Step 3: Call Chicory Parser API (this is done for all products at once)
    progress_tracker(0.3, desc="Calling Chicory Parser API")
    chicory_results = call_chicory_parser(products, progress=progress_tracker)

    def embedding_matches(product):
        """Return the top ``final_top_n`` embedding candidates meeting the threshold."""
        candidates = embedding_results.get(product, [])
        return [(c[0], c[1]) for c in candidates[:final_top_n] if c[1] >= confidence_threshold]

    def chicory_matches(product):
        """Return the Chicory match for ``product`` as a 0- or 1-element list."""
        if product not in chicory_results:
            return []
        chicory_data = chicory_results[product]
        if not isinstance(chicory_data, dict):
            return []
        ingredient = chicory_data.get("ingredient", "")
        confidence = chicory_data.get("confidence", 0)
        if ingredient and confidence >= confidence_threshold:
            return [(ingredient, confidence)]
        return []

    # Base-embedding and Chicory results need no further API calls, so they are
    # filled in right away. Chicory is independent of the embedding step and is
    # therefore recorded even for products that have no embedding candidates.
    comparison_results = {
        product: {
            "base": embedding_matches(product),
            "voyage": [],
            "chicory": chicory_matches(product),
            "openai": [],
        }
        for product in products
    }

    # Initialize clients for reranking
    voyage_client = get_voyage_client()
    openai_client = get_openai_client()

    # The two API-heavy methods below are run per product by process_in_parallel.
    def process_voyage_reranking(product):
        """Rerank one product's embedding candidates with Voyage AI."""
        candidates = embedding_results.get(product)
        if not candidates:
            return product, []

        candidate_ingredients = [c[0] for c in candidates]
        candidate_texts = [f"Ingredient: {name}" for name in candidate_ingredients]

        try:
            reranking = voyage_client.rerank(
                query=product,  # Use product directly as query
                documents=candidate_texts,
                model="rerank-2",
                top_k=final_top_n
            )

            voyage_ingredients = []
            for result in reranking.results:
                # Map the returned document text back to its ingredient name
                candidate_index = candidate_texts.index(result.document)
                score = float(result.relevance_score)

                # Only include results above the confidence threshold
                if score >= confidence_threshold:
                    voyage_ingredients.append((candidate_ingredients[candidate_index], score))

            return product, voyage_ingredients
        except Exception as e:
            # Best-effort: report the failure and fall back to embedding results
            print(f"Error during Voyage reranking for '{product}': {e}")
            return product, embedding_matches(product)

    def process_openai(product):
        """Rank one product's embedding candidates with the OpenAI helper."""
        candidates = embedding_results.get(product)
        if not candidates:
            return product, []

        try:
            openai_ingredients = rank_ingredients_openai(
                product=product,
                candidates=[c[0] for c in candidates],
                client=openai_client,
                model="gpt-4o-mini",
                max_results=final_top_n,
                confidence_threshold=confidence_threshold
            )
            return product, openai_ingredients
        except Exception as e:
            # Best-effort: report the failure and fall back to embedding results
            print(f"Error during OpenAI processing for '{product}': {e}")
            return product, embedding_matches(product)

    # products is non-empty here, so this is always at least 1.
    worker_count = min(20, len(products))

    # Process Voyage AI reranking in parallel
    progress_tracker(0.4, desc="Running Voyage AI reranking in parallel")
    voyage_results = process_in_parallel(
        items=products,
        processor_func=process_voyage_reranking,
        max_workers=worker_count,
        progress_tracker=progress_tracker,
        progress_start=0.4,
        progress_end=0.65,
        progress_desc="Voyage AI"
    )

    # Update comparison results with Voyage results
    for product, results in voyage_results.items():
        if product in comparison_results:
            comparison_results[product]["voyage"] = results

    # Process OpenAI queries in parallel
    progress_tracker(0.7, desc="Running OpenAI processing in parallel")
    openai_results = process_in_parallel(
        items=products,
        processor_func=process_openai,
        max_workers=worker_count,
        progress_tracker=progress_tracker,
        progress_start=0.7,
        progress_end=0.95,
        progress_desc="OpenAI"
    )

    # Update comparison results with OpenAI results
    for product, results in openai_results.items():
        if product in comparison_results:
            comparison_results[product]["openai"] = results

    progress_tracker(1.0, desc="Comparison complete")
    return comparison_results

def compare_ingredient_methods_ui(product_input, is_file=False, embedding_top_n=20, 
                                final_top_n=3, confidence_threshold=0.5, progress=None):
    """
    Compare multiple ingredient matching methods on the same products

    Args:
        product_input: Text input with one product name per line
        is_file: Accepted for interface compatibility; this function does not
            read it and always treats ``product_input`` as newline-separated text
        embedding_top_n: Number of top ingredients to retrieve using embeddings
        final_top_n: Number of final results to show for each method
        confidence_threshold: Minimum score threshold for final results
        progress: Optional progress tracking object

    Returns:
        HTML formatted comparison results. A plain-text prompt is returned when
        no product names are supplied, and a red HTML error block (message plus
        traceback, both HTML-escaped) when loading or comparison raises.
    """
    # Validate the input before importing anything or touching the progress
    # tracker, so empty input is rejected without doing any work.
    if not product_input:
        return "Please enter at least one product."
    # Split text input by lines and remove empty lines
    product_names = [line.strip() for line in product_input.split('\n') if line.strip()]
    if not product_names:
        return "Please enter at least one product."

    import html
    import traceback
    from utils import SafeProgress, load_embeddings

    progress_tracker = SafeProgress(progress, desc="Comparing ingredient matching methods")
    progress_tracker(0.1, desc="Processing input")

    # Load ingredient embeddings and run the comparison
    try:
        progress_tracker(0.2, desc="Loading ingredient embeddings")
        ingredients_dict = load_embeddings("data/ingredient_embeddings_voyageai.pkl")

        progress_tracker(0.3, desc="Comparing methods")
        comparison_results = compare_ingredient_methods(
            products=product_names,
            ingredients_dict=ingredients_dict,
            embedding_top_n=embedding_top_n,
            final_top_n=final_top_n,
            confidence_threshold=confidence_threshold,
            progress=progress_tracker
        )
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user instead of raising.
        # Both strings are escaped because tracebacks routinely contain text such
        # as "<module>" that would otherwise be parsed as HTML tags.
        error_message = html.escape(str(e))
        error_details = html.escape(traceback.format_exc())
        return f"<div style='color: red;'>Error comparing methods: {error_message}<br><pre>{error_details}</pre></div>"

    # Format results as HTML using centralized formatters
    progress_tracker(0.9, desc="Formatting results")

    result_elements = [
        format_comparison_html(product, comparison_results[product])
        for product in product_names
        if product in comparison_results
    ]

    output_html = create_results_container(
        result_elements,
        header_text=f"Comparing {len(product_names)} products using multiple ingredient matching methods."
    )

    progress_tracker(1.0, desc="Complete")
    return output_html