from typing import List, Dict, Any, Optional
from utils import SafeProgress
import os
import voyageai
import time
import numpy as np
from concurrent.futures import ThreadPoolExecutor

# Set the Voyage AI API key from the environment
voyageai.api_key = os.getenv("VOYAGE_API_KEY")


def get_embeddings_batch(texts, model="voyage-3-large", batch_size=100):
    """Get embeddings for a list of texts in batches."""
    all_embeddings = []

    # Pre-process all texts to replace newlines
    texts = [text.replace("\n", " ") for text in texts]

    # The voyageai SDK exposes embeddings through a Client object; it picks up
    # the API key from voyageai.api_key or the VOYAGE_API_KEY environment variable
    client = voyageai.Client()

    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]

        try:
            response = client.embed(batch, model=model)
            all_embeddings.extend(response.embeddings)

            # Sleep briefly to avoid rate limits
            if i + batch_size < len(texts):
                time.sleep(0.5)
        except Exception as e:
            print(f"Error in batch {i // batch_size + 1}: {e}")
            # Add placeholder embeddings for the failed batch so indices stay aligned
            all_embeddings.extend([None] * len(batch))

    return all_embeddings


def create_product_embeddings(products: List[str], batch_size: int = 100, progress=None) -> Dict[str, Any]:
    """
    Create embeddings for product names with optimization for duplicates.

    Args:
        products: List of product names to create embeddings for
        batch_size: Maximum number of products to process in one batch
        progress: Optional progress tracking object (Gradio progress bar)

    Returns:
        Dictionary mapping product names to their embeddings
    """
    progress_tracker = SafeProgress(progress, desc="Generating embeddings")
    total_products = len(products)

    # Initialize results dictionary
    product_embeddings = {}

    # Use the same model as for ingredients (voyage-3-large)
    model = "voyage-3-large"

    # Process in batches with de-duplication
    progress_tracker(0.1, desc=f"Starting embeddings for {total_products} products")

    # De-duplication step
    unique_products = []
    product_to_index = {}
    index_map = {}  # Maps original index to index in unique_products

    for i, product in enumerate(products):
        if product in product_to_index:
            # Product already seen, just store the mapping
            index_map[i] = product_to_index[product]
        else:
            # New unique product
            product_to_index[product] = len(unique_products)
            index_map[i] = len(unique_products)
            unique_products.append(product)

    progress_tracker(0.2, desc=f"Found {len(unique_products)} unique products out of {total_products} total")

    if len(unique_products) == 0:
        progress_tracker(1.0, desc="No valid products to process")
        return {}

    # Get embeddings in batches for unique products only
    try:
        # Pre-process all texts to replace newlines
        clean_products = [product.replace("\n", " ") for product in unique_products]

        progress_tracker(0.3, desc=f"Calling VoyageAI API for {len(clean_products)} unique products")

        # Process in smaller batches for better reliability
        unique_embeddings = get_embeddings_batch(clean_products, model=model, batch_size=batch_size)

        # Map embeddings back to all products
        progress_tracker(0.8, desc="Mapping embeddings back to all products")
        for i, product in enumerate(products):
            unique_idx = index_map[i]
            if unique_idx < len(unique_embeddings) and unique_embeddings[unique_idx] is not None:
                # Store as dictionary with 'embedding' key for consistent format
                product_embeddings[product] = {
                    "embedding": unique_embeddings[unique_idx]
                }

        progress_tracker(0.9, desc="Processing embeddings completed")
    except Exception as e:
        progress_tracker(0.9, desc=f"Error generating embeddings: {str(e)}")
        print(f"Error generating product embeddings: {e}")
    progress_tracker(1.0, desc=f"Completed embeddings for {len(product_embeddings)} products")
    return product_embeddings


def _generate_embeddings_for_batch(batch: List[str]) -> Dict[str, Any]:
    """
    Generate embeddings for a single batch of products.

    Returns a dictionary mapping each product name to {"embedding": [...]},
    the same format used by create_product_embeddings.
    """
    embeddings = {}
    batch_embeddings = get_embeddings_batch(batch)

    for product, embedding in zip(batch, batch_embeddings):
        # Skip products whose batch failed (None placeholders)
        if embedding is not None:
            embeddings[product] = {"embedding": embedding}

    return embeddings
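

# Minimal usage sketch (illustrative only): the product names below are made-up
# examples, and a valid VOYAGE_API_KEY must be set in the environment for the
# API call to succeed. The duplicate entry shows that de-duplication still
# returns an embedding for every input name.
if __name__ == "__main__":
    sample_products = [
        "Organic Whole Milk 1L",
        "Organic Whole Milk 1L",  # duplicate, embedded only once
        "Dark Chocolate Bar 70%",
    ]
    results = create_product_embeddings(sample_products, batch_size=100)
    for name, data in results.items():
        print(name, len(data["embedding"]))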