import gradio as gr

from utils import SafeProgress
from embeddings import create_product_embeddings
from similarity import compute_similarities
from chicory_api import call_chicory_parser
from ui_core import embeddings, parse_input
from ui_formatters import format_categories_html, create_results_container
from openai_expansion import expand_product_descriptions


def categorize_products(product_input, is_file=False, use_expansion=False, top_n=10, confidence_threshold=0.5):
    """Match products against the loaded ingredient embeddings and render the result.

    Args:
        product_input: Raw user input; handed to ``parse_input`` together
            with ``is_file``.
        is_file: Forwarded to ``parse_input`` to say whether
            ``product_input`` is a file rather than text.
        use_expansion: When true, product names are first run through
            ``expand_product_descriptions`` and the expanded text (where
            one exists) is embedded instead of the bare name.
        top_n: Maximum number of matches kept per product; converted with
            ``int()`` before slicing.
        confidence_threshold: Minimum score (inclusive) a match needs in
            order to be kept.

    Returns:
        One of:
        * the error value reported by ``parse_input``, returned unchanged;
        * a plain error-message string when no ingredient embeddings are
          loaded or no product embeddings could be generated;
        * otherwise the value of ``create_results_container`` applied to
          the assembled output text.
    """
    # NOTE(review): the message strings below contain only plain text framed
    # by newlines. They look as if they once carried markup (wrappers,
    # separators, a closing tag) - confirm against version control and
    # restore if so. A trailing `output_html += ""` from the original was a
    # no-op and is omitted here.
    progress_tracker = SafeProgress(gr.Progress())
    progress_tracker(0, desc="Starting...")

    # Parse input
    product_names, error = parse_input(product_input, is_file)
    if error:
        return error

    # Nothing to compare against without the ingredient embeddings.
    if not embeddings:
        return (
            "\n\nError: No ingredient embeddings loaded. Please check that the "
            "embeddings file exists and is properly formatted.\n\n"
        )

    # Optional description expansion
    expanded_descriptions = {}
    if use_expansion:
        progress_tracker(0.2, desc="Expanding product descriptions...")
        expanded_descriptions = expand_product_descriptions(product_names, progress=gr.Progress())

    # Create embeddings
    progress_tracker(0.4, desc="Generating product embeddings...")
    if use_expansion and expanded_descriptions:
        # Embed the expanded text where available, falling back to the name.
        products_for_embedding = [expanded_descriptions.get(name, name) for name in product_names]
        temp_embeddings = create_product_embeddings(products_for_embedding, progress=gr.Progress())
        # Re-key by the original product name so later lookups (expansion
        # text, Chicory results) use consistent keys. Products whose text
        # produced no embedding are left out.
        products_embeddings = {
            name: temp_embeddings[text]
            for name, text in zip(product_names, products_for_embedding)
            if text in temp_embeddings
        }
    else:
        # Standard embedding creation with just product names
        products_embeddings = create_product_embeddings(product_names, progress=gr.Progress())

    if not products_embeddings:
        return (
            "\n\nError: Failed to generate product embeddings. "
            "Please try again with different product names.\n\n"
        )

    # Call Chicory Parser API
    progress_tracker(0.6, desc="Calling Chicory Parser API...")
    chicory_results = call_chicory_parser(product_names, progress=gr.Progress())

    # Compute similarities
    progress_tracker(0.8, desc="Computing similarities...")
    all_similarities = compute_similarities(embeddings, products_embeddings)

    # Format results
    progress_tracker(0.9, desc="Formatting results...")
    if all_similarities:
        max_matches = int(top_n)
        html_parts = [f"\n\nProcessing {len(product_names)} products.\n\n"]
        for product, similarities in all_similarities.items():
            # Threshold first, then truncate. Presumably `similarities` is
            # already ordered best-first by compute_similarities - verify.
            kept = [
                (ingredient, score)
                for ingredient, score in similarities
                if score >= confidence_threshold
            ][:max_matches]

            html_parts.append(
                format_categories_html(
                    product,
                    kept,
                    chicory_result=chicory_results.get(product, []),
                    # Show the expansion text only when expansion was requested.
                    explanation=expanded_descriptions.get(product, "") if use_expansion else "",
                    match_type="ingredients",
                )
            )
            # Separator emitted after every product section.
            html_parts.append("\n\n")
        output_html = "".join(html_parts)
    else:
        output_html = "\n\nNo results found. Please check your input or try different products.\n\n"

    progress_tracker(1.0, desc="Done!")
    return create_results_container(output_html)