import gradio as gr
from utils import SafeProgress
from embeddings import create_product_embeddings
from similarity import compute_similarities
from chicory_api import call_chicory_parser
from ui_core import embeddings, parse_input
from ui_formatters import format_categories_html, create_results_container
from openai_expansion import expand_product_descriptions
def categorize_products(product_input, is_file=False, use_expansion=False, top_n=10, confidence_threshold=0.5):
    """Categorize products from text input or file.

    Pipeline: parse the input into product names, optionally expand each
    name into a longer description, embed the products, query the Chicory
    parser, score the products against the module-level ``embeddings`` and
    render one HTML section per product.

    Args:
        product_input: Raw input handed to ``parse_input`` together with
            ``is_file``.
        is_file: Forwarded to ``parse_input``.
        use_expansion: When true, ``expand_product_descriptions`` is called
            and its output (where present) is embedded instead of the bare
            product name, and shown as the explanation for each product.
        top_n: Maximum number of matches kept per product (converted with
            ``int``).
        confidence_threshold: Matches with a score below this value are
            dropped before the ``top_n`` cut.

    Returns:
        On success (including the "No results found" case), the value of
        ``create_results_container`` applied to the generated HTML. On
        failure, either the error reported by ``parse_input`` or a plain
        error string; these early returns are NOT passed through
        ``create_results_container``.
    """
    # NOTE(review): as in the original, every helper receives its own fresh
    # gr.Progress() instance; confirm against Gradio's progress-tracking
    # documentation that instances created inside the handler are tracked.
    progress_tracker = SafeProgress(gr.Progress())
    progress_tracker(0, desc="Starting...")

    # Parse input
    product_names, error = parse_input(product_input, is_file)
    if error:
        return error

    # Validate embeddings are loaded
    if not embeddings:
        return (
            "\nError: No ingredient embeddings loaded. Please check that the "
            "embeddings file exists and is properly formatted.\n"
        )

    # Optional description expansion
    expanded_descriptions = {}
    if use_expansion:
        progress_tracker(0.2, desc="Expanding product descriptions...")
        # A falsy result is normalised to {} so the .get() lookups further
        # down cannot fail on it.
        expanded_descriptions = (
            expand_product_descriptions(product_names, progress=gr.Progress()) or {}
        )

    # Create embeddings
    progress_tracker(0.4, desc="Generating product embeddings...")
    if use_expansion and expanded_descriptions:
        # Embed the expanded description when one exists, otherwise the name.
        products_for_embedding = [
            expanded_descriptions.get(name, name) for name in product_names
        ]
        temp_embeddings = create_product_embeddings(
            products_for_embedding, progress=gr.Progress()
        )
        # Re-key by the original product name so later lookups stay
        # consistent; products whose text produced no embedding are omitted.
        products_embeddings = {
            name: temp_embeddings[text]
            for name, text in zip(product_names, products_for_embedding)
            if text in temp_embeddings
        }
    else:
        # Standard embedding creation with just product names
        products_embeddings = create_product_embeddings(
            product_names, progress=gr.Progress()
        )

    if not products_embeddings:
        return (
            "Error: Failed to generate product embeddings. "
            "Please try again with different product names.\n"
        )

    # Call Chicory Parser API
    progress_tracker(0.6, desc="Calling Chicory Parser API...")
    # Same normalisation as above: a falsy result becomes an empty mapping.
    chicory_results = call_chicory_parser(product_names, progress=gr.Progress()) or {}

    # Compute similarities
    progress_tracker(0.8, desc="Computing similarities...")
    all_similarities = compute_similarities(embeddings, products_embeddings)

    # Format results
    progress_tracker(0.9, desc="Formatting results...")
    if not all_similarities:
        output_html = (
            "No results found. Please check your input or try different products.\n"
        )
    else:
        limit = int(top_n)  # loop-invariant, so convert once
        sections = [f"Processing {len(product_names)} products.\n"]
        for product, similarities in all_similarities.items():
            # Threshold first, then truncate; this relies on
            # compute_similarities returning matches in the desired order.
            filtered_similarities = [
                (ingredient, score)
                for ingredient, score in similarities
                if score >= confidence_threshold
            ]
            top_similarities = filtered_similarities[:limit]

            # Add expansion explanation if available
            expansion_text = (
                expanded_descriptions.get(product, "") if use_expansion else ""
            )
            chicory_data = chicory_results.get(product, [])

            sections.append(
                format_categories_html(
                    product,
                    top_similarities,
                    chicory_result=chicory_data,
                    explanation=expansion_text,
                    match_type="ingredients",
                )
            )
            sections.append("\n")
        output_html = "".join(sections)

    progress_tracker(1.0, desc="Done!")
    return create_results_container(output_html)