import gradio as gr
from utils import SafeProgress
from embeddings import create_product_embeddings
from similarity import compute_similarities
from chicory_api import call_chicory_parser
from ui_core import embeddings, parse_input
from ui_formatters import format_categories_html
from openai_expansion import expand_product_descriptions
def categorize_products(product_input, is_file=False, use_expansion=False, top_n=10, confidence_threshold=0.5):
    """Categorize products from text input or file"""
    progress_tracker = SafeProgress(gr.Progress())
    progress_tracker(0, desc="Starting...")

    # Parse input
    product_names, error = parse_input(product_input, is_file)
    if error:
        return error

    # Validate embeddings are loaded
    if not embeddings:
        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
    # Optional description expansion
    expanded_descriptions = {}
    if use_expansion:
        progress_tracker(0.2, desc="Expanding product descriptions...")
        expanded_descriptions = expand_product_descriptions(product_names)

    # Create embeddings
    progress_tracker(0.4, desc="Generating product embeddings...")
    if use_expansion and expanded_descriptions:
        # Use expanded descriptions for embedding creation when available
        products_for_embedding = [expanded_descriptions.get(name, name) for name in product_names]
        # Map embeddings back to the original product names for consistent keys
        products_embeddings = {}
        temp_embeddings = create_product_embeddings(products_for_embedding, original_products=product_names)
        # Iterate through the original names and use them as keys
        for product_name in product_names:
            if product_name in temp_embeddings:
                products_embeddings[product_name] = temp_embeddings[product_name]
    else:
        # Standard embedding creation with just product names
        products_embeddings = create_product_embeddings(product_names)
    if not products_embeddings:
        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to generate product embeddings. Please try again with different product names.</div>"

    # Call Chicory Parser API
    progress_tracker(0.6, desc="Calling Chicory Parser API...")
    chicory_results = call_chicory_parser(product_names, progress=gr.Progress())

    # Compute similarities
    progress_tracker(0.8, desc="Computing similarities...")
    all_similarities = compute_similarities(embeddings, products_embeddings)
    # Format results
    progress_tracker(0.9, desc="Formatting results...")
    output_html = f"<div><p style='color: #555;'>Processing {len(product_names)} products.</p>"
    for product, similarities in all_similarities.items():
        # Keep only matches at or above the confidence threshold, then take the top N
        filtered_similarities = [(ingredient, score) for ingredient, score in similarities if score >= confidence_threshold]
        top_similarities = filtered_similarities[:int(top_n)]

        # Add expansion explanation if available
        expansion_text = expanded_descriptions.get(product, "") if use_expansion else ""

        # Chicory Parser results for this product
        chicory_data = chicory_results.get(product, [])

        output_html += format_categories_html(
            product,
            top_similarities,
            chicory_result=chicory_data,
            explanation=expansion_text,
            match_type="ingredients",
        )
        output_html += "<hr style='margin: 15px 0; border: 0; border-top: 1px solid #eee;'>"
    output_html += "</div>"
    if not all_similarities:
        output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"

    progress_tracker(1.0, desc="Done!")
    return output_html  # Return the generated HTML directly
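

# --- Example wiring (illustrative sketch only) ---
# A minimal Gradio Blocks layout showing how categorize_products could be hooked
# up to a UI. The component names, labels, and ranges below are assumptions; the
# real Space may define its interface elsewhere (e.g. in ui_core).
if __name__ == "__main__":
    with gr.Blocks() as demo:
        product_box = gr.Textbox(label="Products (one per line)", lines=5)
        use_expansion_box = gr.Checkbox(label="Expand descriptions with OpenAI", value=False)
        top_n_slider = gr.Slider(1, 25, value=10, step=1, label="Top N matches")
        threshold_slider = gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Confidence threshold")
        run_button = gr.Button("Categorize")
        results_html = gr.HTML()

        # is_file is fixed to False here because the input comes from a textbox
        run_button.click(
            fn=lambda text, expand, n, thr: categorize_products(
                text, is_file=False, use_expansion=expand, top_n=n, confidence_threshold=thr
            ),
            inputs=[product_box, use_expansion_box, top_n_slider, threshold_slider],
            outputs=results_html,
        )

    demo.launch()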