Spaces:
Sleeping
Sleeping
from html import escape
from typing import List, Dict, Tuple, Any

from utils import get_confidence_color, get_confidence_bg_color
# Accent color (hex) per matching method. These are fixed values, so they
# look the same whichever UI theme is active.
METHOD_COLORS = dict(
    base="#f39c12",         # Orange
    voyage="#3498db",       # Blue
    chicory="#9b59b6",      # Purple
    openai="#2ecc71",       # Green
    expanded="#e74c3c",     # Red
    hybrid="#1abc9c",       # Turquoise
    categories="#1abc9c",   # Same as hybrid
    ingredients="#f39c12",  # Same as base
)
# Human-readable heading shown for each matching method key.
METHOD_NAMES = dict(
    base="Base Embeddings",
    voyage="Voyage AI Reranker",
    chicory="Chicory Parser",
    openai="OpenAI Reranker",
    expanded="Expanded Description",
    hybrid="Hybrid Matching",
    categories="Category Matches",
    ingredients="Ingredient Matches",
)
def parse_result_item(item):
    """Parse one result item into an HTML-safe display string and a float score.

    Recognised shapes:
      * ``(match, score)`` tuple: the display text is ``str(match)``.
      * ``(id, text, score)`` tuple: the display text is
        ``<strong>id</strong>: text``, or just ``id`` when ``text`` is falsy.
      * dict holding one of the key pairs ``name``/``score``,
        ``category``/``confidence`` or ``ingredient``/``relevance_score``
        (checked in that order).

    Any other value falls back to ``str(item)`` with a score of ``0.0``.

    The returned display text is an HTML fragment (the 3-tuple form emits a
    ``<strong>`` tag), so every piece of caller-supplied text is coerced to
    ``str`` and HTML-escaped before it is embedded. Without this, a value
    containing ``<`` or ``&`` would be interpreted as markup by the page.

    Returns:
        ``(display_text, score)``. ``score`` is ``0.0`` whenever the raw
        value cannot be converted with ``float()``.
    """
    # (text key, score key) pairs accepted for dict items, in priority order.
    dict_layouts = (
        ("name", "score"),
        ("category", "confidence"),
        ("ingredient", "relevance_score"),
    )

    display_text = None
    raw_score = 0.0

    if isinstance(item, tuple):
        if len(item) == 2:
            match, raw_score = item
            display_text = escape(str(match), quote=False)
        elif len(item) == 3:
            id_val, text, raw_score = item
            safe_id = escape(str(id_val), quote=False)
            if text:
                display_text = f"<strong>{safe_id}</strong>: {escape(str(text), quote=False)}"
            else:
                display_text = safe_id
    elif isinstance(item, dict):
        for text_key, score_key in dict_layouts:
            if text_key in item and score_key in item:
                display_text = escape(str(item[text_key]), quote=False)
                raw_score = item[score_key]
                break

    # Unrecognised shape: show the item's string form, still escaped.
    if display_text is None:
        display_text = escape(str(item), quote=False)

    # Scores may arrive as strings, None, etc.; normalise to float.
    try:
        score = float(raw_score)
    except (ValueError, TypeError):
        score = 0.0
    return display_text, score
def filter_results_by_threshold(results, confidence_threshold=0.0):
    """Return the items of ``results`` whose parsed score meets the threshold.

    Each item is scored with ``parse_result_item``; items scoring at least
    ``confidence_threshold`` are kept, in their original order and unchanged.
    """
    return [
        entry
        for entry in results
        if parse_result_item(entry)[1] >= confidence_threshold
    ]
def format_confidence_badge(score):
    """Format a confidence score as a styled ``<span>`` badge showing a percentage.

    Args:
        score: Confidence value; ``0.29`` is rendered as ``29%``. Values that
            ``float()`` cannot convert (e.g. ``None``) are treated as ``0.0``
            rather than raising.

    Returns:
        An HTML ``<span>`` string whose border and background colors come
        from ``get_confidence_color`` / ``get_confidence_bg_color``.
    """
    try:
        score = float(score)
    except (TypeError, ValueError):
        score = 0.0

    # round(), not int(): 0.29 * 100 == 28.999999999999996, which int() would
    # truncate to 28. Note that round() sends exact halves to the even integer.
    confidence_percent = round(score * 100)
    confidence_color = get_confidence_color(score)
    bg_color = get_confidence_bg_color(score)

    # Rough contrast heuristic: black text on the known light backgrounds,
    # the theme's text color otherwise. A precise solution would compute the
    # background's luminance.
    light_bgs = ("#ffffcc", "#ccffcc", "#cceeff")  # Add more light hex codes if needed
    text_color = "#000000" if bg_color.lower() in light_bgs else "var(--text-color)"

    return (
        f"<span style='background-color: {bg_color}; border: 1px solid {confidence_color}; color: {text_color}; "
        f"font-weight: 600; padding: 3px 8px; border-radius: 5px; font-size: 0.9em; "
        f"min-width: 80px; text-align: center; display: inline-block; margin-left: 10px;'>"
        f"{confidence_percent}%</span>"
    )
def format_result_list_html(results, confidence_threshold=0.0):
    """Render match results as an unstyled-bullet HTML ``<ul>``.

    Items scoring below ``confidence_threshold`` are dropped first. When
    nothing remains, a grey italic "no matches" paragraph is returned
    instead of a list. Each surviving item becomes one ``<li>`` holding its
    display text and a confidence badge.
    """
    kept = filter_results_by_threshold(results, confidence_threshold)
    if not kept:
        return "<p style='color: grey; font-style: italic; margin: 10px 0;'>No matches found above confidence threshold.</p>"

    rows = []
    for entry in kept:
        label, confidence = parse_result_item(entry)
        pill = format_confidence_badge(confidence)
        # Row background and border rely on theme variables so the list
        # follows the active theme.
        rows.append(
            f"<li style='display: flex; justify-content: space-between; align-items: center; "
            f"margin-bottom: 8px; padding: 8px; border-radius: 4px; "
            f"background-color: var(--secondary-background-color); "
            f"border: 1px solid rgba(128, 128, 128, 0.2);'>"
            f"<span style='flex-grow: 1; margin-right: 10px; word-wrap: break-word; color: var(--text-color);'>{label}</span>"
            f"{pill}"
            f"</li>"
        )

    opening = "<ul style='list-style-type: none; padding-left: 0; margin-top: 10px;'>"
    return opening + "".join(rows) + "</ul>"
def format_result_card(title, content_html):
    """Wrap already-rendered content in a titled card ``<div>``.

    Args:
        title: Plain-text card heading (callers in this file pass the
            product name). It is converted with ``str()`` and HTML-escaped,
            so characters such as ``<`` or ``&`` show up literally instead
            of being parsed as markup.
        content_html: Trusted HTML for the card body; inserted verbatim.

    Returns:
        The card as an HTML string.
    """
    safe_title = escape(str(title), quote=False)
    # The card sits on the theme's secondary background with a slightly
    # stronger border than the rows it contains.
    return (
        f"<div style='margin-bottom: 20px; border: 1px solid rgba(128, 128, 128, 0.3); border-radius: 8px; padding: 15px; background-color: var(--secondary-background-color);'>"
        f"<h3 style='margin-top: 0; margin-bottom: 15px; font-size: 1.1em; border-bottom: 1px solid rgba(128, 128, 128, 0.2); padding-bottom: 10px; color: var(--text-color);'>{safe_title}</h3>"
        f"{content_html}"
        f"</div>"
    )
def format_info_panel(title, text):
    """Format an informational panel (e.g. for an expanded description).

    Args:
        title: Plain-text panel heading.
        text: Plain-text panel body.

    Both arguments are converted with ``str()`` and HTML-escaped before
    being embedded, so free text containing ``<``, ``>`` or ``&`` is shown
    literally instead of being interpreted as markup.

    Returns:
        The panel as an HTML string: a ``<div>`` with a left accent border
        in the theme's primary color, holding an ``<h4>`` and a ``<p>``.
    """
    safe_title = escape(str(title), quote=False)
    safe_text = escape(str(text), quote=False)
    # Uses the secondary background for consistency with the result cards.
    return (
        f"<div style='border-left: 4px solid var(--primary-color); padding: 10px 15px; margin-bottom: 15px; border-radius: 4px; background-color: var(--secondary-background-color);'>"
        f"<h4 style='margin-top: 0; margin-bottom: 8px; font-size: 1em; color: var(--text-color);'>{safe_title}</h4>"
        f"<p style='margin-bottom: 0; font-size: 0.95em; color: var(--text-color);'>{safe_text}</p>"
        f"</div>"
    )
def format_method_results_section(method_key, results, confidence_threshold=0.0):
    """Render one method's results as a headed section of a comparison.

    The heading text comes from ``METHOD_NAMES`` (falling back to a
    title-cased form of ``method_key``) and its color from ``METHOD_COLORS``
    (falling back to the theme text color). The results list is rendered
    after the heading is resolved.
    """
    heading = METHOD_NAMES.get(method_key, method_key.replace('_', ' ').title())
    accent = METHOD_COLORS.get(method_key, "var(--text-color)")
    body = format_result_list_html(results, confidence_threshold)

    # The outer div carries a subtle dashed rule separating methods; the
    # heading is tinted and underlined with the method's own color.
    wrapper_open = "<div style='margin-bottom: 15px; padding-bottom: 15px; border-bottom: 1px dashed rgba(128, 128, 128, 0.2);'>"
    heading_html = (
        f"<h4 style='margin-top: 0; margin-bottom: 10px; color: {accent}; "
        f"border-bottom: 2px solid {accent}; padding-bottom: 5px; display: inline-block;'>{heading}</h4>"
    )
    return wrapper_open + heading_html + f"{body}" + "</div>"
# --- Main Formatting Functions Called by UI Tabs ---
def format_comparison_html(product, method_results, expanded_description="", confidence_threshold=0.5):
    """Build the comparison card for one product across several methods.

    An "Expanded Description" panel is placed first when
    ``expanded_description`` is non-empty. Sections then follow in the fixed
    order base, voyage, chicory, openai, skipping any method that is absent
    from ``method_results`` or has an empty result. The dashed separator is
    stripped from the final section so the card does not end on a rule.
    """
    dashed_rule = "border-bottom: 1px dashed rgba(128, 128, 128, 0.2);"

    pieces = []
    if expanded_description:
        pieces.append(format_info_panel("Expanded Description", expanded_description))

    sections = [
        format_method_results_section(
            method_key=key,
            results=method_results.get(key, []),
            confidence_threshold=confidence_threshold,
        )
        for key in ("base", "voyage", "chicory", "openai")
        if key in method_results and method_results[key]
    ]

    if sections:
        # str.replace is a no-op when the rule is not present.
        sections[-1] = sections[-1].replace(dashed_rule, "")
        pieces.extend(sections)

    return format_result_card(title=product, content_html="".join(pieces))
def format_reranking_results_html(results, match_type="ingredients", show_scores=True, include_explanation=False,
                                  method="voyage", confidence_threshold=0.0):
    """Render reranker output as one card per product.

    Each element of ``results`` is read with ``.get`` for the keys
    ``product_name``, ``matching_items``, ``item_scores``, ``explanation``
    and ``confidence``. When ``item_scores`` does not line up one-to-one
    with ``matching_items``, every item receives the result's overall
    ``confidence`` (default ``0.0``) instead.

    For ``match_type == "categories"``, an item whose string form contains
    ``":"`` is split on the first colon into an id and a text part.

    A card is emitted only if it has an explanation or at least one list
    row. ``show_scores`` and ``method`` are accepted but not used by this
    function.
    """
    if not results:
        return f"<p style='color: grey; font-style: italic;'>No {match_type.lower()} matches found.</p>"

    split_ids = match_type == "categories"
    cards = []

    for entry in results:
        card_title = entry.get("product_name", "Unknown Product")
        items = entry.get("matching_items", [])
        scores = entry.get("item_scores", [])
        note = entry.get("explanation", "") if include_explanation else ""

        # Fall back to the overall confidence when per-item scores are unusable.
        if len(scores) != len(items):
            scores = [entry.get("confidence", 0.0)] * len(items)

        matches = []
        for raw_item, item_score in zip(items, scores):
            label = str(raw_item)
            if split_ids and ":" in label:
                head, _, tail = label.partition(":")
                matches.append((head.strip(), tail.strip(), item_score))
            else:
                matches.append((label, item_score))

        panel_html = format_info_panel("Expanded Description", note) if note else ""
        list_html = format_result_list_html(matches, confidence_threshold)

        # "<li" only appears when the rendered list has at least one row.
        if note or "<li" in list_html:
            cards.append(format_result_card(title=card_title, content_html=panel_html + list_html))

    return "".join(cards)
def format_categories_html(product, categories, chicory_result=None, explanation="", match_type="categories", confidence_threshold=0.0):
    """Render non-reranked category or ingredient matches as a product card.

    The card body is assembled in this order: an optional "Expanded
    Description" panel, an optional Chicory block (only when
    ``chicory_result`` is truthy and ``match_type == "ingredients"``), then
    the heading and list for ``categories``.

    Returns an empty string when there is nothing to show, i.e. no
    explanation, no dict-shaped Chicory result and no list rows.
    """
    parts = []
    show_card = False

    if explanation:
        parts.append(format_info_panel("Expanded Description", explanation))
        show_card = True

    # Chicory output only applies to ingredient matching.
    if chicory_result and match_type == "ingredients":
        chicory_title = METHOD_NAMES.get("chicory", "Chicory Parser")
        chicory_color = METHOD_COLORS.get('chicory', '#9b59b6')
        parts.append(
            f"<h4 style='margin-top: 15px; margin-bottom: 10px; color: {chicory_color};'>{chicory_title}</h4>"
        )
        if not isinstance(chicory_result, dict):
            parts.append("<p style='color: grey; font-style: italic;'>No Chicory results available.</p>")
        else:
            ingredient = chicory_result.get("ingredient", "Not found")
            pill = format_confidence_badge(chicory_result.get("confidence", 0))
            parts.append(
                f"<div style='display: flex; justify-content: space-between; align-items: center; padding: 8px; border-radius: 4px; margin-bottom: 15px; background-color: var(--secondary-background-color); border: 1px solid rgba(128, 128, 128, 0.2);'>"
                f"<span style='flex-grow: 1; margin-right: 10px; color: var(--text-color);'>{ingredient}</span>"
                f"{pill}"
                f"</div>"
            )
            show_card = True

    # Main category/ingredient results, headed in the match type's color
    # (theme text color when the type has no entry).
    match_title = METHOD_NAMES.get(match_type, match_type.capitalize())
    heading_color = METHOD_COLORS.get(match_type, "var(--text-color)")
    parts.append(
        f"<h4 style='margin-top: 15px; margin-bottom: 10px; color: {heading_color};'>{match_title}</h4>"
    )

    list_html = format_result_list_html(categories, confidence_threshold)
    parts.append(list_html)

    # "<li" only appears when the rendered list has at least one row.
    if not (show_card or "<li" in list_html):
        return ""
    return format_result_card(title=product, content_html="".join(parts))