Spaces:

eliago
/

product_ingredient_demo

Sleeping

App Files Community

esilver committed on Mar 24

Commit

1e737a1

1 Parent(s): 9a56a50

Added expansion for embeddings

Browse files

Files changed (7) hide show

api_utils.py +2 -2
comparison.py +1 -1
similarity.py +1 -1
ui.py +12 -2
ui_category_matching.py +27 -6
ui_formatters.py +11 -2
ui_ingredient_matching.py +27 -14

api_utils.py CHANGED Viewed

@@ -161,7 +161,7 @@ def rank_ingredients_openai(
     candidates: List[str],
     expanded_description: str = None,
     client=None,
-    model: str = "o3-mini",
     max_results: int = 3,
     confidence_threshold: float = 0.5,
     debug: bool = False
@@ -273,7 +273,7 @@ def rank_categories_openai(
     categories: dict,
     expanded_description: str = None,
     client=None,
-    model: str = "o3-mini",
     max_results: int = 5,
     confidence_threshold: float = 0.5,
     debug: bool = False

     candidates: List[str],
     expanded_description: str = None,
     client=None,
+    model: str = "gpt-4o-mini",
     max_results: int = 3,
     confidence_threshold: float = 0.5,
     debug: bool = False
     categories: dict,
     expanded_description: str = None,
     client=None,
+    model: str = "gpt-4o-mini",
     max_results: int = 5,
     confidence_threshold: float = 0.5,
     debug: bool = False

comparison.py CHANGED Viewed

@@ -139,7 +139,7 @@ def compare_ingredient_methods(products: List[str], ingredients_dict: Dict[str,
                 product=product,
                 candidates=candidate_ingredients,
                 client=openai_client,
-                model="o3-mini",
                 max_results=final_top_n,
                 confidence_threshold=confidence_threshold
             )

                 product=product,
                 candidates=candidate_ingredients,
                 client=openai_client,
+                model="gpt-4o-mini",
                 max_results=final_top_n,
                 confidence_threshold=confidence_threshold
             )

similarity.py CHANGED Viewed

@@ -205,7 +205,7 @@ def hybrid_ingredient_matching(products: List[str], ingredients_dict: Dict[str,
             # Apply re-ranking using OpenAI's structured output
             response = openai_client.responses.create(
-                model="o3-mini",
                 # reasoning={"effort": "low"},
                 input=[
                     {"role": "system", "content": "You are a food ingredient matching expert. Select the single best ingredient that matches the given product."},

             # Apply re-ranking using OpenAI's structured output
             response = openai_client.responses.create(
+                model="gpt-4o-mini",
                 # reasoning={"effort": "low"},
                 input=[
                     {"role": "system", "content": "You are a food ingredient matching expert. Select the single best ingredient that matches the given product."},

ui.py CHANGED Viewed

@@ -26,6 +26,11 @@ def create_demo():
                         )
                         input_controls = gr.Row()
                         with input_controls:
                             top_n = gr.Slider(1, 25, 10, step=1, label="Top N Results")
                             confidence = gr.Slider(0.1, 0.9, 0.5, label="Similarity Threshold")
@@ -50,6 +55,11 @@ def create_demo():
                         )
                         category_input_controls = gr.Row()
                         with category_input_controls:
                             category_top_n = gr.Slider(1, 10, 5, step=1, label="Top N Categories")
                             category_confidence = gr.Slider(0.1, 0.9, 0.5, label="Matching Threshold")
@@ -168,14 +178,14 @@ def create_demo():
         # Connect buttons for ingredient matching
         categorize_btn.click(
             fn=categorize_products,
-            inputs=[text_input, gr.State(False), top_n, confidence],
             outputs=[text_output],
         )
         # Connect buttons for category matching
         match_categories_btn.click(
             fn=categorize_products_by_category,
-            inputs=[category_text_input, gr.State(False), category_top_n, category_confidence],
             outputs=[category_output],
         )

                         )
                         input_controls = gr.Row()
                         with input_controls:
+                            use_expansion = gr.Checkbox(
+                                value=False,
+                                label="Use Description Expansion",
+                                info="Expand product descriptions using AI before matching"
+                            )
                             top_n = gr.Slider(1, 25, 10, step=1, label="Top N Results")
                             confidence = gr.Slider(0.1, 0.9, 0.5, label="Similarity Threshold")
                         )
                         category_input_controls = gr.Row()
                         with category_input_controls:
+                            category_use_expansion = gr.Checkbox(
+                                value=False,
+                                label="Use Description Expansion",
+                                info="Expand product descriptions using AI before matching"
+                            )
                             category_top_n = gr.Slider(1, 10, 5, step=1, label="Top N Categories")
                             category_confidence = gr.Slider(0.1, 0.9, 0.5, label="Matching Threshold")
         # Connect buttons for ingredient matching
         categorize_btn.click(
             fn=categorize_products,
+            inputs=[text_input, gr.State(False), use_expansion, top_n, confidence],
             outputs=[text_output],
         )
         # Connect buttons for category matching
         match_categories_btn.click(
             fn=categorize_products_by_category,
+            inputs=[category_text_input, gr.State(False), category_use_expansion, category_top_n, category_confidence],
             outputs=[category_output],
         )

ui_category_matching.py CHANGED Viewed

@@ -3,10 +3,12 @@ from utils import SafeProgress
 from category_matching import load_categories, match_products_to_categories
 from ui_core import parse_input
 from ui_formatters import format_categories_html
-def categorize_products_by_category(product_input, is_file=False, top_n=5, confidence_threshold=0.5, progress=gr.Progress()):
     """Categorize products by matching them to predefined categories"""
-    progress_tracker = SafeProgress(progress)
     progress_tracker(0, desc="Starting categorization...")
     # Parse input
@@ -14,6 +16,16 @@ def categorize_products_by_category(product_input, is_file=False, top_n=5, confi
     if error:
         return error
     # Load categories
     progress_tracker(0.2, desc="Loading categories...")
     categories = load_categories()
@@ -21,11 +33,11 @@ def categorize_products_by_category(product_input, is_file=False, top_n=5, confi
     # Match products to categories
     progress_tracker(0.3, desc="Matching products to categories...")
     match_results = match_products_to_categories(
-        product_names,
         categories,
         top_n=int(top_n),
         confidence_threshold=confidence_threshold,
-        progress=progress
     )
     # Format results
@@ -33,8 +45,17 @@ def categorize_products_by_category(product_input, is_file=False, top_n=5, confi
     output_html = "<div style='font-family: Arial, sans-serif; max-width: 100%; overflow-x: auto;'>"
     output_html += f"<p style='color: #555;'>Matched {len(product_names)} products to categories.</p>"
-    for product, categories in match_results.items():
-        output_html += format_categories_html(product, categories)
         output_html += "<hr style='margin: 15px 0; border: 0; border-top: 1px solid #eee;'>"
     output_html += "</div>"

 from category_matching import load_categories, match_products_to_categories
 from ui_core import parse_input
 from ui_formatters import format_categories_html
+from openai_expansion import expand_product_descriptions
+def categorize_products_by_category(product_input, is_file=False, use_expansion=False, top_n=10, confidence_threshold=0.5):
     """Categorize products by matching them to predefined categories"""
+    progress_tracker = SafeProgress(gr.Progress())
     progress_tracker(0, desc="Starting categorization...")
     # Parse input
     if error:
         return error
+    # Optional description expansion
+    expanded_descriptions = {}
+    if use_expansion:
+        progress_tracker(0.1, desc="Expanding product descriptions...")
+        expanded_descriptions = expand_product_descriptions(product_names, progress=progress_tracker)
+        # Use expanded descriptions for matching if available
+        products_to_match = [expanded_descriptions.get(p, p) for p in product_names]
+    else:
+        products_to_match = product_names
     # Load categories
     progress_tracker(0.2, desc="Loading categories...")
     categories = load_categories()
     # Match products to categories
     progress_tracker(0.3, desc="Matching products to categories...")
     match_results = match_products_to_categories(
+        products_to_match,
         categories,
         top_n=int(top_n),
         confidence_threshold=confidence_threshold,
+        progress=progress_tracker
     )
     # Format results
     output_html = "<div style='font-family: Arial, sans-serif; max-width: 100%; overflow-x: auto;'>"
     output_html += f"<p style='color: #555;'>Matched {len(product_names)} products to categories.</p>"
+    for i, product in enumerate(product_names):
+        categories = match_results.get(products_to_match[i], [])
+        expansion_text = ""
+        if use_expansion and product in expanded_descriptions:
+            expansion_text = f"<div style='color: #666; font-style: italic; margin: 5px 0;'>Expanded description: {expanded_descriptions[product]}</div>"
+        output_html += format_categories_html(
+            product,
+            categories,
+            explanation=expansion_text
+        )
         output_html += "<hr style='margin: 15px 0; border: 0; border-top: 1px solid #eee;'>"
     output_html += "</div>"

ui_formatters.py CHANGED Viewed

@@ -61,7 +61,7 @@ METHOD_NAMES = {
     "base": "Base Embeddings",
     "voyage": "Voyage AI Reranker",
     "chicory": "Chicory Parser",
-    "openai": "OpenAI o3-mini",
     "expanded": "Expanded Description",
     "hybrid": "Hybrid Matching",
     "categories": "Category Matches"
@@ -512,7 +512,7 @@ def set_theme(theme_name):
         return True
     return False
-def format_categories_html(product, categories, chicory_result=None, header_color=None):
     """
     Format category matching results as HTML
@@ -521,12 +521,20 @@ def format_categories_html(product, categories, chicory_result=None, header_colo
         categories: List of (category, score) tuples
         chicory_result: Optional chicory parser result for the product
         header_color: Optional header background color
     Returns:
         HTML string
     """
     content = ""
     # Add Chicory results if available
     if chicory_result:
         content += f"<div style='{STYLES['info_panel']}'>"
@@ -554,3 +562,4 @@ def format_categories_html(product, categories, chicory_result=None, header_colo
     )
     return format_result_card(title=product, content=content)

     "base": "Base Embeddings",
     "voyage": "Voyage AI Reranker",
     "chicory": "Chicory Parser",
+    "openai": "OpenAI",
     "expanded": "Expanded Description",
     "hybrid": "Hybrid Matching",
     "categories": "Category Matches"
         return True
     return False
+def format_categories_html(product, categories, chicory_result=None, header_color=None, explanation=""):
     """
     Format category matching results as HTML
         categories: List of (category, score) tuples
         chicory_result: Optional chicory parser result for the product
         header_color: Optional header background color
+        explanation: Optional expanded description text
     Returns:
         HTML string
     """
     content = ""
+    # Add expanded description if available
+    if explanation:
+        content += f"<div style='{STYLES['info_panel']}'>"
+        content += "<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>Expanded Description</h4>"
+        content += f"<p style='margin-bottom: 8px;'>{explanation}</p>"
+        content += "</div>"
     # Add Chicory results if available
     if chicory_result:
         content += f"<div style='{STYLES['info_panel']}'>"
     )
     return format_result_card(title=product, content=content)

ui_ingredient_matching.py CHANGED Viewed

@@ -5,10 +5,11 @@ from similarity import compute_similarities
 from chicory_api import call_chicory_parser
 from ui_core import embeddings, parse_input
 from ui_formatters import format_categories_html, create_results_container
-def categorize_products(product_input, is_file=False, top_n=10, confidence_threshold=0.5, progress=gr.Progress()):
     """Categorize products from text input or file"""
-    progress_tracker = SafeProgress(progress)
     progress_tracker(0, desc="Starting...")
     # Parse input
@@ -20,40 +21,52 @@ def categorize_products(product_input, is_file=False, top_n=10, confidence_thres
     if not embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
     # Create embeddings
-    progress_tracker(0.2, desc="Generating product embeddings...")
-    products_embeddings = create_product_embeddings(product_names, progress=progress)
     if not products_embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to generate product embeddings. Please try again with different product names.</div>"
     # Call Chicory Parser API
-    progress_tracker(0.5, desc="Calling Chicory Parser API...")
-    chicory_results = call_chicory_parser(product_names, progress=progress)
     # Compute similarities
-    progress_tracker(0.7, desc="Computing similarities...")
     all_similarities = compute_similarities(embeddings, products_embeddings)
     # Format results
     progress_tracker(0.9, desc="Formatting results...")
-    output_html = "<div style='font-family: Arial, sans-serif; max-width: 100%; overflow-x: auto;'>"
-    output_html += f"<p style='color: #555;'>Processing {len(product_names)} products.</p>"
     for product, similarities in all_similarities.items():
         filtered_similarities = [(ingredient, score) for ingredient, score in similarities if score >= confidence_threshold]
         top_similarities = filtered_similarities[:int(top_n)]
         # Debug info for Chicory results
         chicory_data = chicory_results.get(product, [])
-        output_html += format_categories_html(product, top_similarities, chicory_result=chicory_data)
         output_html += "<hr style='margin: 15px 0; border: 0; border-top: 1px solid #eee;'>"
-    output_html += "</div>"
     if not all_similarities:
         output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
     progress_tracker(1.0, desc="Done!")
-    return output_html

 from chicory_api import call_chicory_parser
 from ui_core import embeddings, parse_input
 from ui_formatters import format_categories_html, create_results_container
+from openai_expansion import expand_product_descriptions
+def categorize_products(product_input, is_file=False, use_expansion=False, top_n=10, confidence_threshold=0.5):
     """Categorize products from text input or file"""
+    progress_tracker = SafeProgress(gr.Progress())
     progress_tracker(0, desc="Starting...")
     # Parse input
     if not embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
+    # Optional description expansion
+    expanded_descriptions = {}
+    if use_expansion:
+        progress_tracker(0.2, desc="Expanding product descriptions...")
+        expanded_descriptions = expand_product_descriptions(product_names, progress=gr.Progress())
     # Create embeddings
+    progress_tracker(0.4, desc="Generating product embeddings...")
+    products_embeddings = create_product_embeddings(product_names, progress=gr.Progress())
     if not products_embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to generate product embeddings. Please try again with different product names.</div>"
     # Call Chicory Parser API
+    progress_tracker(0.6, desc="Calling Chicory Parser API...")
+    chicory_results = call_chicory_parser(product_names, progress=gr.Progress())
     # Compute similarities
+    progress_tracker(0.8, desc="Computing similarities...")
     all_similarities = compute_similarities(embeddings, products_embeddings)
     # Format results
     progress_tracker(0.9, desc="Formatting results...")
+    output_html = f"<p style='color: #555;'>Processing {len(product_names)} products.</p>"
     for product, similarities in all_similarities.items():
         filtered_similarities = [(ingredient, score) for ingredient, score in similarities if score >= confidence_threshold]
         top_similarities = filtered_similarities[:int(top_n)]
+        # Add expansion explanation if available
+        expansion_text = expanded_descriptions.get(product, "") if use_expansion else ""
         # Debug info for Chicory results
         chicory_data = chicory_results.get(product, [])
+        output_html += format_categories_html(
+            product,
+            top_similarities,
+            chicory_result=chicory_data,
+            explanation=expansion_text
+        )
         output_html += "<hr style='margin: 15px 0; border: 0; border-top: 1px solid #eee;'>"
+    output_html += "</div>"
     if not all_similarities:
         output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
     progress_tracker(1.0, desc="Done!")
+    return create_results_container(output_html)