Spaces:

eliago
/

product_ingredient_demo

Sleeping

App Files Community

esilver committed on Mar 24

Commit

9a56a50

1 Parent(s): 8f1969e

some consolidation

Browse files

Files changed (6) hide show

api_utils.py +2 -2
category_matching.py +1 -1
ui.py +54 -101
ui_expanded_matching.py +50 -268
ui_formatters.py +241 -122
ui_hybrid_matching.py +50 -36

api_utils.py CHANGED Viewed

@@ -112,7 +112,7 @@ def openai_structured_query(
     prompt: str,
     system_message: str = "You are a helpful assistant.",
     schema: dict = None,
-    model: str = "o3-mini",
     client=None,
     schema_name: str = "structured_output"
 ) -> dict:
@@ -233,7 +233,7 @@ def rank_ingredients_openai(
         # Make the API call directly for more control
         response = client.responses.create(
             model=model,
-            reasoning={"effort": "low"},
             input=[
                 {"role": "system", "content": f"You are a food ingredient matching expert. Rank the top {max_results} ingredient based on how well they match the given product. Only include ingredients with relevance score >= {confidence_threshold}."},
                 {"role": "user", "content": prompt}

     prompt: str,
     system_message: str = "You are a helpful assistant.",
     schema: dict = None,
+    model: str = "gpt-4o-mini",
     client=None,
     schema_name: str = "structured_output"
 ) -> dict:
         # Make the API call directly for more control
         response = client.responses.create(
             model=model,
+            # reasoning={"effort": "low"},
             input=[
                 {"role": "system", "content": f"You are a food ingredient matching expert. Rank the top {max_results} ingredient based on how well they match the given product. Only include ingredients with relevance score >= {confidence_threshold}."},
                 {"role": "user", "content": prompt}

category_matching.py CHANGED Viewed

@@ -220,7 +220,7 @@ def hybrid_category_matching(products: List[str], categories: Dict[str, str],
         # Extract just the category descriptions for re-ranking
         candidate_ids = [c[0] for c in candidates]
-        candidate_texts = [f"Category: {c[1]}" for c in candidates]
         try:
             # Apply re-ranking to the candidates

         # Extract just the category descriptions for re-ranking
         candidate_ids = [c[0] for c in candidates]
+        candidate_texts = [f"{c[1]}" for c in candidates]
         try:
             # Apply re-ranking to the candidates

ui.py CHANGED Viewed

@@ -1,16 +1,12 @@
 import gradio as gr
 from comparison import compare_ingredient_methods_ui
-# Import from our new UI modules
 from ui_core import embeddings, get_css, load_examples
 from ui_ingredient_matching import categorize_products
 from ui_category_matching import categorize_products_by_category
 from ui_hybrid_matching import categorize_products_with_voyage_reranking
 from ui_expanded_matching import categorize_products_with_openai_reranking
-from ui_formatters import get_formatted_css
 def create_demo():
     """Create the Gradio interface"""
@@ -19,7 +15,7 @@ def create_demo():
         with gr.Tabs() as tabs:
             # Original Ingredient Matching Tab
-            with gr.TabItem("Ingredient Matching"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         # Input section
@@ -43,7 +39,7 @@ def create_demo():
             # New Category Matching Tab
-            with gr.TabItem("Category Matching"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         # Input section
@@ -65,73 +61,59 @@ def create_demo():
                         # Results section
                         category_output = gr.HTML(label="Category Matching Results", elem_id="results-container")
-            # Replace the "Hybrid Category Matching" tab
-            with gr.TabItem("Voyage AI Reranking"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        # Input section
-                        voyage_text_input = gr.Textbox(
-                            lines=10,
-                            placeholder="Enter product names, one per line",
-                            label="Product Names"
-                        )
-                        voyage_input_controls = gr.Row()
-                        with voyage_input_controls:
-                            voyage_expansion_switch = gr.Checkbox(value=False, label="Use Description Expansion",
-                                                                info="Expand product descriptions using AI before matching")
-                            voyage_embedding_top_n = gr.Slider(1, 50, 20, step=1, label="Embedding Top N Results")
-                            voyage_final_top_n = gr.Slider(1, 10, 5, step=1, label="Final Top N Categories")
-                            voyage_confidence = gr.Slider(0.1, 0.9, 0.5, label="Matching Threshold")
-                        # Add this to the Voyage AI tab, similar to the OpenAI tab:
-                        voyage_match_type = gr.Radio(
-                            choices=["ingredients", "categories"],
-                            value="categories",  # Default to categories since that was the original focus
-                            label="Match Type",
-                            info="Choose whether to match against ingredients or categories"
-                        )
-                        with gr.Row():
-                            voyage_examples_btn = gr.Button("Load Examples", variant="secondary")
-                            voyage_match_btn = gr.Button("Match using Voyage Reranking", variant="primary")
-                    with gr.Column(scale=1):
-                        # Results section
-                        voyage_output = gr.HTML(label="Voyage Reranking Results", elem_id="results-container")
-            # Replace the "Expanded Description Matching" tab
-            with gr.TabItem("OpenAI Reranking"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        # Input section
-                        openai_text_input = gr.Textbox(
-                            lines=10,
-                            placeholder="Enter product names, one per line",
-                            label="Product Names"
-                        )
-                        openai_input_controls = gr.Row()
-                        with openai_input_controls:
-                            openai_expansion_switch = gr.Checkbox(value=False, label="Use Description Expansion",
-                                    info="Expand product descriptions using AI before matching")
-                            openai_top_n = gr.Slider(1, 20, 10, step=1, label="Top N Results")
-                            openai_confidence = gr.Slider(0.1, 0.9, 0.5, label="Matching Threshold")
-                        # Add toggle here for matching type
-                        openai_match_type = gr.Radio(
-                            choices=["ingredients", "categories"],
-                            value="ingredients",
-                            label="Match Type",
-                            info="Choose whether to match against ingredients or categories"
-                        )
-                        with gr.Row():
-                            openai_match_btn = gr.Button("Match with OpenAI Reranking", variant="primary")
-                            openai_examples_btn = gr.Button("Load Examples")
-                    with gr.Column(scale=1):
-                        # Results section
-                        openai_output = gr.HTML(label="OpenAI Reranking Results", elem_id="results-container")
             # New Comparison Tab
             with gr.TabItem("Compare Methods"):
@@ -196,37 +178,8 @@ def create_demo():
             inputs=[category_text_input, gr.State(False), category_top_n, category_confidence],
             outputs=[category_output],
         )
-        # Connect buttons for Voyage reranking (previously hybrid matching)
-        voyage_match_btn.click(
-            fn=categorize_products_with_voyage_reranking,  # New function to create
-            inputs=[voyage_text_input, gr.State(False), voyage_expansion_switch, voyage_embedding_top_n,
-                    voyage_final_top_n, voyage_confidence, voyage_match_type],
-            outputs=[voyage_output],
-        )
-        voyage_examples_btn.click(
-            fn=load_examples,
-            inputs=[],
-            outputs=voyage_text_input
-        )
-        # Connect buttons for OpenAI reranking (previously expanded description matching)
-        openai_match_btn.click(
-            fn=categorize_products_with_openai_reranking,  # New function to create
-            inputs=[openai_text_input, gr.State(False), openai_expansion_switch,
-                    openai_top_n, openai_confidence, openai_match_type],
-            outputs=[openai_output],
-        )
-        openai_examples_btn.click(
-            fn=load_examples,
-            inputs=[],
-            outputs=openai_text_input
-        )
-        # Examples buttons
         examples_btn.click(
             fn=load_examples,
             inputs=[],

 import gradio as gr
 from comparison import compare_ingredient_methods_ui
+# Import from our UI modules
 from ui_core import embeddings, get_css, load_examples
 from ui_ingredient_matching import categorize_products
 from ui_category_matching import categorize_products_by_category
 from ui_hybrid_matching import categorize_products_with_voyage_reranking
 from ui_expanded_matching import categorize_products_with_openai_reranking
 def create_demo():
     """Create the Gradio interface"""
         with gr.Tabs() as tabs:
             # Original Ingredient Matching Tab
+            with gr.TabItem("Ingredient Embeddings"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         # Input section
             # New Category Matching Tab
+            with gr.TabItem("Category Embeddings"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         # Input section
                         # Results section
                         category_output = gr.HTML(label="Category Matching Results", elem_id="results-container")
+            # Common function to create reranking UI tabs
+            def create_reranking_tab(tab_name, fn_name, default_match="ingredients"):
+                with gr.TabItem(tab_name):
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            # Input section
+                            tab_input = gr.Textbox(
+                                lines=10,
+                                placeholder="Enter product names, one per line",
+                                label="Product Names"
+                            )
+                            with gr.Row():
+                                tab_expansion = gr.Checkbox(
+                                    value=False,
+                                    label="Use Description Expansion",
+                                    info="Expand product descriptions using AI before matching"
+                                )
+                                tab_emb_top_n = gr.Slider(1, 50, 20, step=1, label="Embedding Top N Results")
+                                tab_top_n = gr.Slider(1, 10, 5, step=1, label="Final Top N Results")
+                                tab_confidence = gr.Slider(0.1, 0.9, 0.5, label="Matching Threshold")
+                            tab_match_type = gr.Radio(
+                                choices=["ingredients", "categories"],
+                                value=default_match,
+                                label="Match Type",
+                                info="Choose whether to match against ingredients or categories"
+                            )
+                            with gr.Row():
+                                tab_examples_btn = gr.Button("Load Examples", variant="secondary")
+                                tab_match_btn = gr.Button(f"Match using {tab_name}", variant="primary")
+                        with gr.Column(scale=1):
+                            # Results section
+                            tab_output = gr.HTML(label=f"{tab_name} Results", elem_id="results-container")
+                    # Connect button events
+                    tab_match_btn.click(
+                        fn=fn_name,
+                        inputs=[tab_input, gr.State(False), tab_expansion, tab_emb_top_n,
+                                tab_top_n, tab_confidence, tab_match_type],
+                        outputs=[tab_output],
+                    )
+                    tab_examples_btn.click(
+                        fn=load_examples,
+                        inputs=[],
+                        outputs=tab_input
+                    )
+            # Create the reranking tabs using the shared function
+            create_reranking_tab("Voyage AI Reranking", categorize_products_with_voyage_reranking, "categories")
+            create_reranking_tab("OpenAI Reranking", categorize_products_with_openai_reranking, "ingredients")
             # New Comparison Tab
             with gr.TabItem("Compare Methods"):
             inputs=[category_text_input, gr.State(False), category_top_n, category_confidence],
             outputs=[category_output],
         )
+        # Examples buttons for the first two tabs
         examples_btn.click(
             fn=load_examples,
             inputs=[],

ui_expanded_matching.py CHANGED Viewed

@@ -4,248 +4,19 @@ from embeddings import create_product_embeddings
 from similarity import compute_similarities
 from openai_expansion import expand_product_descriptions
 from ui_core import embeddings, parse_input, CATEGORY_EMBEDDINGS_PATH
-from ui_formatters import format_expanded_results_html, create_results_container
 from api_utils import get_openai_client, process_in_parallel, rank_ingredients_openai, rank_categories_openai
 from category_matching import load_categories, load_category_embeddings
 import json
-import os
-def categorize_products_with_expansion(product_input, is_file=False, top_n=10, confidence_threshold=0.5, match_type="ingredients", progress=gr.Progress()):
-    """
-    Categorize products using expanded descriptions from OpenAI
-    Args:
-        product_input: Text input with product names
-        is_file: Whether the input is a file
-        top_n: Number of top results to show
-        confidence_threshold: Confidence threshold for matches
-        match_type: Either "ingredients" or "categories"
-        progress: Progress tracking object
-    Returns:
-        HTML formatted results
-    """
-    progress_tracker = SafeProgress(progress)
-    progress_tracker(0, desc="Starting...")
-    # Parse input
-    product_names, error = parse_input(product_input, is_file)
-    if error:
-        return error
-    # Validate embeddings are loaded if doing ingredient matching
-    if match_type == "ingredients" and not embeddings:
-        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
-    # Expand product descriptions
-    progress_tracker(0.2, desc="Expanding product descriptions...")
-    expanded_descriptions = expand_product_descriptions(product_names, progress=progress)
-    if not expanded_descriptions:
-        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: Failed to expand product descriptions. Please try again or check your OpenAI API key.</div>"
-    # Get shared OpenAI client
-    openai_client = get_openai_client()
-    if match_type == "ingredients":
-        # Generate product embeddings
-        progress_tracker(0.4, desc="Generating product embeddings...")
-        product_embeddings = create_product_embeddings(product_names, progress=progress)
-        # Compute embedding similarities for ingredients
-        progress_tracker(0.6, desc="Computing ingredient similarities...")
-        all_similarities = compute_similarities(embeddings, product_embeddings)
-        if not all_similarities:
-            return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"
-        # Setup for OpenAI reranking
-        embedding_top_n = 20  # Number of candidates to consider from embeddings
-        progress_tracker(0.7, desc="Re-ranking with expanded descriptions...")
-        # Function for processing each product
-        def process_reranking(product):
-            if product not in all_similarities:
-                return product, []
-            candidates = all_similarities[product][:embedding_top_n]
-            if not candidates:
-                return product, []
-            candidate_ingredients = [c[0] for c in candidates]
-            expanded_text = expanded_descriptions.get(product, "")
-            try:
-                # Use the shared utility function
-                reranked_ingredients = rank_ingredients_openai(
-                    product=product,
-                    candidates=candidate_ingredients,
-                    expanded_description=expanded_text,
-                    client=openai_client,
-                    model="o3-mini",
-                    max_results=top_n,
-                    confidence_threshold=confidence_threshold,
-                    debug=True
-                )
-                return product, reranked_ingredients
-            except Exception as e:
-                print(f"Error reranking {product}: {e}")
-                # Fall back to top embedding match
-                return product, candidates[:1] if candidates[0][1] >= confidence_threshold else []
-        # Process all products in parallel
-        final_results = process_in_parallel(
-            items=product_names,
-            processor_func=process_reranking,
-            max_workers=min(10, len(product_names)),
-            progress_tracker=progress_tracker,
-            progress_start=0.7,
-            progress_end=0.9,
-            progress_desc="Re-ranking"
-        )
-    else:  # categories
-        # Load category embeddings instead of JSON categories
-        progress_tracker(0.5, desc="Loading category embeddings...")
-        category_embeddings = load_category_embeddings()
-        if not category_embeddings:
-            return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category embeddings found. Please check that the embeddings file exists at data/category_embeddings.pickle.</div>"
-        # Generate product embeddings
-        progress_tracker(0.6, desc="Generating product embeddings...")
-        product_embeddings = create_product_embeddings(product_names, progress=progress)
-        # Compute embedding similarities for categories
-        progress_tracker(0.7, desc="Computing category similarities...")
-        all_similarities = compute_similarities(category_embeddings, product_embeddings)
-        print(f'All similarities: {all_similarities}')
-        if not all_similarities:
-            return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category similarities found. Please try different product names.</div>"
-        embedding_top_n = min(20, top_n * 2)  # Number of candidates to consider from embeddings
-        # Collect all needed category IDs first
-        needed_category_ids = set()
-        for product, similarities in all_similarities.items():
-            for category_id, score in similarities[:embedding_top_n]:
-                if score >= confidence_threshold:
-                    needed_category_ids.add(category_id)
-        # Load only the needed categories from JSON
-        progress_tracker(0.75, desc="Loading category descriptions...")
-        category_descriptions = {}
-        if needed_category_ids:
-            try:
-                with open("categories.json", 'r') as f:
-                    categories_list = json.load(f)
-                    for item in categories_list:
-                        if item["id"] in needed_category_ids:
-                            category_descriptions[item["id"]] = item["text"]
-            except Exception as e:
-                print(f"Error loading category descriptions: {e}")
-        # Function to process each product
-        def process_category_matching(product):
-            if product not in all_similarities:
-                return product, []
-            # candidates = all_similarities[product][:embedding_top_n]
-            candidates = all_similarities[product][:embedding_top_n]
-            print(f'candidates: {candidates}')
-            if not candidates:
-                return product, []
-            # Get the expanded description
-            expanded_text = expanded_descriptions.get(product, "")
-            try:
-                # Use rank_categories_openai instead of match_products_to_categories_with_description
-                category_matches = rank_categories_openai(
-                    product=product,
-                    categories=category_descriptions,
-                    expanded_description=expanded_text,
-                    client=openai_client,
-                    # model="o3-mini",
-                    model="gpt-4o-mini",
-                    # model="gpt-4o",
-                    max_results=top_n,
-                    confidence_threshold=confidence_threshold,
-                    debug=True
-                )
-                # Format results with category descriptions if needed
-                formatted_matches = []
-                for category_id, score in category_matches:
-                    category_text = category_descriptions.get(category_id, "Unknown category")
-                    formatted_matches.append((category_id, category_text, score))
-                return product, formatted_matches
-            except Exception as e:
-                print(f"Error matching {product} to categories: {e}")
-                return product, []
-        # Process all products in parallel
-        final_results = process_in_parallel(
-            items=product_names,
-            processor_func=process_category_matching,
-            max_workers=min(10, len(product_names)),
-            progress_tracker=progress_tracker,
-            progress_start=0.7,
-            progress_end=0.9,
-            progress_desc="Category matching"
-        )
-    # Format results
-    progress_tracker(0.9, desc="Formatting results...")
-    result_elements = []
-    for product, matches in final_results.items():
-        result_elements.append(
-            format_expanded_results_html(
-                product=product,
-                results=matches,
-                expanded_description=expanded_descriptions.get(product, ""),
-                match_type=match_type
-            )
-        )
-    output_html = create_results_container(
-        result_elements,
-        header_text=f"Matched {len(product_names)} products to {match_type} using expanded descriptions."
-    )
-    if not final_results:
-        output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
-    progress_tracker(1.0, desc="Done!")
-    return output_html
-def categorize_products_with_openai_reranking(product_input, is_file=False, expansion_strength=0.0,
-                                             top_n=10, confidence_threshold=0.5, match_type="ingredients",
-                                             progress=gr.Progress()):
     """
     Categorize products using OpenAI reranking with optional description expansion
-    Args:
-        product_input: Text input with product names
-        is_file: Whether the input is a file
-        expansion_strength: 0.0-1.0 slider value for description expansion (0=none, 1=full)
-        top_n: Number of top results to show
-        confidence_threshold: Confidence threshold for matches
-        match_type: Either "ingredients" or "categories"
-        progress: Progress tracking object
-    Returns:
-        HTML formatted results
     """
     progress_tracker = SafeProgress(progress)
     progress_tracker(0, desc="Starting OpenAI reranking...")
     # Parse input
     product_names, error = parse_input(product_input, is_file)
     if error:
@@ -254,15 +25,11 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
     # Validate embeddings are loaded if doing ingredient matching
     if match_type == "ingredients" and not embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
     # Optional description expansion
     expanded_descriptions = {}
-    if expansion_strength > 0:
         progress_tracker(0.2, desc="Expanding product descriptions...")
         expanded_descriptions = expand_product_descriptions(product_names, progress=progress)
-    else:
-        # If no expansion, use product names as is (minimal descriptions)
-        expanded_descriptions = {product: product for product in product_names}
     # Get shared OpenAI client
     openai_client = get_openai_client()
@@ -279,9 +46,6 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
         if not all_similarities:
             return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"
-        # Setup for OpenAI reranking
-        embedding_top_n = 20  # Number of candidates to consider from embeddings
         progress_tracker(0.7, desc="Re-ranking with OpenAI...")
         # Function for processing each product
@@ -294,10 +58,11 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
                 return product, []
             candidate_ingredients = [c[0] for c in candidates]
-            expanded_text = expanded_descriptions.get(product, product)
             try:
-                # Use the shared utility function
                 reranked_ingredients = rank_ingredients_openai(
                     product=product,
                     candidates=candidate_ingredients,
@@ -305,7 +70,7 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
                     client=openai_client,
                     model="o3-mini",
                     max_results=top_n,
-                    confidence_threshold=confidence_threshold,
                     debug=True
                 )
@@ -314,7 +79,7 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
             except Exception as e:
                 print(f"Error reranking {product}: {e}")
                 # Fall back to top embedding match
-                return product, candidates[:1] if candidates[0][1] >= confidence_threshold else []
         # Process all products in parallel
         final_results = process_in_parallel(
@@ -346,14 +111,11 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
         if not all_similarities:
             return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category similarities found. Please try different product names.</div>"
-        embedding_top_n = min(20, top_n * 2)  # Number of candidates to consider from embeddings
-        # Collect all needed category IDs first
         needed_category_ids = set()
         for product, similarities in all_similarities.items():
             for category_id, score in similarities[:embedding_top_n]:
-                if score >= confidence_threshold:
-                    needed_category_ids.add(category_id)
         # Load only the needed categories from JSON
         progress_tracker(0.75, desc="Loading category descriptions...")
@@ -378,9 +140,10 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
                 return product, []
             # Get the expanded description or use product name if no expansion
-            expanded_text = expanded_descriptions.get(product, product)
             try:
                 category_matches = rank_categories_openai(
                     product=product,
                     categories=category_descriptions,
@@ -388,7 +151,7 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
                     client=openai_client,
                     model="gpt-4o-mini",
                     max_results=top_n,
-                    confidence_threshold=confidence_threshold,
                     debug=True
                 )
@@ -417,24 +180,43 @@ def categorize_products_with_openai_reranking(product_input, is_file=False, expa
     # Format results
     progress_tracker(0.9, desc="Formatting results...")
-    result_elements = []
     for product, matches in final_results.items():
-        result_elements.append(
-            format_expanded_results_html(
-                product=product,
-                results=matches,
-                expanded_description=expanded_descriptions.get(product, ""),
-                match_type=match_type
-            )
-        )
-    output_html = create_results_container(
-        result_elements,
-        header_text=f"Matched {len(product_names)} products to {match_type} using OpenAI reranking."
-    )
-    if not final_results:
-        output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
     progress_tracker(1.0, desc="Done!")
-    return output_html

 from similarity import compute_similarities
 from openai_expansion import expand_product_descriptions
 from ui_core import embeddings, parse_input, CATEGORY_EMBEDDINGS_PATH
+from ui_formatters import format_reranking_results_html
 from api_utils import get_openai_client, process_in_parallel, rank_ingredients_openai, rank_categories_openai
 from category_matching import load_categories, load_category_embeddings
 import json
+def categorize_products_with_openai_reranking(product_input, is_file=False, use_expansion=False,
+                                           embedding_top_n=20, top_n=10, confidence_threshold=0.5,
+                                           match_type="ingredients", progress=gr.Progress()):
     """
     Categorize products using OpenAI reranking with optional description expansion
     """
     progress_tracker = SafeProgress(progress)
     progress_tracker(0, desc="Starting OpenAI reranking...")
     # Parse input
     product_names, error = parse_input(product_input, is_file)
     if error:
     # Validate embeddings are loaded if doing ingredient matching
     if match_type == "ingredients" and not embeddings:
         return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No ingredient embeddings loaded. Please check that the embeddings file exists and is properly formatted.</div>"
     # Optional description expansion
     expanded_descriptions = {}
+    if use_expansion:
         progress_tracker(0.2, desc="Expanding product descriptions...")
         expanded_descriptions = expand_product_descriptions(product_names, progress=progress)
     # Get shared OpenAI client
     openai_client = get_openai_client()
         if not all_similarities:
             return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No similarities found. Please try different product names.</div>"
         progress_tracker(0.7, desc="Re-ranking with OpenAI...")
         # Function for processing each product
                 return product, []
             candidate_ingredients = [c[0] for c in candidates]
+            expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
             try:
+                # Use the shared utility function - now passing 0.0 as threshold to get all results
+                # We'll apply the threshold at display time
                 reranked_ingredients = rank_ingredients_openai(
                     product=product,
                     candidates=candidate_ingredients,
                     client=openai_client,
                     model="o3-mini",
                     max_results=top_n,
+                    confidence_threshold=0.0,  # Don't filter here, do it at display time
                     debug=True
                 )
             except Exception as e:
                 print(f"Error reranking {product}: {e}")
                 # Fall back to top embedding match
+                return product, candidates[:1]  # Don't filter here
         # Process all products in parallel
         final_results = process_in_parallel(
         if not all_similarities:
             return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>Error: No category similarities found. Please try different product names.</div>"
+        # Collect all needed category IDs first - don't filter by threshold here
         needed_category_ids = set()
         for product, similarities in all_similarities.items():
             for category_id, score in similarities[:embedding_top_n]:
+                needed_category_ids.add(category_id)
         # Load only the needed categories from JSON
         progress_tracker(0.75, desc="Loading category descriptions...")
                 return product, []
             # Get the expanded description or use product name if no expansion
+            expanded_text = expanded_descriptions.get(product, product) if use_expansion else product
             try:
+                # Pass 0.0 as threshold to get all results - apply threshold at display time
                 category_matches = rank_categories_openai(
                     product=product,
                     categories=category_descriptions,
                     client=openai_client,
                     model="gpt-4o-mini",
                     max_results=top_n,
+                    confidence_threshold=0.0,  # Don't filter here
                     debug=True
                 )
     # Format results
     progress_tracker(0.9, desc="Formatting results...")
+    # Create a list of result dictionaries in consistent format
+    formatted_results = []
     for product, matches in final_results.items():
+        # Include all products, even with no matches
+        formatted_result = {
+            "product_name": product,
+            "confidence": max([item[-1] for item in matches]) if matches else 0,
+            "matching_items": [],
+            "item_scores": [],  # Add item_scores to align with Voyage implementation
+            "explanation": expanded_descriptions.get(product, "") if use_expansion else ""
+        }
+        # Format matching items based on match type
+        if match_type == "ingredients":
+            formatted_result["matching_items"] = [item for item, score in matches]
+            formatted_result["item_scores"] = [score for item, score in matches]
+        else:  # categories
+            for cat_id, cat_desc, score in matches:
+                formatted_result["matching_items"].append(
+                    f"{cat_id}: {cat_desc}" if cat_desc else f"{cat_id}"
+                )
+                formatted_result["item_scores"].append(score)
+        formatted_results.append(formatted_result)
+    if not formatted_results:
+        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
+    result_html = format_reranking_results_html(
+        results=formatted_results,
+        match_type=match_type,
+        show_scores=True,
+        include_explanation=use_expansion,
+        method="openai",
+        confidence_threshold=confidence_threshold  # Pass the threshold to the formatter
+    )
     progress_tracker(1.0, desc="Done!")
+    return result_html

ui_formatters.py CHANGED Viewed

@@ -190,34 +190,92 @@ def format_comparison_html(product, method_results):
     # Create the full card with the methods content
     return format_result_card(title=product, content=methods_html)
-def format_expanded_results_html(product, results, expanded_description, match_type="ingredients"):
-    """Format results using expanded descriptions"""
-    content = ""
-    # Add expanded description section using shared function
-    content += format_info_panel("Expanded Description", expanded_description)
-    # Add results section using shared function
-    title_text = "Ingredients" if match_type == "ingredients" else "Categories"
-    content += format_results_section(results, title_text, match_type)
-    return format_result_card(title=product, content=content)
-def format_hybrid_results_html(product, results, summary, expanded_description=""):
-    """Format the hybrid matching results as HTML."""
-    content = ""
-    # Add expanded description if provided
-    if expanded_description:
-        content += format_info_panel("Expanded Description", expanded_description)
-    # Add summary
-    content += f"<p>{summary}</p>"
-    # Add results section using shared function
-    content += format_results_section(results, "Results", "hybrid")
-    return format_result_card(title=product, content=content)
 def create_results_container(html_elements, header_text=None):
     """
@@ -240,46 +298,136 @@ def create_results_container(html_elements, header_text=None):
     return container
-def format_categories_html(product, categories, chicory_result=None, header_color=None):
     """
-    Format category matching results as HTML
     Args:
         product: Product name
-        categories: List of (category, score) tuples
-        chicory_result: Optional chicory parser result for the product
-        header_color: Optional header background color
     Returns:
-        HTML string
     """
     content = ""
-    # Add Chicory results if available
-    if chicory_result:
         content += f"<div style='{STYLES['info_panel']}'>"
-        content += "<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>Chicory Parser Results</h4>"
-        if isinstance(chicory_result, dict):
-            ingredient = chicory_result.get("ingredient", "Not found")
-            confidence = chicory_result.get("confidence", 0)
-            confidence_percent = int(confidence * 100)
-            content += f"<div style='display: flex; justify-content: space-between; align-items: center; padding: 8px; border-radius: 4px;'>"
-            content += f"<span style='font-weight: bold;'>{ingredient}</span>"
-            content += f"<span style='background-color: {get_confidence_bg_color(confidence)}; border: 1px solid {get_confidence_color(confidence)}; color: #000; font-weight: 600; padding: 2px 6px; border-radius: 4px; min-width: 70px; text-align: center;'>Confidence: {confidence_percent}%</span>"
-            content += "</div>"
-        else:
-            content += f"<p style='{STYLES['empty_message']}'>No Chicory results available</p>"
         content += "</div>"
-    # Add the category results
-    content += format_method_results(
-        method_key="categories",
-        results=categories,
-        color_hex=header_color or METHOD_COLORS.get("categories", "#1abc9c")
-    )
     return format_result_card(title=product, content=content)
@@ -288,7 +436,7 @@ def get_formatted_css():
     Generate CSS for the UI based on current theme
     Returns:
-        CSS string ready to use in Gradio
     """
     return f"""
         .gradio-container .prose {{
@@ -336,102 +484,73 @@ def get_formatted_css():
 def set_theme(theme_name):
     """
-    Set the UI theme (light or dark)
     Args:
-        theme_name: 'light' or 'dark'
     Returns:
-        None - updates global variables
     """
     global THEME, COLORS, STYLES
     if theme_name in THEMES:
         THEME = theme_name
         COLORS = THEMES[THEME]
-        # Update styles with new theme colors
-        STYLES.update({
             "card": f"margin-bottom: 20px; border: 1px solid {COLORS['card_border']}; border-radius: 8px; overflow: hidden; background-color: {COLORS['card_bg']};",
             "header": f"background-color: {COLORS['header_bg']}; padding: 12px 15px; border-bottom: 1px solid {COLORS['card_border']};",
             "header_text": f"margin: 0; font-size: 18px; color: {COLORS['header_text']};",
             "method_container": f"flex: 1; min-width: 200px; padding: 15px; border-right: 1px solid {COLORS['card_border']};",
             "method_title": f"margin-top: 0; color: {COLORS['text_primary']}; padding-bottom: 8px;",
             "info_panel": f"padding: 10px; background-color: {COLORS['section_bg']}; margin-bottom: 10px; border-radius: 4px;"
-        })
-def format_result_item(result):
-    """Format a single result item with confidence badge.
-    Args:
-        result: Tuple containing (name, score) or (id, name, score)
-    Returns:
-        HTML string for the result item or None if invalid format
     """
-    # Handle both 2-tuple and 3-tuple formats
-    if len(result) == 3:
-        category_id, category_name, score = result
-        display_text = f"<strong>{category_id}</strong>: {category_name}"
-    elif len(result) == 2:
-        display_text, score = result
-    else:
-        return None  # Skip any invalid formats
-    confidence_percent = int(score * 100)
-    confidence_color = get_confidence_color(score)
-    bg_color = get_confidence_bg_color(score)
-    item_html = f"<li style='display: flex; justify-content: space-between; align-items: center; margin-bottom: 4px;'>"
-    item_html += f"<span style='font-weight: 500; flex: 1;'>{display_text}</span>"
-    item_html += f"<span style='background-color: {bg_color}; border: 1px solid {confidence_color}; color: #000; font-weight: 600; padding: 2px 6px; border-radius: 4px; min-width: 70px; text-align: center; margin-left: 8px;'>Confidence: {confidence_percent}%</span>"
-    item_html += "</li>"
-    return item_html
-def format_info_panel(title, content):
-    """Format an information panel with title and content.
     Args:
-        title: Panel title
-        content: Panel content (HTML or text)
     Returns:
-        HTML string for the info panel
     """
-    panel_html = f"<div style='{STYLES['info_panel']}'>"
-    panel_html += f"<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>{title}</h4>"
-    panel_html += f"<p style='margin-bottom: 8px;'>{content}</p>"
-    panel_html += "</div>"
-    return panel_html
-def format_results_section(results, section_title, match_type="ingredients"):
-    """Format a results section with title and result items.
-    Args:
-        results: List of result tuples
-        section_title: Title for the results section
-        match_type: Type of matching used (for color styling)
-    Returns:
-        HTML string for the results section
-    """
-    color_hex = METHOD_COLORS.get(match_type, "#1abc9c")
-    section_html = f"<div class='method-results' style='margin-top: 15px; border-left: 3px solid {color_hex}; padding-left: 15px;'>"
-    section_html += f"<h4 style='margin-top: 0; color: {color_hex};'>{section_title}</h4>"
-    if results:
-        section_html += "<ul style='margin-top: 5px; padding-left: 20px;'>"
-        for result in results:
-            item_html = format_result_item(result)
-            if item_html:
-                section_html += item_html
-        section_html += "</ul>"
-    else:
-        section_html += "<p style='color: #777; font-style: italic; margin: 5px 0;'>No matches found above confidence threshold.</p>"
-    section_html += "</div>"
-    return section_html

     # Create the full card with the methods content
     return format_result_card(title=product, content=methods_html)
def format_reranking_results_html(results, match_type="ingredients", show_scores=True, include_explanation=False,
                                  method="voyage", confidence_threshold=0.0):
    """
    Render reranking results (Voyage or OpenAI) as one HTML container of per-product cards.

    Args:
        results: List of result dictionaries. The keys read here are
            "product_name", "matching_items", "item_scores", "explanation"
            and "confidence".
        match_type: Either "ingredients" or "categories"
        show_scores: Accepted for interface compatibility; this function does
            not consult it (the card formatters always render a confidence).
        include_explanation: When true, each card is built with
            format_expanded_results_html and shows the result's "explanation";
            otherwise format_hybrid_results_html is used with a summary line.
        method: Key looked up in METHOD_NAMES for the display name
            (falls back to the capitalized key).
        confidence_threshold: Forwarded to the card formatter, which filters
            individual items (default 0.0 shows all)

    Returns:
        HTML string for displaying results, or a plain "No ... matches found."
        message when results is empty.
    """
    if not results:
        return f"No {match_type.lower()} matches found."

    method_name = METHOD_NAMES.get(method, method.capitalize())
    header_text = f"Matched {len(results)} products to {match_type} using {method_name}"

    html_elements = []
    for result in results:
        matching_items = result.get("matching_items", [])
        # Products without any candidate item get no card at all.
        if not matching_items:
            continue

        product_name = result.get("product_name", "")
        explanation = result.get("explanation", "") if include_explanation else ""

        item_scores = result.get("item_scores", [])
        if len(item_scores) != len(matching_items):
            # Per-item scores are missing or misaligned: reuse the overall
            # confidence for every item so each one still has a score.
            item_scores = [result.get("confidence", 0.5)] * len(matching_items)

        # Card formatters expect (name, score) or (id, text, score) tuples.
        formatted_matches = []
        for item, score in zip(matching_items, item_scores):
            if match_type == "categories" and ":" in item:
                # Category items arrive as "id: description".
                cat_id, _, cat_text = item.partition(":")
                formatted_matches.append((cat_id.strip(), cat_text.strip(), score))
            else:
                formatted_matches.append((item, score))

        if include_explanation:
            element_html = format_expanded_results_html(
                product=product_name,
                results=formatted_matches,
                expanded_description=explanation,
                match_type=match_type,
                confidence_threshold=confidence_threshold
            )
        else:
            summary_text = f"{match_type.capitalize()} matches using {method_name}."
            element_html = format_hybrid_results_html(
                product=product_name,
                results=formatted_matches,
                summary=summary_text,
                expanded_description="",
                confidence_threshold=confidence_threshold
            )
        html_elements.append(element_html)

    # Combine all cards into a single container under the header.
    return create_results_container(html_elements, header_text=header_text)
 def create_results_container(html_elements, header_text=None):
     """
     return container
def filter_results_by_threshold(results, confidence_threshold=0.0):
    """
    Keep only the result rows whose score reaches the confidence threshold.

    The score is taken from the last element of a row, so both
    (match, score) and (id, text, score) rows are supported. Rows may be
    tuples or lists (e.g. after a JSON round trip). Anything else, including
    rows with fewer than two elements, is treated as having score 0.0.

    Args:
        results: Iterable of result rows; None is treated as empty
        confidence_threshold: Minimum score (inclusive) a row needs to be kept

    Returns:
        New list with the rows that passed, in their original order
    """
    def row_score(row):
        if isinstance(row, (tuple, list)) and len(row) >= 2:
            return row[-1]
        return 0.0

    return [row for row in (results or []) if row_score(row) >= confidence_threshold]
def parse_result_item(item):
    """
    Split a result row into the text to display and its score.

    Supported rows (tuples or lists):
        (match, score)        -> (match, score)
        (id, text, score)     -> ("id: text", score), or (id, score) when
                                 text is empty

    Any other value is rendered with str() and given a score of 0.0.

    Args:
        item: A single result row

    Returns:
        Tuple of (display_text, score)
    """
    if isinstance(item, (tuple, list)):
        if len(item) == 2:
            match, score = item
            return match, score
        if len(item) == 3:
            cat_id, cat_text, score = item
            display_text = f"{cat_id}: {cat_text}" if cat_text else cat_id
            return display_text, score
    # Unknown shape: show it verbatim rather than dropping it.
    return str(item), 0.0
def format_expanded_results_html(product, results, expanded_description, match_type="ingredients", confidence_threshold=0.0):
    """
    Format one product card: an "Expanded Description" panel followed by the
    list of matches that pass the confidence threshold.

    Args:
        product: Product name, used as the card title
        results: List of result tuples (name, score) or (id, name, score)
        expanded_description: Text shown in the description panel (HTML-escaped)
        match_type: "ingredients" or anything else (rendered as "Categories");
            also selects the accent color from METHOD_COLORS
        confidence_threshold: Threshold for filtering individual items

    Returns:
        HTML string for the card
    """
    from html import escape  # stdlib; local import keeps this block self-contained

    color_hex = METHOD_COLORS.get(match_type, "#1abc9c")
    title_text = "Ingredients" if match_type == "ingredients" else "Categories"

    parts = [
        # Expanded description panel
        f"<div style='{STYLES['info_panel']}'>",
        "<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>Expanded Description</h4>",
        f"<p style='margin-bottom: 8px;'>{escape(str(expanded_description))}</p>",
        "</div>",
        # Results section
        f"<div class='method-results' style='margin-top: 15px; border-left: 3px solid {color_hex}; padding-left: 15px;'>",
        f"<h4 style='margin-top: 0; color: {color_hex};'>{title_text}</h4>",
    ]

    filtered_results = filter_results_by_threshold(results, confidence_threshold)
    if filtered_results:
        parts.append("<ul style='margin-top: 5px; padding-left: 20px;'>")
        for item in filtered_results:
            display_text, score = parse_result_item(item)
            # round() instead of int(): int(0.29 * 100) truncates to 28.
            confidence_percent = int(round(score * 100))
            confidence_color = get_confidence_color(score)
            bg_color = get_confidence_bg_color(score)
            parts.append("<li style='display: flex; justify-content: space-between; align-items: center; margin-bottom: 4px;'>")
            # Match text comes from data/model output, so escape it before embedding.
            parts.append(f"<span style='font-weight: 500; flex: 1;'>{escape(str(display_text))}</span>")
            parts.append(f"<span style='background-color: {bg_color}; border: 1px solid {confidence_color}; color: #000; font-weight: 600; padding: 2px 6px; border-radius: 4px; min-width: 70px; text-align: center; margin-left: 8px;'>Confidence: {confidence_percent}%</span>")
            parts.append("</li>")
        parts.append("</ul>")
    else:
        parts.append("<p style='color: #777; font-style: italic; margin: 5px 0;'>No matches found above confidence threshold.</p>")
    parts.append("</div>")

    # NOTE(review): the title is passed through unescaped; confirm whether
    # format_result_card escapes it.
    return format_result_card(title=product, content="".join(parts))
def format_hybrid_results_html(product, results, summary, expanded_description="", confidence_threshold=0.0):
    """
    Format results for hybrid matching as a card with a match/confidence table

    Args:
        product: Product name, used as the card title
        results: List of result tuples (name, score) or (id, name, score)
        summary: Summary text to display (inserted verbatim; skipped when empty)
        expanded_description: Optional expanded description (HTML-escaped)
        confidence_threshold: Threshold for filtering individual items

    Returns:
        HTML string for displaying results
    """
    from html import escape  # stdlib; local import keeps this block self-contained

    parts = []

    if summary:
        parts.append(f"<div style='{STYLES['info_panel']}'>")
        parts.append(f"<p style='margin: 0;'>{summary}</p>")
        parts.append("</div>")

    if expanded_description:
        parts.append(f"<div style='{STYLES['info_panel']}'>")
        parts.append("<h4 style='margin-top: 0; margin-bottom: 8px; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 5px;'>Expanded Description</h4>")
        parts.append(f"<p style='margin: 0;'>{escape(str(expanded_description))}</p>")
        parts.append("</div>")

    filtered_results = filter_results_by_threshold(results, confidence_threshold)
    if filtered_results:
        parts.append("<div style='padding: 10px;'>")
        parts.append("<table style='width: 100%; border-collapse: collapse;'>")
        parts.append("<thead><tr>")
        parts.append("<th style='text-align: left; padding: 8px; border-bottom: 2px solid #ddd;'>Match</th>")
        parts.append("<th style='text-align: right; padding: 8px; border-bottom: 2px solid #ddd; width: 100px;'>Confidence</th>")
        parts.append("</tr></thead>")
        parts.append("<tbody>")
        for item in filtered_results:
            display_text, score = parse_result_item(item)
            # round() instead of int(): int(0.29 * 100) truncates to 28.
            confidence_percent = int(round(score * 100))
            confidence_color = get_confidence_color(score)
            bg_color = get_confidence_bg_color(score)
            parts.append("<tr>")
            # Match text comes from data/model output, so escape it before embedding.
            parts.append(f"<td style='text-align: left; padding: 8px; border-bottom: 1px solid #ddd;'>{escape(str(display_text))}</td>")
            parts.append("<td style='text-align: center; padding: 8px; border-bottom: 1px solid #ddd;'>")
            parts.append(f"<span style='background-color: {bg_color}; border: 1px solid {confidence_color}; color: #000;")
            parts.append("font-weight: 600; padding: 2px 6px; border-radius: 4px; display: inline-block; width: 70px;'>")
            parts.append(f"{confidence_percent}%</span></td>")
            parts.append("</tr>")
        parts.append("</tbody></table>")
        parts.append("</div>")
    else:
        parts.append("<p style='color: #777; font-style: italic; padding: 10px; margin: 0;'>No matches found above confidence threshold.</p>")

    # NOTE(review): the title is passed through unescaped; confirm whether
    # format_result_card escapes it.
    return format_result_card(title=product, content="".join(parts))
     Generate CSS for the UI based on current theme
     Returns:
+        CSS string for styling the UI
     """
     return f"""
         .gradio-container .prose {{
 def set_theme(theme_name):
     """
+    Update the global theme setting
     Args:
+        theme_name: Theme name to set ("light" or "dark")
     Returns:
+        Boolean indicating success
     """
     global THEME, COLORS, STYLES
     if theme_name in THEMES:
         THEME = theme_name
         COLORS = THEMES[THEME]
+        # Update styles with new colors
+        STYLES = {
             "card": f"margin-bottom: 20px; border: 1px solid {COLORS['card_border']}; border-radius: 8px; overflow: hidden; background-color: {COLORS['card_bg']};",
             "header": f"background-color: {COLORS['header_bg']}; padding: 12px 15px; border-bottom: 1px solid {COLORS['card_border']};",
             "header_text": f"margin: 0; font-size: 18px; color: {COLORS['header_text']};",
+            "flex_container": "display: flex; flex-wrap: wrap;",
             "method_container": f"flex: 1; min-width: 200px; padding: 15px; border-right: 1px solid {COLORS['card_border']};",
             "method_title": f"margin-top: 0; color: {COLORS['text_primary']}; padding-bottom: 8px;",
+            "item_list": "list-style-type: none; padding-left: 0;",
+            "item": "margin-bottom: 8px; padding: 8px; border-radius: 4px;",
+            "empty_message": "color: #7f8c8d; font-style: italic;",
             "info_panel": f"padding: 10px; background-color: {COLORS['section_bg']}; margin-bottom: 10px; border-radius: 4px;"
+        }
+        return True
+    return False
def format_categories_html(product, categories, chicory_result=None, header_color=None):
    """
    Format category matching results as HTML

    Args:
        product: Product name, used as the card title
        categories: List of (category, score) tuples
        chicory_result: Optional chicory parser result for the product. A dict
            is read for its "ingredient" and "confidence" keys; any other
            truthy value renders a "No Chicory results available" note.
        header_color: Optional color for the category section; defaults to
            the "categories" entry of METHOD_COLORS

    Returns:
        HTML string
    """
    from html import escape  # stdlib; local import keeps this block self-contained

    parts = []

    # Chicory panel is only shown when a result object was supplied.
    if chicory_result:
        parts.append(f"<div style='{STYLES['info_panel']}'>")
        parts.append("<h4 style='margin-top: 0; border-bottom: 1px solid rgba(0,0,0,0.1); padding-bottom: 8px;'>Chicory Parser Results</h4>")

        if isinstance(chicory_result, dict):
            ingredient = chicory_result.get("ingredient", "Not found")
            # A present-but-None confidence is treated as 0 instead of crashing.
            confidence = chicory_result.get("confidence", 0) or 0
            # round() instead of int(): int(0.29 * 100) truncates to 28.
            confidence_percent = int(round(confidence * 100))

            parts.append("<div style='display: flex; justify-content: space-between; align-items: center; padding: 8px; border-radius: 4px;'>")
            # Parser output is external text, so escape it before embedding.
            parts.append(f"<span style='font-weight: bold;'>{escape(str(ingredient))}</span>")
            parts.append(f"<span style='background-color: {get_confidence_bg_color(confidence)}; border: 1px solid {get_confidence_color(confidence)}; color: #000; font-weight: 600; padding: 2px 6px; border-radius: 4px; min-width: 70px; text-align: center;'>Confidence: {confidence_percent}%</span>")
            parts.append("</div>")
        else:
            parts.append(f"<p style='{STYLES['empty_message']}'>No Chicory results available</p>")

        parts.append("</div>")

    # Category results are rendered by the shared per-method formatter.
    parts.append(format_method_results(
        method_key="categories",
        results=categories,
        color_hex=header_color or METHOD_COLORS.get("categories", "#1abc9c")
    ))

    return format_result_card(title=product, content="".join(parts))

ui_hybrid_matching.py CHANGED Viewed

@@ -3,7 +3,7 @@ from utils import SafeProgress
 from category_matching import load_categories, hybrid_category_matching
 from similarity import hybrid_ingredient_matching, compute_similarities
 from ui_core import embeddings, parse_input
-from ui_formatters import format_hybrid_results_html, create_results_container
 from openai_expansion import expand_product_descriptions
 from api_utils import get_voyage_client
@@ -12,16 +12,6 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
                                              match_type="categories", progress=gr.Progress()):
     """
     Categorize products using Voyage reranking with optional description expansion
-    Args:
-        product_input: Text input with product names
-        is_file: Whether the input is a file
-        use_expansion: Whether to use AI description expansion (boolean switch)
-        embedding_top_n: Number of embedding candidates to consider
-        final_top_n: Final number of results to return
-        confidence_threshold: Minimum confidence threshold
-        match_type: Either "ingredients" or "categories"
-        progress: Progress tracking object
     """
     progress_tracker = SafeProgress(progress)
     progress_tracker(0, desc=f"Starting Voyage reranking for {match_type}...")
@@ -49,7 +39,7 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
             product_names, categories,
             embedding_top_n=int(embedding_top_n),
             final_top_n=int(final_top_n),
-            confidence_threshold=confidence_threshold,
             expanded_descriptions=expanded_descriptions if use_expansion else None,
             progress=progress
         )
@@ -64,7 +54,7 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
             product_names, embeddings,
             embedding_top_n=int(embedding_top_n),
             final_top_n=int(final_top_n),
-            confidence_threshold=confidence_threshold,
             expanded_descriptions=expanded_descriptions if use_expansion else None,
             progress=progress
         )
@@ -72,31 +62,55 @@ def categorize_products_with_voyage_reranking(product_input, is_file=False, use_
     # Format results
     progress_tracker(0.9, desc="Formatting results...")
-    result_elements = []
     for product, matches in match_results.items():
-        # Include the expanded description in the results if used
-        expanded_text = expanded_descriptions.get(product, "") if use_expansion else ""
-        result_elements.append(
-            format_hybrid_results_html(
-                product=product,
-                results=matches,
-                summary=f"{match_type.capitalize()} matches using Voyage AI reranking.",
-                expanded_description=expanded_text
-            )
-        )
-    output_html = create_results_container(
-        result_elements,
-        header_text=f"Matched {len(product_names)} products to {match_type} using Voyage AI reranking."
     )
-    if not match_results:
-        output_html = "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
     progress_tracker(1.0, desc="Done!")
-    return output_html
-# Add this function for Voyage reranking
 def hybrid_ingredient_matching_voyage(products, ingredients_dict,
                                      embedding_top_n=20, final_top_n=5,
                                      confidence_threshold=0.5,
@@ -152,13 +166,13 @@ def hybrid_ingredient_matching_voyage(products, ingredients_dict,
                 model="rerank-2"
             )
-            # Process results
             voyage_results = []
             for result in reranked["results"]:
                 score = result["relevance_score"]
-                if score >= confidence_threshold:
-                    voyage_results.append((result["document"]["text"], score))
             final_results[product] = voyage_results[:final_top_n]
         except Exception as e:
@@ -167,4 +181,4 @@ def hybrid_ingredient_matching_voyage(products, ingredients_dict,
             final_results[product] = candidates[:1]
     progress_tracker(1.0, desc="Voyage ingredient matching complete")
-    return final_results

 from category_matching import load_categories, hybrid_category_matching
 from similarity import hybrid_ingredient_matching, compute_similarities
 from ui_core import embeddings, parse_input
+from ui_formatters import format_hybrid_results_html, create_results_container, format_reranking_results_html
 from openai_expansion import expand_product_descriptions
 from api_utils import get_voyage_client
                                              match_type="categories", progress=gr.Progress()):
     """
     Categorize products using Voyage reranking with optional description expansion
     """
     progress_tracker = SafeProgress(progress)
     progress_tracker(0, desc=f"Starting Voyage reranking for {match_type}...")
             product_names, categories,
             embedding_top_n=int(embedding_top_n),
             final_top_n=int(final_top_n),
+            confidence_threshold=0.0,  # Don't apply threshold here - do it in display
             expanded_descriptions=expanded_descriptions if use_expansion else None,
             progress=progress
         )
             product_names, embeddings,
             embedding_top_n=int(embedding_top_n),
             final_top_n=int(final_top_n),
+            confidence_threshold=0.0,  # Don't apply threshold here - do it in display
             expanded_descriptions=expanded_descriptions if use_expansion else None,
             progress=progress
         )
     # Format results
     progress_tracker(0.9, desc="Formatting results...")
+    # Convert to unified format for formatter
+    formatted_results = []
     for product, matches in match_results.items():
+        # Include all products, even with no matches
+        formatted_result = {
+            "product_name": product,
+            "confidence": max([item[-1] for item in matches]) if matches else 0,
+            "matching_items": [],
+            "item_scores": [],
+            "explanation": expanded_descriptions.get(product, "") if use_expansion else ""
+        }
+        # Format matching items based on match type
+        if match_type == "ingredients":
+            # Extract ingredient names and scores
+            formatted_result["matching_items"] = [item[0] for item in matches]
+            formatted_result["item_scores"] = [item[1] for item in matches]
+        else:  # categories
+            for match in matches:
+                if len(match) >= 2:
+                    cat_id = match[0]
+                    # Some category matches might include a text description
+                    cat_text = match[1] if len(match) > 2 else ""
+                    score = match[-1]
+                    if isinstance(cat_text, (int, float)):  # This is not text but a score
+                        cat_text = ""
+                    formatted_result["matching_items"].append(
+                        f"{cat_id}: {cat_text}" if cat_text else f"{cat_id}"
+                    )
+                    formatted_result["item_scores"].append(score)
+        formatted_results.append(formatted_result)
+    if not formatted_results:
+        return "<div style='color: #d32f2f; font-weight: bold; padding: 20px;'>No results found. Please check your input or try different products.</div>"
+    result_html = format_reranking_results_html(
+        results=formatted_results,
+        match_type=match_type,
+        show_scores=True,
+        include_explanation=use_expansion,
+        method="voyage",
+        confidence_threshold=confidence_threshold  # Pass the threshold to the formatter
     )
     progress_tracker(1.0, desc="Done!")
+    return result_html
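+# Voyage-reranked ingredient matching: keeps every reranked result (up to final_top_n) and leaves confidence-threshold filtering to the display layer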
 def hybrid_ingredient_matching_voyage(products, ingredients_dict,
                                      embedding_top_n=20, final_top_n=5,
                                      confidence_threshold=0.5,
                 model="rerank-2"
             )
+            # Process results - include all results but keep the threshold for later filtering
             voyage_results = []
             for result in reranked["results"]:
                 score = result["relevance_score"]
+                voyage_results.append((result["document"]["text"], score))
+            # Still limit to final_top_n but don't filter by threshold here
             final_results[product] = voyage_results[:final_top_n]
         except Exception as e:
             final_results[product] = candidates[:1]
     progress_tracker(1.0, desc="Voyage ingredient matching complete")
+    return final_results